diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index 6ea799d..0000000
--- a/.gitmodules
+++ /dev/null
@@ -1,6 +0,0 @@
-[submodule "maxflow/gunrock"]
-	path = maxflow/gunrock
-	url = https://github.com/nsakharnykh/gunrock
-[submodule "maxflow/cub"]
-	path = maxflow/cub
-	url = https://github.com/NVlabs/cub
diff --git a/README.md b/README.md
deleted file mode 100644
index a5e1054..0000000
--- a/README.md
+++ /dev/null
@@ -1,3 +0,0 @@
-# Rapids - Code Share
-
-This repository is a way to share experimental codes that are not yet integrated in RAPIDS products. Prototypes may not build or run.
\ No newline at end of file
diff --git a/hash-graph-dehornetify/.gitignore b/hash-graph-dehornetify/.gitignore
new file mode 100644
index 0000000..4d25f81
--- /dev/null
+++ b/hash-graph-dehornetify/.gitignore
@@ -0,0 +1,6 @@
+/build/
+.files_for_cscope
+cscope.out
+.*.swp
+logs/
+outputs/
diff --git a/hash-graph-dehornetify/CMakeLists.txt b/hash-graph-dehornetify/CMakeLists.txt
new file mode 100644
index 0000000..3035b4d
--- /dev/null
+++ b/hash-graph-dehornetify/CMakeLists.txt
@@ -0,0 +1,55 @@
+cmake_minimum_required(VERSION 3.8) #language level CUDA support was added in CMake 3.8
+set(PROJECT "HornetAlg")
+
+project(${PROJECT} LANGUAGES CXX CUDA)
+
+include(compiler-util/CMakeLists.txt)
+# include(../compiler-util/CMakeLists.txt)
+
+# add_subdirectory(../hornet build)
+
+###################################################################################################
+# - include paths ---------------------------------------------------------------------------------
+
+# include_directories(../hornet/include)
+# include_directories(include/rmm/detail)
+# include_directories(include/rmm)
+include_directories(include)
+include_directories(test)
+# include_directories(mem)
+# include_directories(SingleHashGraph)
+# include_directories(MultiHashGraph)
+
+# include_directories(/home/ogreen/hash-graph/moderngpu-oded/src)
+# include_directories(/home/ogreen/hash-graph/hornetsnest/externals/cnmem/include)
+# include_directories(../externals/moderngpu-oded/src)
+# include_directories(../externals/cnmem/include)
+include_directories(externals/moderngpu-oded/src)
+include_directories(externals/rmm/include)
+# include_directories(externals/cnmem/include)
+
+
+###################################################################################################
+# - library targets -------------------------------------------------------------------------------
+
+file(GLOB_RECURSE CU_SRCS src/*)
+# file(GLOB_RECURSE CPP_SRCS ${PROJECT_SOURCE_DIR}/externals/xlib/src/*)
+
+add_library(alg ${CU_SRCS})
+
+###################################################################################################
+# - add executables -------------------------------------------------------------------------------
+
+# add_executable(sing-hash test/SingleHashGraphTest.cu mem/memory.cpp mem/memory_manager.cpp externals/cnmem/src/cnmem.cpp)
+# add_executable(sing-hash test/SingleHashGraphTest.cu mem/memory_manager.cpp)
+# add_executable(multi-hash test/MultiHashGraphTest.cu mem/memory.cpp mem/memory_manager.cpp externals/cnmem/src/cnmem.cpp)
+add_executable(sing-hash test/SingleHashGraphTest.cu )
+add_executable(multi-hash test/MultiHashGraphTest.cu )
+# add_executable(multi-hash test/MultiHashGraphTest.cu )
+
+
+# link_directories(externals/rmm)
+# target_link_libraries(sing-hash alg cuda rmm)
+target_link_libraries(sing-hash alg cuda -fopenmp )
+# target_link_libraries(multi-hash alg cuda rmm -fopenmp )
+target_link_libraries(multi-hash alg cuda -fopenmp )
diff --git a/hash-graph-dehornetify/compiler-util/CMakeLists.txt b/hash-graph-dehornetify/compiler-util/CMakeLists.txt
new file mode 100644
index 0000000..fdae560
--- /dev/null
+++ b/hash-graph-dehornetify/compiler-util/CMakeLists.txt
@@ -0,0 +1,133 @@
+###################################################################################################
+# - set build options -----------------------------------------------------------------------------
+
+set(CUB_LIBRARY ON) # if set to off, some test cases using CUB will not compile.
+set(RMM_LIBRARY OFF) # if set to ON, gpu::allocate invokes RMM_ALLOC, if set to OFF, gpu::allocate invokes cuMalloc (which in turn, invokes cudaMalloc)
+
+###################################################################################################
+# - cmake modules ---------------------------------------------------------------------------------
+
+include(FeatureSummary)
+include(CheckIncludeFiles)
+include(CheckLibraryExists)
+
+###################################################################################################
+# - compiler options ------------------------------------------------------------------------------
+
+set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+#set(CMAKE_C_COMPILER $ENV{CC})
+#set(CMAKE_CXX_COMPILER $ENV{CXX})
+
+set(CMAKE_CUDA_STANDARD 14)
+set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+#set(CMAKE_CUDA_SEPARABLE_COMPILATION ON) # no other RAPIDS projects use this
+
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON) # if on, save output of compile commands to compile_command.json
+
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") # for compatibility with Arrow
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") # for compatibility with Arrow
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -D_GLIBCXX_USE_CXX11_ABI=0") # for compatibility with Arrow
+
+# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_60,code=sm_60")
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_70,code=sm_70")
+
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --gpu-architecture=compute_70 --gpu-code=sm_70")
+# set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --gpu-architecture=compute_60 --gpu-code=sm_60")
+
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr")
+
+# set warnings as errors
+if(CMAKE_COMPILER_IS_GNUCXX)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
+endif(CMAKE_COMPILER_IS_GNUCXX)
+set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror cross-execution-space-call -Xcompiler")
+
+# set default build type
+if(NOT CMAKE_BUILD_TYPE)
+    set(CMAKE_BUILD_TYPE Release)
+endif()
+
+if(CUB_LIBRARY)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DCUB_WRAPPER")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DCUB_WRAPPER")
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DCUB_WRAPPER")
+endif()
+
+if(RMM_LIBRARY)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DRMM_WRAPPER")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DRMM_WRAPPER")
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -DRMM_WRAPPER")
+endif()
+
+###################################################################################################
+# - Find and add different modules and supporting repos -------------------------------------------
+
+find_package(OpenMP)
+if(OpenMP_CXX_FOUND)
+    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=${OpenMP_CXX_FLAGS}")
+    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
+endif()
+
+###################################################################################################
+# - include paths ---------------------------------------------------------------------------------
+
+include_directories("${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}")
+# include_directories(../primitives)
+# include_directories(../externals/cpp-btree-master)
+# include_directories(../xlib/include)
+
+if(CUB_LIBRARY)
+    include_directories(externals/cub-1.8.0)
+endif()
+
+if(RMM_LIBRARY)
+    # include_directories(../externals/rmm/include)
+    # include_directories(../externals/rmm/src)
+    # include_directories(../externals/rmm/thirdparty/cnmem/include)
+    include_directories(../externals/rmm/include)
+    include_directories(../externals/rmm/src)
+    include_directories(../externals/rmm/thirdparty/cnmem/include)
+endif()
+
+###################################################################################################
+# - library paths ---------------------------------------------------------------------------------
+
+link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the link directories for nvcc
+
+###################################################################################################
+# - library targets -------------------------------------------------------------------------------
+
+###################################################################################################
+# - build options ---------------------------------------------------------------------------------
+
+option(USE_NVTX "Build with NVTX support" ON)
+if(USE_NVTX)
+    message(STATUS "Using Nvidia Tools Extension")
+    find_library(NVTX_LIBRARY nvToolsExt PATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES})
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --define-macro USE_NVTX")
+endif(USE_NVTX)
+
+if(CMAKE_BUILD_TYPE MATCHES Debug)
+    message(STATUS "Building with debugging flags")
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G -O0")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0")
+elseif(CMAKE_BUILD_TYPE MATCHES Release)
+    message(STATUS "Building with optimization flags")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3")
+endif(CMAKE_BUILD_TYPE MATCHES Debug)
+
+if(CMAKE_BUILD_TYPE MATCHES Prof)
+    message(STATUS "Building with debugging flags")
+    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G -O0")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -O0 -pg")
+endif(CMAKE_BUILD_TYPE MATCHES Prof)
+
+###################################################################################################
+# - link libraries --------------------------------------------------------------------------------
+
+###################################################################################################
+# - custom targets --------------------------------------------------------------------------------
+
diff --git a/hash-graph-dehornetify/experiments/.ipynb_checkpoints/snmg-hg-checkpoint.ipynb b/hash-graph-dehornetify/experiments/.ipynb_checkpoints/snmg-hg-checkpoint.ipynb
new file mode 100644
index 0000000..60d876a
--- /dev/null
+++ b/hash-graph-dehornetify/experiments/.ipynb_checkpoints/snmg-hg-checkpoint.ipynb
@@ -0,0 +1,622 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      
"['strong_scaling', 'weak_scaling']\n", + "['build', 'intersect']\n", + "['noindex_nomanaged', 'index_nomanaged', 'noindex_managed', 'index_managed']\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import sys\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import json\n", + "import glob\n", + "import itertools\n", + "\n", + "from collections import defaultdict\n", + "\n", + "# EXPERIMENTS = [\"strong_scaling\", \"weak_scaling\", \"duplicate_keys\"]\n", + "EXPERIMENTS = [\"strong_scaling\", \"weak_scaling\"]\n", + "EXP_TYPES = [\"build\", \"intersect\"]\n", + "MODES = [\"noindex_nomanaged\", \"index_nomanaged\", \"noindex_managed\", \"index_managed\"]\n", + "\n", + "SYSTEMS = [\"dgx2\", \"summit\"]\n", + "\n", + "# data[EXPERIMENT][EXP_TYPE][MODE]\n", + "results = dict()\n", + "\n", + "%matplotlib inline\n", + "from six import iteritems\n", + "from matplotlib.lines import Line2D\n", + "\n", + "fmarkers=Line2D.filled_markers\n", + "\n", + "plt.style.use('ggplot')\n", + "\n", + "print(EXPERIMENTS)\n", + "print(EXP_TYPES)\n", + "print(MODES)\n", + "\n", + "for sys in SYSTEMS:\n", + " results[sys] = dict()\n", + " for exp in EXPERIMENTS:\n", + " results[sys][exp] = dict()\n", + " for exp_type in EXP_TYPES:\n", + " results[sys][exp][exp_type] = dict()\n", + " for mode in MODES:\n", + " results[sys][exp][exp_type][mode] = defaultdict(list)\n", + " # strong_scaling keycount --> tuples of (gpucount, time)\n", + " # weak_scaling keycount / dev --> tuples of (gpucount, time)\n", + " # duplicate_keys keycount --> tuples of (tablesize, time)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def loadStrongScaling(system):\n", + " exp = \"strong_scaling\"\n", + " for exp_type in EXP_TYPES:\n", + " for mode in MODES:\n", + " file_name = \"./\" + system + \"/\" + exp + \"/results/\" + exp_type + \"/\" + exp + \"_\" + mode + \".txt\"\n", + " with open(file_name, \"r\") as data:\n", + " for line in data:\n", + " if exp in line or \"keycount\" in line or \"tests\" in line:\n", + " continue\n", + "\n", + " line_info = line.strip(\"\\n\").split(\",\")\n", + "\n", + " keycount = int(line_info[0])\n", + " gpucount = int(line_info[1])\n", + "\n", + " if line_info[2] == \"oom\" or line_info[2] == '0':\n", + " continue\n", + "\n", + " time = float(line_info[2])\n", + "\n", + " results[system][exp][exp_type][mode][keycount].append((gpucount, time))\n", + " \n", + " \n", + "def loadWeakScaling(system):\n", + " exp = \"weak_scaling\"\n", + " for exp_type in EXP_TYPES:\n", + " for mode in MODES:\n", + " file_name = \"./\" + system + \"/\" + exp + \"/results/\" + exp_type + \"/\" + exp + \"_\" + mode + \".txt\"\n", + " with open(file_name, \"r\") as data:\n", + " for line in data:\n", + " if exp in line or \"keycount\" in line or \"tests\" in line:\n", + " continue\n", + "\n", + " line_info = line.strip(\"\\n\").split(\",\")\n", + "\n", + " keycount = int(line_info[0])\n", + " gpucount = int(line_info[1])\n", + " \n", + " key_per_dev = int(keycount / gpucount)\n", + "\n", + " if line_info[2] == \"oom\" or line_info[2] == '0':\n", + " continue\n", + "\n", + " time = float(line_info[2])\n", + "\n", + " results[system][exp][exp_type][mode][key_per_dev].append((gpucount, time))\n", + " \n", + " \n", + "def loadDuplicateKeys(system):\n", + " exp = \"duplicate_keys\"\n", + " for exp_type in EXP_TYPES:\n", + " for mode in MODES:\n", + " file_name = \"./\" + system + \"/\" + exp + \"/results/\" + exp_type 
+ \"/\" + exp + \"_\" + mode + \".txt\"\n", + " with open(file_name, \"r\") as data:\n", + " for line in data:\n", + " if exp in line or \"keycount\" in line or \"tests\" in line:\n", + " continue\n", + "\n", + " line_info = line.strip(\"\\n\").split(\",\")\n", + "\n", + " keycount = int(line_info[0])\n", + " tablesize = int(line_info[1])\n", + " gpucount = int(line_info[2])\n", + " \n", + " if line_info[3] == \"oom\" or line_info[3] == '0':\n", + " continue\n", + "\n", + " time = float(line_info[2])\n", + "\n", + " results[system][exp][exp_type][mode][keycount].append((tablesize, time))\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "for system in SYSTEMS:\n", + " loadStrongScaling(system)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "for system in SYSTEMS:\n", + " loadWeakScaling(system)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'duplicate_keys'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0msystem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mSYSTEMS\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mloadDuplicateKeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msystem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m\u001b[0m in \u001b[0;36mloadDuplicateKeys\u001b[0;34m(system)\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0mtime\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline_info\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 70\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0msystem\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mexp\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mexp_type\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkeycount\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtablesize\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 71\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'duplicate_keys'" + ] + } + ], + "source": [ + "for system in SYSTEMS:\n", + " loadDuplicateKeys(system)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "9\n", + "16777216 [(1, 0.0126341), (2, 0.0156662), (4, 0.0180244), (8, 0.0218276), (16, 0.0320809)]\n", + "33554432 [(1, 0.0303186), (2, 0.0292536), (4, 0.0320338), (8, 0.0314122), (16, 0.0497111)]\n", + "67108864 [(1, 0.0733604), (2, 
0.0617933), (4, 0.0593879), (8, 0.0592865), (16, 0.0660326)]\n", + "134217728 [(1, 0.188676), (2, 0.137769), (4, 0.116816), (8, 0.100187), (16, 0.0940442)]\n", + "268435456 [(1, 0.443171), (2, 0.313036), (4, 0.251006), (8, 0.197405), (16, 0.185187)]\n", + "536870912 [(2, 0.688961), (4, 0.500017), (8, 0.389059), (16, 0.342239)]\n", + "1073741824 [(4, 1.45416), (8, 0.823694), (16, 0.700535)]\n", + "2147483648 [(8, 2.19697), (16, 1.58418)]\n", + "4294967296 [(16, 3.42857)]\n", + "here3?\n" + ] + } + ], + "source": [ + "print(len(results[\"dgx2\"][\"strong_scaling\"][\"build\"][\"noindex_nomanaged\"].items()))\n", + "for k, v in results[\"dgx2\"][\"strong_scaling\"][\"build\"][\"noindex_nomanaged\"].items():\n", + " print(k, v)\n", + "print(\"here3?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "def get_cmap(n, name='hsv'):\n", + " '''Returns a function that maps each index in 0, 1, ..., n-1 to a distinct \n", + " RGB color; the keyword argument name must be a standard mpl colormap name.'''\n", + " return plt.cm.get_cmap(name, n)\n", + "\n", + "def plotThroughput(system, exp, exp_type, mode):\n", + " fig, axes = plt.subplots(nrows=1,sharex=True)\n", + " # fig.set_size_inches(5,3)\n", + "\n", + " plot_data = results[system][exp][exp_type][mode]\n", + " \n", + " cmap = get_cmap(len(plot_data.items()))\n", + " for i, data in enumerate(plot_data.items()):\n", + " keyc = data[0]\n", + " data_points = data[1]\n", + " x = [i[0] for i in data_points]\n", + " y = [i[1] for i in data_points]\n", + " \n", + " for j in range(len(y)):\n", + " if exp == \"weak_scaling\":\n", + " y[j] = (keyc * x[j]) / y[j]\n", + " else:\n", + " y[j] = keyc / y[j]\n", + " \n", + " label = \"2^\" + str(int(np.log(keyc) / np.log(2)))\n", + " axes.plot(x, y, label=label,\n", + " color=cmap(i),\n", + " marker=fmarkers[i])\n", + " \n", + " axes.set_ylim(ymin=0)\n", + " \n", + " #axes.plot(threads, threads, label=\"Ideal\",color=plt.rcParams['axes.color_cycle'][4])\n", + " #axes.set_yticks(np.arange(0, 5.0, 0.5))\n", + "\n", + " plt.title(system + \" \" + exp + \" \" + exp_type + \" \" + mode, pad=20)\n", + " plt.legend(loc='upper left', ncol=1, fancybox=True, shadow=True, fontsize=11)\n", + " \n", + " axes.set_ylabel(\"Keys / sec.\",fontsize=8) \n", + " axes.set_xlabel(\"GPU Count\",fontsize=8) \n", + " #plt.xticks(fontsize=8)\n", + " #plt.yticks(fontsize=8)\n", + " \n", + " title = system + \"_\" + exp + \"_\" + exp_type + \"_\" + mode\n", + " plt.savefig(title + \".pdf\", format=\"pdf\");\n", + " plt.show()\n", + " \n", + "def plotSpeedup(system, exp, exp_type, mode):\n", + " fig, axes = plt.subplots(nrows=1,sharex=True)\n", + " # fig.set_size_inches(5,3)\n", + "\n", + " plot_data = results[system][exp][exp_type][mode]\n", + " \n", + " cmap = get_cmap(len(plot_data.items()))\n", + " for i, data in enumerate(plot_data.items()):\n", + " keyc = data[0]\n", + " data_points = data[1]\n", + " x = [i[0] for i in data_points]\n", + " y = [i[1] for i in data_points]\n", + " \n", + " # compute keys / sec throughput (higher is better)\n", + " for j in range(len(y)):\n", + " if exp == \"weak_scaling\":\n", + " y[j] = (keyc * x[j]) / y[j]\n", + " else:\n", + " y[j] = keyc / y[j]\n", + " \n", + " # compute speedup of throughput\n", + " for j in range(len(y)):\n", + " if j == 0:\n", + " continue\n", + " y[j] = y[j] / y[0]\n", + " y[0] = 1.0\n", + " \n", + " label = \"2^\" + str(int(np.log(keyc) / np.log(2)))\n", + " axes.plot(x, y, label=label,\n", + " color=cmap(i),\n", + " 
marker=fmarkers[i])\n", + " \n", + " axes.set_ylim(ymin=0)\n", + " \n", + " #axes.plot(threads, threads, label=\"Ideal\",color=plt.rcParams['axes.color_cycle'][4])\n", + " #axes.set_yticks(np.arange(0, 5.0, 0.5))\n", + "\n", + " plt.title(system + \" \" + exp + \" \" + exp_type + \" \" + mode, pad=20)\n", + " plt.legend(loc='upper left', ncol=1, fancybox=True, shadow=True, fontsize=11)\n", + " \n", + " axes.set_ylabel(\"Speedup\",fontsize=8) \n", + " axes.set_xlabel(\"GPU Count\",fontsize=8) \n", + " #plt.xticks(fontsize=8)\n", + " #plt.yticks(fontsize=8)\n", + " \n", + " # plt.savefig(mytitle + \".pdf\", format=\"pdf\");\n", + " plt.show()\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unmanaged Memory Plots\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEkCAYAAADNfV1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOydd3wc1bX4v2e2Slr15op7NxgwxhgCNt30vqYlgeRBIEDgkfcDkhDgwYPwEiDwqCEQignBCzGBUAyE0IvBFINlbOMKbuptpe1zf3/MaLVadWnXkuz5+iPvzsydO2dm79xz7zn3nitKKSwsLCws9my0gRbAwsLCwmLgsZSBhYWFhYWlDCwsLCwsLGVgYWFhYYGlDCwsLCwssJSBhYWFhQVDXBmIyE0isn6g5bDoO8m/oYhcICLRgbh2P/JZICJKREaZ22PN7R90c95mEbm+v9fvgXwpe6bmfZ2firwsdh09KWtDWhmkAxHJE5G7RaRMRJpEZKeI/F1EpvYz30dE5O0Uibk7swQYOdBC9JIPgeHA9oEWpBOG4jO12MVYyqA9w4FxwA3A/sCJgAf4t4jkp/viIuJM9zUGM0qpgFKqfKDl6A1KqbBSaqdSSh9oWTpiKD5Ti13PkFEGIuISkQdFpF5EakXkQcCVlEYTkdtEpFJE/CLyjIhc1dJFFoOXReRTEXEknPMvEflAROxKqW+UUqcopf6ulFqrlFoBnIuhJDrt9ouIQ0TuEpGtIhISkR0i8ox57Cbgp8B8s5utROQC85gSkV+IyNMiUg/81dw/xZTVb/79U0QmJlzvAhGJisghIvK5iDSb9zU7Sa6jRORrEQmKyFci0iJDj7r6InKKiHxh5l8nIp+IyH4JxyeIyLMiUmOm+UpETjSP5YvIUyLynYgERGStiPxSRKSL67UxaeyK+xSRc0Vko3nuv0RkXMKxdqYkEfmBmfdYc7uNmaiTa8wSkQ/Na6wTEW8P5OrpvR8kIu+az7jWLEslyfn0Id/DzWfZ8kwP70DGUhF5XIx3rtF8jw5LOH6tWW7GJuy7UUSqu3peCWlvEpH1ZjlcI0Zv/S0RmZCU7ngR+UyMd69CRB4QkayE44+bv+0VYryjfjF66w4RuUREtpjP7mFJaJCJyNEi8rZZvutF5B0ROTDp2kpEfi4ii81n8L2IXJOU5lwRWW7mUSXGuz05Kc1+IvJxQhk5U5LMOyLiEZF7RGSb+bt9ISKnJ+XT67IGgFJqSPwBfwQqgFOAqcAdQAOwPiHN1YAf+CEwydyuAaIJaYqBbcAd5vZvzDR7dXHt8YAC5nWR5mpgK7AA2AuYA1xlHvNgVPIfAsPMvwzzmAKqgSuACcBkIAPYArwJzDb/3gLWA07zvAsAHXgXONR8Jq8DGwC7mWYk0Aw8AkwHjgQ+M695fg+e+TAgDFyD0VuahqEY9044Xg78C0NRTjB/n+MTjl+L0cMaB5xv/j4XJlzjpqTf8IKk3ytt92leuwl43/y95gDLgZWAdCSfue8HZt5jze0F5vYoc3usuf0DczsDo8y9AswC5gGfmjJf34V8Pbn3YRjvwdPA3qZsXwHv9fOZjjCfzWPmMz3azDf+TM37Wg38HTgAmIjxPoWAaWYaAV4DPgLs5vUiwMk9fO9bfqNlGO/BLOAL4J2ENPsAUYw6YhpwHPAdsDghzeNAPfCEmeZkIGj+Jk+a93giEAAuTTjvNOAsjPdyBkYZqwEKE9IojPfgIox34BfmvsMT0lxo5j8B2A94EfiW1vc5E9gB/NO8n4Mw6ot4GTGf5VvA2+bvPB64GOMdPbI/ZU0pNTSUAZBl/nAXJe1fQduKZBtwS1KaZ0h4Ecx9h5uF50azYJ7exbVtZkH8BNC6SHcP8G/MSqSD448Ab3ewXwGPJu37qfnjFSXsKzUL6o8SXmgF7J+Q5iBz3xRz+1ZgM2BLSLOQniuD/Uio9Do4fguwE8jqxW95D/BGwvZNdK8M0nKf5rUVMDFh32Rz31EdyWfu660y+A8MJZifkMdMM013yqC7e78FoxHiTEgzy0xzWD+e6f9gNEjsCWlOpK0yuMC8tj1J7n8Ddydsl2BUdA8A3wP39KK83ITxrhYn7DsbQ5m5ze3FwCdJ551iphljbj+O0ZhMfE4vA1WAK2HfC8BzXcijAbXAeUnv8P8lpVsD/K6LfArM8w4xty8yy0huQpqpiWXELGfBxDTm/r8A/+hPWVNKDRkz0QQMk9CHSfvfb/kiIjkYrZmPk9J8lJyZUuot4E6MgvaIUmppRxcVERtGq2EyhsLoyib8GEbLbL2IPCQiZ0jP7f+fJG3PAFYrpaoSZC4H1prH4rsxWrEtbDM/S83P6cCnSqlYQpp2z6MLvsJo1a0SkedF5EoRGZ1wfDbwoVKqqaOTxTDBXSciX5pdYz9wCTCmFzJAeu+zUikVNwMppdZhVBDTeyljV0wHvlFK1SZcZxVGS7U7urv3GcDHSqlwQt4rzbwTy0pv852OUcEmjkJ6n7bMweiZ1EmrOdOP0fqflCBPBfAT4FKMXvA19I7tSqnKJFkFQ8mAcZ/vJp3zjpkm8Xf8JvE5YTRk1iqlQkn7Ek1s40zzz3oRacDoheXSvgx/mbS9jdZniYjsa75Dm0SkEaPnQkI+LWUkXiaUUmuAuoQ85w
BOYFvS8z6f1ufd57Jm7y7BIKHFxqz6mcZIaFTyhwAxYKKIiDJVaEIaJ/A3jFbWAqXU1q7yVEp9KYat+WiMnsc9wC0icpBSqqEbkTqqTDu6D0narydVgC3HtA72dZVvhyilYiJyHEYhPAo4A7hdRM5SSr3Ug/x+CfwKw4T2OdAI/CdwQk9lMEnrfXZAok9DT9oGcPQhv77K05d7725/T/LtSObkbQ34BsOUkkxz0vZ8jPetFKMyrehCtmTCSdt9fQaRDo51tC8x35cwGgeXYfRqwhhKMbmh15GMGoCIZGKY4d7HUIo7zTRlSfl0V0Y0jEp9TgfHWq7f57I2VHoG6zFu9pCk/Qe3fDE16nYMG1kiB3WQ300Yrf1DMFq31yYeNH+8FzG07GFKqe+SM+gIpZRfKfW8UuoXGDbUaRgvAab8tp7kg1FIZohIUYJMpabMZT3MAwx77hxT+bWQ/Hy6RBl8opS6TSl1GEaL60Lz8GfAIYmOuiQOA5YppR5VSn1htsAndZK2P/TnPosTnZGmU68Qo5IDo9IqScp7/17KVwZMF5G8hOvMwKgU+0sZMC/J6TnLzLs3ZaWjfOcm3XfyAIoVGHbrBqXU+qS/+DBbETkK+C8MO/0W4AmRzgcR9FHW+Un75mNUiqv7mqmIFGLUAbcrpV5TSq3GMNOUdH1mO6Zh+Cp/o5R6Syn1DZBP20bGamCaiMTLhIhMAfIS0qwwt90dPO+WOqrPZW1IKAPTDPEQ8D8icrIYI21+j2FTS+RO4CoROU9EJonIVcAxJGhKEZmP0Vr9sVJqOYat7mYROcg8no1hGpkCLAJ0ERlm/mV0JqOI/D/zujPMHsJPMFpC68wkm4Cp5vEiEXF1lheGM7ASWCIi+4sxyuMZjK7nku6fWJwHMFpiD4rINDFGg9xqHutJD+pgEfmtiMwVkb1E5EgM51bLC/YARhl6QYyRKeNE5ESzNwGGWWuBGKNSJovI/wBzeyF/T+nPfTYDj4nIbBE5AMPB+DWGUxwMh10mRi9vgoichdFK7A1PY/SKnjJHehyEYecN9DKfjrgPyAEeF5GZYkx0Wwy8r5R6rx/5PohRgT1sPtMjaX2mLfwVo1y/LCLHiDHZbq6I/EpETgUQkWJTnjuUUq8A52A04q7uh2zJ/AHYX4zRfFNFZCFwL/DXnjbkOqEW4z28yCy/8zCsBb393bZgONWvMMvQkRiWg8Sy+VcMW/+TIrKPiMwFHjWv1ZLu3xjlcqmInCYi481ye4WIXGSm6XNZGxLKwOQ64B+YziIMDXl/Upq7MV6OezBGHByEoSCCACJSADyF4cB6FUAp9XcMe//fTK08G6MFNBbDproj4W9RF/I1YBTwjzAqk9OAM5RSa83jj2J49T/EKGDndJaRUiqAocRCGLbQdzBMSQuTbJ5dopTahtEaOxjDpnkP0DJMLdiDLOoxWtgvYIx8+AtGob3FzH8HxrNqxBi9UIZRYbS0eG4xZX8B47nkA//XU/l7Sj/vcwfwMMaImA8wXprTWsyG5u93EYbTchWGkv91L+VrBo7H6HF8gvEMW0bH9QvTl3QMMAqjfL1kynlGP/PdBpwEHEjrM706KU0QowW+AuMdWgcsNc/ZYrb+H8eoDH9rnrMJw290m6l8+41S6iuM338+xju7GMM5fEk/89UxRhJNwPCfPY5Rx+zoZT5VGHb9ozHekTswekp6QpqWMlKK8Ts+ZV7Lj1mGzTJ5MsYzvgvDSf0yhtl1Q1I+vS5rLcPndltE5C/ALKXU7G4T7wGIMQb8HWAfpdTXAy1PuthT7tNi90VExmCMkjtZKfXPdF9vqDiQe4SIjMBokb+FYaI5CfgRcPlAyjWQiMilGK2l7Rj2zz8Cy3e3CnJPuU+L3RcxJkhuwzC9jQF+j9Gren1XXH8omYl6QgyjW/c+hpnoRxgTSB4aUKkGljEY/oa1GHbg9zBH84jIrxOHqCX/DaDMfaHT+7QYnJh+qE7Ln4icN9Ay7mIKMeYjrcHwTXyHMYAl1OVZKWK3NxNZdI7pQyno7Hji+HsLi1QjInYM31xnlCulGneROHs8ljKwsLCwsNjtzEQWFhYWFn3AUgYWFhYWFpYysLCwsLCwlIGFhYWFBUNwnoHX6/0LRijdCp/PN7ObtGMwZs0WY8QgP9/n83UZcM7CwsJiT2Qo9gwex4hV3xPuAJ70+Xz7ADcDv0uXUBYWFhZDmSHXM/D5fO96vd6xifu8Xu8EjDhFxRiBxy7y+XxrMGai/qeZ7C2M2EYWFhYWFkkMxZ5BRzwMXOHz+WZjBIB6wNy/ktaAXacB2V6vt3AA5LOwsLAY1Ay5nkEyXq/XgxGt8lmvN77uc0t46P8C7vN6vRdgRP/chrGEnoWFhYVFAkNeGWD0bup8Pt++yQd8Pt924HSIK40zfD5fT5YatLCwsNijGPJmIp/P1wBs8nq9ZwF4vV7xer2zzO9FXq+35R5/hTGyyMLCwsIiiSEXm8jr9f4NWAAUAeXAjRgrAD0IDMdYn/YZn893s9frPRNjBJHCMBNd5vP5dkkEQAsLC4uhxJBTBhYWFhYWqWfIm4ksLCwsLPpPWhzIPZkl7PV6F2Cs8ekAqnw+3/weZG11YywsLCz6hnR1MF2jiR7HWJj+yY4Oer3ePIy5AAt9Pt93Xq+3pKcZb9++PSUCpoKioiKqqqoGWoxOGezyweCXcbDLB5aMqWCwywf9k3HEiBHdpkmLmcjn872LEQuoM84Flvp8vu/M9BXpkMPCwsLComcM1DyDyYDD6/W+DWQD9/h8vs56ERcDFwP4fD6Kiop2mZDdYbfbB5U8yQx2+WDwyzjY5QNLxlQw2OWD9Ms4UMrADswGjgQygI+8Xu/HPp9vXXJCn8/3MEa4CQA1mLpyg71rOdjlg8Ev42CXDywZU8Fglu+DtU5++EAhoYjgcjhY/PNqDpkS7lUeA2Ym6gFbgWU+n6/J5/NVYcwBmDVAslhYWFgMSj5Y6+THDxYQihi+31BE+PGDBXyw1pnyaw1Uz+AFjJhBdsAJzAX+2NfMlFI0NTWxq+dMBAIBotHBF+pIRMjKyhpoMSwsLPrJD+8vJBRtOwgoENb44QOFbLxnR0qvla6hpfFZwl6vdyvGLGEHgM/ne8jn833j9XqXAV8BOvCIz+db1dfrNTU14XK5cDgc/Rd+NyAUCrFt2zZLIVhYDAGUgooGjfU77awvt7PB/Fxfbm+nCFpo6SmkkrQoA5/Pd04P0vwB+EMqrqeUshRBAi6Xi2AwyLPPPsuhhx5KTk7OQItkYbHHE47Clkqzom+p+M3vjcFWi32mS2dCSZQDJ4R5sc5GTG9f8bscqbeC7A5RSy06wGazYbPZ+OKLL5g/vyfz+SwsLFJBbZOwfmdLRe9gQ4WN9TsdbKlqW7EPy4sxsTTKGXMDTCyNMGFYlImlUYbn6YiZbPydbiK17ZWBI99SBha9QEQGpU/DwmKoE9Nha42tvWlnp51qvy2ezmlXjCuOMnVkhBP3DzCxNMrEYVEmlEbxuLuv0CO1HY/x6Wx/f9hjlUHG0qVk3347tu3biY0YQeN11
xE4/fSU5F1TU8OVV17J5s2bcblcjB07lv/93/+lsLB1kbXbbruNP/3pT1x55ZVcffXV8f26rvOzn/2MNWvW4HK5KCoq4vbbb2fs2LFtrnHXXXdx55138uabbzJ16tSUyG1hYdGWpqCwsSLJtLPTzsaKtvb8Ao/Ryj92VpAJpUZlP7E0yujCGHZbFxfoAqWn6CZ6yB6pDDKWLiX3mmvQAgEA7Nu2kXvNNQApUQgiwqWXXsrBBx8MwC233MJtt93GnXfeCcAdd9zBypUr+fDDD7niiitwuVxcdtll8fPPOussjjrqKDRN47HHHuOaa67B5/PFj3/99dd8/vnnjBw5st+yWljs6SgF22rgkzXOuA1/g+nA3V7bWkVqotiryKj0508PtWnlF3j6V3PHwlC1zk55mYOKVQ7js2zX+kF3O2WQc8MNOFav7jKN87PPkHDbSRtaIEDeL39J5tNPd3peZPp0Gm6+uVsZ8vPz44oAYP/99+fJJ40J1vfffz8bNmxg8eLFOJ1Onn76aa644gr+/Oc/c9FFF6FpGsccc0z83NmzZ/PII4/Et0OhEL/+9a+5//77Oeuss7qVxcLCwiAUgc3JDlzzsymkYSyRAh63zsTSKAdNCjOxtJmJpi1/bHEUVwrq51CDULHaQXlLpb/KQdW3dnRzhJAjU6dkepSZZwb4/PFdNyJwt1MGPSLcyey9zvb3A13XefLJJ+MVfGIPAMDtdvPnP/+50/Mfe+wxjj766Pj2HXfcwRlnnMFee+2VclktLHYHavxam9Z9S8X/XZUNXbWadkbkG5X8onnNzJrgZpinjomlUUpzWx24/UEp8O/UKF9ltPLLywwFUP9da7WbVRyjZGaE8UcEKZkRoXRGhPxxMcR0CVjKoB/0pOVecuCB2Ldta7c/NnIk1c89l1J5rr/+erKysrjwwgt7fe6DDz7It99+y7PPPgvAihUr+PLLL/n1r3+dUhktLIYa0Rh8X21rNy5//U47tU2tRnqXXTG+JMrM0RFOPSAQb+WPL4mSleDALSpyUlXV98agHoOajfYEE4+d8lUOAjWtsuSPizJsnwizzm02Kv6ZETwlXZuXsopjNFW2dzpkFcf6LGtn7HbKoCc0XnddG58BgJ6RQeN116X0OjfffDObNm3i8ccfR9N65/1/7LHHeP7551myZAkZGRkAfPzxx2zYsIGDDjoIgB07dnDeeedx1113WcNHLXZL/EGJ2/FbKvwNO+1sqrQTTnDgFmUbtvzj9wvGbfkTS6OMLIhhS/HAm0hAqPzGbrT2zcq/8hs7UXOugM2pKJoSYdKxZmt/ZpTiaRFcnt4PB738y/L493THT9ojlUGLkzhdo4kAbr/9dr766isWL16My+Xq1blPPfUUTz31FD6fj/z8/Pj+yy+/nMsvvzy+PXfuXJ544glrNJHFkEYp2F6rsaHcYZh3Klor/511ra1im6YYUxRj4rAIR84Mxp23E0qj5GelJxRNc41GxSp73KFbvspBzQY7ypwv4MrVKZ0eYd/zmymdGaFkRoTCSVFsQ3AO7B6pDMBQCKms/BNZu3Yt9957L+PHj+fkk08GYK+99uLRRx/t9ly/3891113HqFGjOPvsswFjRvFLL72UFlktLHYVwQhsShimGR+5U2GnOdTafM9260wcFuXQqeaIHbPCH1McxZmmGkspqPvO1mrfNz8bd7Qqo+wRUUpnRphyYpDSmYZ9P2dULCX+hcHAHqsM0smUKVPY1oFPoid4PB62bt3ao7TLly/v0zUsLNKFUqYDN9G0s9POpioHmyuGoxIcuKMKDHPOgROb25h2inNS48DtjFgEqs1hnOWrHFSsdlC52kGwvhQA0RSFk6KMnheidEYk7tjNKNi9V921lIGFhUWvicZgS5UtHnJhfUIrv66ptZXvduhMKI0xZ4LOabObmTgsYpp2YmQ401+5hhqFym9abPuGg7dqnYNY2BzGmaFTPC3Kfot0ciY0UjozQtGUCI6MtIvWI44pymJ2OMpV/jDpXnrHUgYWFhad0hBo68BtGbmzudJOJNbafC/JiTGhNMpJCSEXJpZGGZEfQ9NanJ+NaZNTKfCXa1SUtTXz1G5ureIyC41hnAf8h5+SmRFKZ0TJHxdFs7XI15w2+fpKmdPGtw6NJVlOfqxHuUQTSvX0KFFLGVhY7OHoOmyvtbW15ZsRNcvrW23mdk0xttio6I/dJxgPrDahNEpu5q4zoegxqN1ka+PUrShz0FzVKmve2Cgl0yPM9DYbpp6ZETyl6TU/pYoYUKcJtZohbNgU+jFN54lSD4uawlzlD6dcKVjKwMJiDyEQxoiz02ZCloMN5TaCkVbTTm6G4cBdEA+5YJh29iqK4ehjnJ2+EglA1VpHmzANld/YiTQb8moORdHkKBOODMXH7pdMi+DKGXj7vgICArWaUCtCjVnBJ/8ZFb8W367XOtZYxgRl4cksB89lbubbncNTKq+lDCwsdiOUgqpGrd24/PXldrbW2OIOXBHF6IIYE4dFOXhyKG7WmTgsSqFnYFrQgVpp09IvL3NQvd6OMs1Rzmyd0hkR9jmnOe7YLZocxZb6FSDbEQPqNaFGOq7Q21fuxl+oiwfp0RX5CX9jorrxXSnydZ08XXFFfmY8vaai6MTQeJ1m7Umg+9GJvcFSBhYWQ5BIzFwoZaedHX6NrzbmxSv++kBrKz/DacTZOWB8mEXzzMBqJVHGlUTJ2AWVaEcoBQ3bEodxGhO4Gra1VkeeYTFKZ0aYtLB1GGfu6NYwDf0hILSr1CNajO89zk4qd416AdVJxW5TbSv1vaI6s+KVesd/ebqis8cfIUqd5qdO8wPTERVFEUPxGshidGr7/xA6wFIGFhaDmLpmaRtJ02zlb6m0E01cKCVXmDAsyqlzEmPmRxiep9PLye8pJRaBmvV2Nm/R2PBxjuHgXe0gWGcIJZqiYEKUkXPC7H9hEyUzopTOiJBZ2H0U0L601us0IdhhpR6DHDdZevuKPV/FEvbp7Sr2bAUd5xijXpqp1Rqp1Rqp0xr53tFIneY3t81Pabvt1wIJuTyEkjLgKVSalEAL6VoD+S/AiUCFz+eb2UW6OcDHwCKfz5faoEBdULTxGJyhsnb7w64ZVI1/vd/5p3M9g7lz5+JyueKzmn/zm9+wYMGCfstsMXDo5kIpiYHVWr5XNrQa6R02I87OlBFRTtgvGI+Zf+D0XMLN6QtT0FPCTUY0zpa4PBVlDirXOoiFjKrS7s6ieFqEqScG4vb9oqlRnJmqTWv9M02o1ezdVO59a63nddJan5CXj15VTUexAnR0GiUQr9BrtUY22f183kFFnvhZL00o6dh3oSmNXD2LPOUhX8+mWM9jcnQ0ebqx3fIH63g5YwX/cvtR2AlL+harSlfP4HHgPuDJzhJ4vV4b8L/Aa2mSoVPCGbNxhL5FaA1MpXASzjggJfmnez2Dhx9+2ApBMQRpDgkbzSUQEydlbaqwE0xY4DwvS2fSsAhHzTTi7LSM2ulsoZScTNjVoyL9FVpb+/4qB7WbbWD6JJwFOp6ZEQr/oxk1K4bMdVI+uoky
p/B+vFJ3Uqu5umitG6Sqta5QNEmwTaVeoflZa2/kPU1na055h632Os2PLp33VHL0TPL17HhFPiY6jHzdQ77KJk/PJl/3mJ+taXJUJho967KdEvwBFVotDxX/kye0V9DR06IU0qIMfD7fu16vd2w3ya4A/g7MSeW1c3begCPU9XoGqDAQSdoZxRFaReGWMzs9LeKaTsOwgV3PwGJwoxRUNGjtl0Mst7Otpv1CKRNKoxw2ta0Dt78LpaSKgEC1ErZ9Z2f7KgfVZQ4av3YQXmWH8taKLDg2Ru2+OjsvCLF1X53K/RX+kYm1sQZEsSkneQkV9uiozj5d2NVbbOsdtdYDEqJWGttU3FuTTTDSdrtO8xPpohLNynS3VtgqmxGRQnO7bUUeb70rD7m6BzvpH2JVoufzf7GruaTyJO72PMsK55qUX2NAfAZer3ckcBpwBN0oA6/XezFwMYDP56OoqP08vEAg0G5fl4iTmK0EW6wCQaEQYrZikNR71FK9ngEQD1Y3Z84crrvuOnJzc9udp2kamqbh8Xg6fGaDBbvdPijle3uVcMrvjBa72zGcF34VZcHM1i5/OAobdsK67cLabeaf+b0h0Nom9bgVU0Yq5s9QTBkZZcoIxZSRMGGYMhdK0QCn+dc3unuGMRR1QDVQI6rTz9oQBFdpaCsF9xca+SttFK3UcPqN+4nZFTXTdSoXxmiYFSG6XwzZR5GbB4VKmA4cChQojcIYFCihEOOz1G4nKxpFWjSEADYIEaaGBmqkwfikgSppYC0N1LbsSzjW8j0onYebzlAuCskhX+VQSA4jKaFA5VCg5xiftH4WqlzyyabEXoCWEAW1Rb5dUM/3GLvdzvSCSTyMGcI+xa/NQDmQ7wau9fl8Ma/X22VCn8/3MPCwuak6CuGauOh7T1ruAFqknNIN80CFQFxUjV+Gbi/pofg9J5XrGQAsXbqUkSNHEgqFuPHGG7n++uu59957252r6zq6ruP3+9Ma9ra/pDssb1/4YK2THz9YEDfdBCPCibfaOW5WkEBE4gulxBIcuMPzjGiaZxwYiUfTnDgsyrBOFkpprIe+zMcNQDv7eSQnm++DTZ3a1+tF2tnWXbVQtNJG6ecaI77UKPrCxog1WrxCVB6Fbe8omeeGyJ1pTOAaOTlCkSOptR4EdhojYOo1P7Wa3zDBSCMbND8rzFZ5wBZhZ6yqXau9WQt2eq9OZTda4jGjJT5KL2JvfRx5qmPTS57uIU/3kNFhX6IrFFqRDLpymEx/3pURI0Z0m2aglMEBwDOmIigCjvd6vVGfz/ePXSWA7iilKXcRWXWLacpdlBZFkOr1DID4uscul4sf/1M1gKMAACAASURBVPjHfVIyFl2z6IFCVKRt5RmJCS9+7mbaiCgzRkU4ZXYgXumPL4nicfdukpMO1AttJht1+ydCsMMJScZImMxEE4tSjIro5McUed8JnpU2HF/Z0L+yE1xlI5iw2panNGY4dA+PUDQzRMbMBvRxNdTZW80s32l+ViZU5MnO0katc6eFTWkUkEOu3XCYjogVMl0f22GF3mJ+ydezyVCu1p6ERdoZEGXg8/nGtXz3er2PAy/tSkXQgr/oKhyhdfiLr0p53ulYz6C5uZloNEpOTg5KKV544QVmzJiRatH3aNbvtLdTBK0I/7q+st3eALC9pxV6F631FjSlWm3rSjEypjMz0rldfUJePqqqGkcUqje0rLZlZ3uZjcoyJ+Fau9ELEYVtYj36AeWEf7qV+lmbqNr3W6qGb4+36LsaASNKyFVZ8cq7SM9lYnSU4SxtqdSVp11rPVtlUlxUPOhb3oOWmCLr4SCOBzaQdZmTpovcYEu9kkzX0NK/AQuAIq/XuxW4EXAA+Hy+h9Jxzb6gO0qpHvv3lOebrvUMKisrueiii9B1nVgsxqRJk7jttttSLv+eyIZyG3e/ks0/VmRgBBJo/7JpDsXP8jN62Fo36LC13mGlrscdpjmK+DiTlhEwLaNfas2hjBWBIGu/cdG0Kge9rBj1RRHOshHYgobvIeoKUb33BirPXEflvt9Sud9aqvfeQMRj+Nc6GwFjmGDaO0vzdA+5KqvHI2AsUoNtY4z8S5qwb4whAci+I0jG0gi1D2URG59ah4YoNfAxPHqB2r59e7udjY2NZGdnD4A4g5dNmzaxbt067HY7Rx555ECL0ykD7TPYVGHj7lezWfpJBi6H4sL5TTww2w5/dEMooZJ3KfhNExOmxbqcWZo8EsadcK0AoU7HpHc0+ajlu70ym+IvJ1H8xRSKvpxE8ReTyV+3F6KMijmc34h/1hYis7Yj+1TimllH7qQgeVpWa4t9AEbAJDLQv3N3DFb5SmfWodUpEjtrSgM9Xyj/Kq/H+Zg+gy67E9YMZIs9ki1VNu55NZvnlmfgsMFFRzRx1rFNPDfCDh6B64Nws9uIDuZQ8Jt/wz538O72RwkT6bAy/87mZ6UjsTL3x1v0dZq/yxEwbt1JXiybERsmMOyLOUz6cgKelWNwfDUC2dHa0HGNDpA/I8Cwk6oYPR1GzVSM2yef6uoCoKA1w14OsLMYJCiFfY2Oe1kY92sRbLXtG+uiQ3RK6pW5pQws9ii2Vtu4Z5kH30eZ2DS4cEETpy1swjfSwQmZmfHZJzIzhPI1A8tAngIzFMCkYed2OQLGoexxG3qens2YaCmz9InmBKQEk0swB9s3JUS/LqTp62yqy9xUlDkI+80wDTYjGmfJwRFKZtQb0TinR8jI76BysHysQ5uYwrkiintZBPdrEexbdJRAZLaN5lMcuF+PkBihQs+C5rNTPwzeUgYWewTbajTufS2bZz7MRAR+dFgTJxzXzDOj7ZySkYkAxwXqGR17iwc9+6Czqo0SaOG85qPbm17is009ZCp3uxEwoQYjTEP5qtZQzKu/taObjmpHpk7J9CgzzggYQdlmRiiaHMHuxmJ3JaBwvWdU/u43ItiqFcoJoR/Y8V/mJni0A71EQxoU7rfqIZDQCLAJwaMtZWBh0St21Gnc91o2T3+QiVJw7iHNHH1CE8/s5eBMdwZOFHtHVtEoT/Bi5pcATI6MIkN38Y3TDzgIS+ts9ZsaOh/KqxQ07kgI02AqgPqEYZxZxcZqW+OPCMbX1s0bG0MbRJObLNKD1Oq43zQUgOsto7Wv5wjBIx0Ej3UQOtyB8rRtSKgcYec3hm8g3X4NSxlY7JaU12vc/5qHp97PIqbDooObWXBiM38ba+d8dyYOFcKjXqBRW8yXjgYODE/jvPoLWRg8kBf33oemShvzk/JsKq2Gz0OAsdpWzUZ7fMEVI0aPnUBNa62ePy7KsH0izDq3OR6YzVMyOEJNWOwabNt03K+FcS+L4Pw4isQgNkwIeF0EFzoIHWQH5+Cw81nKwGK3orJB4/7XPSx+L4tIDM46qJk5Jzfy5wkR/urIR1QDsBh4mQPDEzkueB5HBw+gSG8dmdFU2XEzPau8kNeubYqvthUNGvZ9m1NRNCXCpGPN1v7MKMXTIrg8Q2qknkUqSHIAO7+OARCZrOH/uZvgQgeRfWzQxXDkgWKPVgbNWjnv5P+c+bUPkqm
nbgZyOkNYB4NBbrrpJt577z3cbjezZ8/m97//fcpkH6pUN2o88IaHx9/JJBwVTj6oEfeZZSwbl8cz2migCpf+CEcHKzgxuD+Hhx7AozK6zTeZb/6ZQen0CPue32w4dWdEKJwUxeZI/T1ZDBG6cADXX59B8BgHsQmD3w64RyuDlZ67KXcuZ6XnbuY1pG7yVjpDWN966624XC7ef/99RITKyvYzYvckavzCn/7l4S/vZBEMC/vM20Djok/5x14zQfZGU+XMDb3Gpf4M5odOw0n/au0ry3Zao3cs2jqAX49gq+nYATyU2O2UwfKcG6hxdBPCGogRpsr5OYhibdZiqh2rsHUTObIgMp25DQMXwrqpqYnnnnuOFStWIGaNVFxc3K08uyO1TcLDb3r481uZBMIa+Yd8TPM5K/hy5NHASeTrNVzQuJFf+AtwMi9l17UUwZ5LXxzAQ4ndThn0lCbb1oQtRZNtKzmx8Sm/TipDWG/evJn8/HzuuusuPvzwQ7Kysrjmmms48MADUy73YKWuGW59O8Jzbw4jHHTAIW/Boo34Rx0LsjeTIgH+X2MTxwXtaH2I8fvl4szuE1nsMQwlB3B/2e2UQU9a7s1aOX8vnUd8jrcowrZ65lc9kFLfAaQ2hHUsFmPLli3MnDmT3/72t3z++edccMEFfPDBB7t1OI4oMd6KruOed2x8+eoBqGYPzHuHYWc10Tj2QJq0uewXivILfxMLQrE+x7n8+D4P7/wuB5tTEQu3zyWrONa/G7EY/AxhB3B/2e2UQU9Y6bkbRduRHgo95b6DVIewHjVqFHa7nVNPPRUwzE8FBQVs3LiRWbNmpUzuwUCAEO+6V/Ki+oJlb44h+OIZ4M+l6IBVTDxTKJu8Hzs1jQVBQwnMDfe9olYK3vldNsvvz2baKc2ccHcdNtNiOFhj1likkJhC3g+Qs6R50DqAtfJK7IsuRbv3FvSS9CwGtUcqgwrnZ+hJcWJ0CVPhXJGya6QjhHVBQQEHH3ww7777LvPnz2fDhg1UVVXFRxoNderEz7/cK1jmXs5b+lqCy05Cnv8lyp/D9H13MumMGG/MGMPHmnBcIMIV/mZmRfo3bl+PwRu/zuXLp7LY9/wmjr6t3poAtifQzgFch30QO4A9dz+CfPgpnj/+mYbf/Sot17CilqaBtWvXcsQRRzB+/HjcbiOmQG9CWE+dOpVRo0bF76klhDXAli1b+OUvf0ltbS12u51rr72WI444ol0+QyVqaahI8bfmZSzLWM5HzjKiIQfZL/+Q8D/OJtSYxUEzAxR4w7y5t0YEODUQ4XJ/mCnR/k/eioXhpSvzWfNiBgdd3shh1zW2cxAPhZ6BJWPPkFod97+Myj/ZAew4q4Cq2cFB5wAePn4eEmof4FC5nOzY+FGP87Gilg4QU6ZMYdu2bX061+PxsHXr1k6Pjxkzhueee66vog0K1tu38qp7Oa+5P+EL57fghHH+ccx94RbK/vkD6hqdHDgjRNaiet7dW0NDw9sc4ef+EGNjqWm8RALCPy7OZ+O/3Sz4TQNzf+5PSb4Wgwvb1pjR+l8Wwbm8cwdwUVE2qio00OK2o/yjF8m5+W7cr72NFgiiZ7gJLjychhtSvyCXpQws0o6OzkrHBpa5l/OqezkbHIai3Dc8kRuaL6X2hQUseXUUHzTY2G9aCPfZDXy8j+BSGhc2hfmZP8wIPXU92FCD8NwFBWz9xMmx/1vHvud3vmSjxRBDKezfJCiAVUPbAayXFqOys5BgCOV2IcEQenZWWvwGljKwSAsRonzkLGNZhtED2GmrwaY05oVn8pO641nQeCBvvz+a+1/PZXutsPeUMKP+s5nP9xOydeFyf5iLmsIUplAJADRXa/jOLaByjYOT769l2imdh6O2GCLEFM5PE2YAfzf4HMD9QauqoelHZ+K6/CeE7vsLtor0mNssZWCRMpolyNuuL3nVvZw33Suo15pw604OD+3HwuBcjgzOJitihJE+c1k2O+pszJimk3dFE1/vDwUxxTUNIS5oCpObBldWwzaNJecU0rDNxumP1TDhiMFnFrDoIaYDOGNZBNcbCTOAD3Xgv8J0ABcPHgdwf6h95A7A8Ls03HZd2q6TrjWQ/wKcCFT4fL6ZHRw/D7jW3PQDl/p8vpXpkMUivdRII2+4P2WZeznvulcSlDB5uodjggdyXHAuh4VmkaFcRGLg+yiTe5Z52FZjZ+LEMBOvCFC2PwzTdW6qD3Nec5jMNI1nqNlgY8k5hQQbNLxP1zB6buerjlkMTqQmYQbw27vfDOCBJl09g8eB+4AnOzm+CZjv8/lqvV7vccDDwNw0yWKRYrbZKlnm/oRX3cv5xLmamOiMiBZxbtNRLAzOZW54enyN3UgMnlmewT2vZvNdtZ2x4yOM/nkT62cr9orp3B9zcFxFA70bfNs7ylfZ8Z1XCArOebaaYXtHuj/JYlDQqQN4kctQALvRDOCBJi3KwOfzvev1esd2cfzDhM2PgVHpkMMiNSgU6+zf86p7Ocvcy/nauRGAyZHRXOY/neOCc9k7Mr7NCl/RGCz91FACmyvtjBoXoeRnTWyeo5gcjfF/dWFOCUQYVlREOgccbv3UyXM/KsDp0Vn0t2oKJ1qziAc1nTmAp2jG+P8WB7AVJCrlDAafwU+BVzs76PV6LwYuBvD5fBQVtfeiBwLW6t/JaJqGpml4PJ4On1l36Oh8Iqt5QXuPF7X3WC/GcNe5+gxujV7CyfqhTGY0uCCxWR/TYcn7Grf+3cb6HcLIcTqFFwXYOifGfkq4N2rnZOVAy8qALLDb7X2SryeseU3wnWMnbxRc9IpO/l753Z+URDrlSxVDXsaYQj4Mor3oR3vRj2yOogTUPDfR3+Wjn5QFk5zJRW3XyTdISLeMA6oMvF7v4RjK4AedpfH5fA9jmJEAVEcTV6LRaL/kKNdquNvzLJ851/J61V39ygvSt57B999/z09+8pN42oaGBvx+P2VlZe1k0HUdXdfx+/09nuwTJsKHrlW86l7O6+5PqLDVYVc2DgntzU+DJ3BMcA7D9IJ4+qqENn1Mh39+lsEfX/GwvtxOyV5Rsq8Ns+0gnQPDUe6pCcXjBtUkXDNdk5G+edHNS7/Ip2hyFO/T1cQydfpymcEwWao7hqSMXTiAg5dlJjiAY0ADae0+diTfIKQ/MpqTzrpkwJSB1+vdB3gEOM7n81UPhAwtSmBJ1lsodMLSP6XSQrrWMxg9ejRvvPFGPN0NN9xALNY/s0eTBPi36wtecy/nTfdnNGjNZOpuDg/tx3HBuRwRnE2uyur0fF2Hl75w88dXslm3w0HhqCgZ1wSoOCjGgnCUX1SH+hU3qC98+ddMXrs2l5EHhDnziRrc6RiaZNFrOnUAH2U6gBdYDuCBZECUgdfr3QtYCvzQ5/OtS2XeN+Q8ymrH5i7ThImw1VZJha0OUKiE8ndm4W87PW96ZCw3N/y0WxnStZ5Bm3sIh3n++ed5+umnu5UnmWqtntfdn/Kqeznvu74iJBEKYjkcH5jHwuBcfhDam4xuOuS6DstWurnz5WzWbHeQPzKK85cBqg+JcV
wowhXVoX7HDeoLyx/M4u3/yWXc4UFO+3MtjgxLEQwkLQ5g+7+3Muy9QBsHcOBYB2HLATxoSNfQ0r8BC4Air9e7FbgRjCWmfD7fQ8ANQCHwgNfrBYj6fL4D0iFLR6xzbKVRmruJ1JEaUrmeQSKvv/46w4YNY++99+7y+o1aM7/K+RMfuco4t/loXnMv5xPnGnTRGRUt5odNx3JccC4HhKfGRwB1hVLw+ldu7ng5m9VbHeQMj2G7OkD9wVFODUe4vCo1cYN6i1Lw7u3ZfHxfNlNODHDSvbXxyKMWu5BOHMD6dKflAB7kpGs00TndHP8P4D/Sce2etNwrtFrTPPRv9CTz0HPVt6RUnlSuZ5DIkiVLOPvsszs9N4bOUu1tvsrezHdZIRD479zHmBYZw5X+M1kYmMuM6Ng2I4C6Qin41yoXd76UzdffO/EMiyFXBggcGuXsUITLqkOMSVHcoN6idHjjN7l88WQWs85r4pjfWZFHdymdzAAOH2Cj/rfGDOD8A0tpHOQ2+T2dwTCaaJdToudzW8PFXOU/q1OlkApSvZ5BCzt37uSjjz7innvu6fT8HbYqVstmYujxHtAH5Q8wNjasV3IoBW+tNpTAl1ucZJbE4IoAkcOi/DQU5pKqMMNTHDKiN8Qi8Mp/5rH6+Uzm/ryR+b9uH3m0rzRr5TxjX8TB2r0pX/RoyLMHzQDeU9gjlUELyUphhXNNyvJOx3oGLfh8Po488kgKCgo6ONugo+q5N4pAKXj3Gxd/eCmbLzY7cRfH4PIg2mERrgiFuag69XGDekskAC9cUsCGf7mZ/6sGDro8tZFHV3ruZqt8mPJFj4YqlgN492aPVgYttCiFVLF27Vruvfdexo8fz8knnwz0bj2D6667jlGjRsXNQInrGYChDG65pWtzlgBakkr4NOdmjClkKuGzBRX/93XZNJb8/VTWrBuBoygEPw9iX1DLObzJsZG3ydQCrMk28gBQotrkm5hfh9+lJZ3CaXMRyg+25tXuHAXS9nxQRBsy2XDhLfjfH8aoe/5Iw8Uv8Ho879Z0rfdKh/uNh9X+mehEqHGUgSjWZS0mojWSHRtDRqyYDL2UjFgxmXop7lgRdtxd/hZDma5mAAeOdRCeZweHpQB2ByxlkAbSuZ4BwPvvv99tPjmEGUETm3FjUzox0Vib+SSGmhDTV9DyZ+zZUHYIr/3t12xafQi2whBcEsR9xFaOtz/MXLUEFwFqbEJN/Hxaz1eSlDdJ16HtOco4ZtPsxOyxNvK09WOY26p1f7Qqn+9Oup3gV5MZ9cRvyF30GuF4/q3p28vQelwS9yszXcI91NtbB7kpdLa4XyYm4dZ1sxNw6nmmcighI1ZCRptPQ2lkxIpxqrwe+2gGDGsG8B6LpQx2UzQUk6ljIlFW46A+si/nVy7tMO3y9U7+8FI2H61zYc/X4aIQIw8PcVk4xFlVmbi4Ckj9YhrQ+4k0Dds1fOcWEv3ezpmP1jDhyCug6oqUytSslfP30nmtFb8Y/51V/gmCRsBWSUCroNlWQUCrIJDwWen8nGatnJjWPjS2ppxmz6JFWRSTGSslQy8mw/zMjJXg1ouxsQuHQkUVzhVdO4Bj4y2P/O6OpQx2c9zEOIWdTAq1n+T96QYHf3g5hw/WuLDl6fDTEGMPD/KLSJhT6iKDrnDUbrLxzNmFBOs0znqqmr3mpSfy6ErP3UkmNKN38JXnPuY13Eam3rXvRaGIiJ+AVk7AVklzksIIaJX47d9Rqa0gaOt4vqUrlm8oDb043svIbNfrKMGpcvp0jxJQuN41Wv+uf5kOYBeEfmA5gPdUBtv7bpEGdAlT4VwR3/58k4Pfv5zNe6vdaLk6XBhi2hFBroqEOLYhymCsAipW2/GdW4geNSOP7pO+yKMVzs/Qpa2iSX6GXSEITpWNM5ZNbmxil2l1IgS0qiRl0dLrqCRgK6fC+SnNtgp0ab/+gk25yaIUV1FhGyWRaZqoWvwbGXoxthqbsQZwiwM4aDmALVqxlMFuiic2moNj12Ovt3Pk9iMBWLnFwe0vZ/PuKjeSo8OPQ+x/RID/jIaY3xgbtNbsbSscPPujQhwZinOfraZoUmqHACdzStXr8e/pjlmj4SBLH06WPrzLdApFWBoSlEZlvOehZ9RTq76nwb6Jcm05IVtt/LzczSOY/MIRTP7HEYx4b3+0mJ2mEbVs+PG3VB6/k+aDw7i1kriJKkMvwaGyB79vwyLlWMpgN2VrjY3/edaGbs9lhyuDV1dl8PZXbsSj4IchDjkywNXREHObBndI503vunj+J/l4SnUWPVNN7ujBLW+6EASXysUVzSWPSW2OFTmLqKo2FZZSaKuDOF5vIHOZImOVMay5cVota69ezsaTP2HbnM8NpWKrbNcDArDpbsMsleQIb+1llJi+jSI0qwrZbbB+yd0UZZq8IzG49ul8cCs4L8RRRzTzn3qIWc27PmREb1n7spsXL8uncKIRedRTsmtkLtp4DM5QayTYlniPYdcMqsa/3vFJA0lU4fwo0uoA/l5HidN0ADtNB3A+uYxnP05kP9NNYfQ26ky/RnncMZ5osqqzf8sO24eEtbr211WCWy9McoCXdujngMEdHtpiD1UGxxRlMTsc5Sp/mNI0TJxKVwhrgDfeeIM//OEPKKVQSnH11Vdz/PHHdyuTFoM359cyOTT4lQDAV89ksOz/5TF8vwhnPVmNOy+NE9yUjqgQ6AFEhYi6JuEIrUVoNUcpnIQzdln4rG5JdAA73qynqFpvdQD/omcOYKO3kY8rmk8ek7tMGyVI0FbV6SiqgFZJnWMtAa0S1cFMfofKwl1S3GY0VUdDcd16IVoPYmRZpJ49UhmUOW1869BYkuVkUVM45UohXSGslVJceeWVLF26lKlTp7J69WpOPfVUFi5c2G24Cz0iTB6AAHK9RsX49E8Z/PuWfMYdWs8ZD67C5W5GAiFEDyIqaH6GEBWENvuSvuuh+D5azmmXPmQogm6JEc7YH/QQaOlcpLNzpEbv0AGsn+ChfoGeVgewHTee2Cg8sVHQhe9eoRPS6to5w8lqpCb8HQFbOXX2tWx3vUdEa2h/j0rDrRe16V0kztXISPh0qMy03Oueym6nDG7IcbHa0X3LImxOmlmc5WRxlpOSmGJUTO9ydPf0SIybG7qvONIZwlpEaGxsBIzFbUpKSnoU98hl78MwTBVtU8m2r3j7XkG3pLFvjFAabTb2xYK8+vhv+fdff8u+hz3LD687H/vOnsutsKM0N0rc5qcL4t/d6Fo2ytFyPAMlLpTmNtIknKM0Nxn1f8fV9CFCFIUGaBTsuBK9/HqCnqMJZh9PyLMApWV0K1d/sH2fMAHsk4QZwGe3zgAuGl5McJAEgRM03HoBbr2A/OjU+P4idxFVdW1ljBJoZ5pKHEUV0CqpdXxj9jba+4rselYHvYv2Jiq3XogMyjFyg4vdThn0FmUqhXIbNGsaM1Mcgz+VIaxFhIceeogLL7yQzMxMmpqaeOKJJ7qVIdPexLNn30XujjUdtIyN7yRW7C2VNn0ftaPEaVSwZuXaUtkiLuO7Iw8lbrSMP
IJhRUxl8PId5/PxkvnMPm0FJ924Dr/91jYVtEqo2JW4QMtIqvhTV5xDmYdSumEeqCiIk/Lx7+AIr8Xd8AoZ/mVkNixFlwxCniMIZJ9AyHMkyubp/4WVwr7aUAAZyyI4ynbfGcB2MsiO7UV2bK8e9DZqE/wa5e38HDWOMgKut4ho7eNTibLh1otM5dDq34h/6iXYmUJUHNhVepX7YGa3UwY9abmPHNE6UcepjHZfi7moJMU+hFSGsI5Go9x333089thjzJkzh08//ZRLL72Ut99+m6ystquRiTl71qlF+OdJJ7Og8DOU392ugtY1D9gK21a0LRVwvAXtBs3cbpPO1WabNhVzz+y+RUVF1Oyo4tWr8yhbmsmcn/k5/LcjCEjvn1cq0R2lNOUuIqvuKZpyF6E7RxFyjiLkOZJ6dTvO5o/IaHgFd+MyMhpfRomLYNZ8gtnHE8w+GmXL6/nFokkhoL+3ZgAnYvQ2CnHrhXTXPolIc9uht1plG99Gs1ZBtaOMoFaJkqSG33Bw6NntZoQn+jRatl16/m7X29jtlEFPSbcSgNSHsC4rK6O8vJw5c+YAMGfOHDIzM/n222/Zd99925w/Jvs7bpz73+RlKaYfvpSd9sEZgjkShH9cnM/61zM49JoG5v3CP2gavf6iq8jUN+IvTgrFIQ7CWYcRzjqM+mG34gyswN3wMhmNr5Dhfx21w04o6wemYliIbi9sl3ebGcBvRLDVql47gC3a41CZOGJjyYmN7TKdToyQVhPvXdhym6ls3pikNL5iq6uCqNbc7nxRdjL0om4m+hmfqQhkuCvCqe+RymBGOMYB5miidCgBSE8I6+HDh7Njxw7Wr1/PxIkT+fbbb6msrGTMmDHt8lBaFiCE3PuhD1JFEPILj55nZ8PbTo6+tY79L2j/0g0kuqOU6Kw30buyx4uNcOZcwplzaSi9CUfwS7PH8Ap5O69B7byOcOZBhikpuhDn2/nWDOBBgIbNNBEVQxSK9CKq/B3/zhFp6sCv0TqKqsm2nSrHSoJaVSeBDHPjs8A7G4KbESvBpfI7ney3K8Kpi1Kprwy9Xu9fgBOBCp/PN7OD4wLcAxwPNAMX+Hy+z3uQtdq+fXu7nY2NjWRnZ/dP6BSydu1ajjjiCMaPH4/bbbQKehPCeurUqYwaNSp+T4khrJcuXcr999+PmM3n//qv/2LhwoXt8tm0cT3r378R+/CzOPzoU1N1aykjUCM8+8NCyr92cPwf65hxRmCgReqQPs9AVgp7qIzMNe+QsawZ27v7oL6ejeg29NJmgsc6aD4uPyUhoNM9SzoVDHYZUyGfTpSgVt1qltIqabaVt3GIG72OzgIZOlqH3iYoCpvu5sucO9Algk25OaP8o173DkaMGAHdLPSbrp7B48B9wJOdHD8OmGT+zQUeND93C9IZwvr000/n9NNP7z4jsaGX/ghdG3ydv8YdGkvOLaRui50fLYlSOm9wKoI+0cYBvBeOsnMBiE6OEvvpKrS5T+MY8zKZAnb3LIL1xxPIOZ6Yc/wAC27RXzTsZOqlZOqlXaZrDWTY+Sgqv20rlY7P2wUyVOhp6x2kaw3kd71e79gukpwCPOnz+RTwsdfrzfN6vcN90T5V4gAAIABJREFUPt+OdMhjMXio3WxjyTmFBKo1zlpczYyTchjEDcae0ZkDeI7dcAAf6yA2zgYsABZgC2+Km5JyKn9HTuXviLimEcg+gWDO8USdk3eL0UIWHdM2kOGELtP6tW0sLT00HqRQlzDrs5Ywy39Vyn0HA9VsHAl8n7C91dxnKYPdmMo1dpacU0gsLJztq2b4vumLPJpuOnUAH+rAf6Wb4FGdO4BjznH4iy7DX3QZtsg23I2v4G54heyqO8mpuoOIc4LpfD6BiHumpRj2YL723EfyIrbp6h0MlDLoqHR36Lzwer0XAxeDsdxjUVH7GCeBwG5kZkgRmqahaRoej6fDZ7ar+e4T4W9n2nFkwCWvRRk2PRcAu90+KORrR0yh/V8dtt9vpOTafPQr8qBOR3ulCe1FP/JGMxJQqDwN/XgPkZOzUEdnoXk0MoGez40tguGzgF8RCe9Aq3oBW9XzeKrvJ7v6XpR7LHrhaehFp6Gy54C0VzCD9hkmMNhlHKzy1dhXdhhOvSbzS4qcqZV3oJTBVmB0wvYooL1nGPD5fA8DD5ubqiMnTzSa3pDGQxFd19F1Hb/fP+COu83vOVn6kwKyimMseqYae0ksbhoajI5F28YY+Zc0oW2MIQGwXV+F9t9VSMAYLBIbLjSf7SR4rIPwQS0O4BAEQ9DeL9gLHOA8E0aciVZSg9v/Gu6Gl3Ftuw/btj8Ssw8jkH08wezjCWceGJ/LMRifYTKDXcbBKt8JvBL/nixjFT2X13Qgd8lAKYMXgcu9Xu8zGI7jestfsHuy7lU3L/48n/zxURY9XY2ndPDHRyo6tRGtVtEyJ0kiQARUBlQtzSayd/pnAOv2AprzzqE57xwkVo/b/wbuhlfIqvsrntq/ELMVEcxeSDD7BCg4Ka2yWOwZpEUZeL3ev2F4y4q8Xu9W4EbAAeDz+R4CXsEYVroeY2jpwE43tUgLX/syePWXeQybFeGsxdVk5Kcx8mgKiU624fqobW9TgPB+diL77Pr2k7LlEsg9k0DumUjMj6vpTTIaXiGjfilZdU+hdhSQl3UUgezjCWUdNmCB9CyGNukaTXRON8cVcFlXaSyGNiseyeLNG3MZc2iI0x+twZk1NBQBQPM5ThxfRdGaWvfpWdB89i5cpL4TlM1DMOcUgjmngB7A3fQOueE3cVf9k8x6H7qWTdBzlBlI7/D/396dx0dV3X0c/8ySzExWEsIWdmSTXcQFhSiLFdGqj4Xj0la7qC2te4WCtdZia1Ottj5PFVQqanHhiKK2oLZoURAXtAoICiogJIJAAiSBZNb7/HEnw0zIMiRzM5Pwe79eviAzNzO/RJLfveee8z2WB+mJ9uOY17uHF4y1eW9vTqffjd3o/rNC+t3Yjbc3J+4Hvby8nO9///uMHz+eyZMnc/XVV1NWFjtf+O6776Z3797cf//9MY+HQiGuueaayOdedtllbN++PfL8ihUrOPfcc5k0aRLf+c532LFjR8LqTgTDgNX3ZfP6b3IZOLWaaU+UtalGAFBzTjo46vwzd9jMx1OJ3UNN9hSCgxaye8A6yno+SXX2+bgOrSS/9Bq6bBlOXsm1uA++hC14dICbENGaE35yf9OHpLa3N6dz1bx8vH7zB97rt3HVvPyENYTa/QxWrVrFihUr6N27N3fffWQaWPR+BqtXr+bBBx+M+fzp06fz5ptvRn7xz5o1C4ADBw5w00038dBDD/H6669zxRVXMGfOnHprcOzaheP3vyf7nnvwvPBCQr6uphgheP03Obx9fzbD1GEumrcfZxscsTBybOz+tANfl+bh8w7g69I8dn/aASMnhc+D7C4zRK/wPr4Z8DH7ej1Lde500g+/T/7XP6Pr5yPI3/kDPAc0tmA9u5aJ494xDxNprW+2opBEueO5HDaVpDV6zLtfpGMYsT/Y1T47l/5vR07v33B+/pAe
fuZOP3pDjrqs2s9g+/btdOrUiRNOMBeqTJw4kRtuuIHy8nLy8/NjiwgEzP1w9+8nN9xMquNZudxMoQC8cmsHPnkugzFXVzHxNxX1zYIUrcHmxJc5Hl/meA52/V1MkJ676t9xBemJ40+TP65Kqb9F/d2mlFrQ2PFtQd1G0NTjLVHffgbz5s0jPd28Cqndz+Caa66p9/Oj9zPo168fe/bs4eOPPwZg6dKlAE1GX9irq8kuLk7I11OfQA28+JM8Pnkug3G3VjDxTmkEKSMcpFfRdS7f9F/L3j7/5FD+NTh92+iwexZdPh9Fx6+mkVG+ELt/d7KrFUkUz5VBJDRFa20opRpfP51k8Zy597uxW2SIKJorzWDJzWX1fEbzJXI/g5ycHObNm8edd96J1+tlwoQJ5Obm4nQ2/b/RUVpK/ve/j3f8eLxFRQQGDUrI9EjfIRsv/Cifr1a7mDT3IGN+fKjpTxLJYbPh95yE33MSFZ1/hdO78UjC6je3wze34/OMiaxlCKb3bPo1RbsRTzPYp5S6GlgDjAUS+9syCf7+szKumpdPte/I6asnPcQTM8oT+j6J3s8AoKioiKKiIgD27t3L/Pnz642wrsvIzMTx1VfkvvEGAMHOnfGOG4e3qAjv+PGEunY9pvoAqvebyaO716dx/l/2M2y6rARvM2w2Au5hVLqHUdl5Fk7vlshQUu6eueTumYvPPYKa7KlUZ08l6Erpc0CRAPE0g6sw4yB+DmwGrrS0olZw5iAfT8wo5/sPdcTrt+FKM3hiRjlnDmrGPsENsGI/A4A9e/bQuXNnQqEQxcXFfO973yMjo/Hwg5DHw8HiYqovuQRHaSnpq1fjeustXG++SUb45rJ/4EDzqmH8eHxjx2JkNb6FY9U3ZvLo/q1OLn5kPwOntGjprUiygGsgVZ0GUtXpZjNIr/IV3BXLydlbTM7e4nCQnnnFEHAl5qpSpJYm9zNQSjmA6UAnzKjpk7TWa1uhtvoc9/sZ3Hrrraxduxa/309RURF33nln5D2i7Vizhi2zZ5Pj8XDWT35S/83jUAjnp5/iWrXKbA7vvYetpgbD6cQ3enTkqsE/ahREDUUd2OFg8WUdObTXziWPldNnfPObaKrGANRK9frA2hqjg/TSq9diwyCQ3s9MWD2GIL1U/z6men3Qshrj2c8gnmbwDPAmcKXW+gyl1Aqt9eRmVdRybaIZpIJt27axZcsWnE4nkyZNiu+TampI/+ADszmsWkXa+vXYDINQdjbeM87AO348JT3O5ZlfnkTAa2P638soHN2y5NFU/yFM9fqg9Wq0+7/BXfUqnoplpB9+FxtBAmk9I0NJfs/oeoP0WrPG5kr1+sD6ZhDPMFEnrfV8pZRqVhWi7XC78Y0bh2/cOCrnzMFWXo5rzRrzqmHVKva8VsZT/BSnfS8/OedP5O3oh7fXOEIpmPYoEi+U1oXDeVdxOO8q7IHaIL3lZJY/Rlb5ww0G6Ym2IZ5msEcpdSngUUr9D7LnwHHDyM+n5oILqLngAr56O50XfpBHpruSq0f8ksL3nsf+mrl4yT9kSGRIyXfaaRgeiUBo7xoO0ns6KkjvXGqyz8ebeUbTLyiSLp5m8CPgauC/mFHT9U+IF+3W5/9y8dJP8+nQO8Clz9Rg7zqX3cHfkLZhQ+R+Q+Zjj5E1fz5Gejq+MWPM5lBUhH/YMHDIGWJ7FhOkFzqEq6o2SG8pmQeeImTvgLH/QlxpEyVIL4XF0wwGa63/qpTqDPwA6AN8ZmVRInV8ssTD8ls60HWEn+lPluHJD99jcjjwjxqFf9Qoqq6/Hlt1NenvvRdpDjnFxVBcTKhDB7xnnhm5cgjGMQ1WtF2GPZOanAupybkwEqTnrliGZ99LdAw+ScieFRWkN1GC9FJIPM3gPmASMBfzRvJCzPUGop378LFMVvw6l95nevmfx8pxZTU82cDwePCefTbes88GwL53L6633zbvN7z1Fp5lywAI9O59ZH3DmWeC3G9ov8JBejXZU3Dm51C540XcFctxV71KRsWLhGxuvFkTqck+n5qsSRgOmQSSTPE0gwyllAtwaa2fUUr91OqiRHIZBqz5Sxar/5TDgHOrufCh/TiPnr3aqFCnTlRffDHVF18MhoHjyy8js5Q8L79M5lNPYdhsGKNHkz12rHm/4ZRT4BjXZIg2wp6ON2si3qyJHDSKST/8Tnj186t4Kpdj2NLxZhaFp6x+C8PRIdkVH3fiaQaLgJeA3yil3MA2a0uy3l9HdeHQ3qPHsTM7Bbnu429a/Prl5eXceOONbN++HZfLRZ8+ffjjH/9Ix45HAsHuvvtuHn74YW688UZuueWWmM+fM2cOa9euxWazkZaWxpw5cxg/fjxgrjq+4YYb2LlzJ263m3vuuYfRo0e3uOZahgFv/DaHDx7NYti0w5x33wHsLd31wmYj2L8/h/v35/APfwiBAGkffYRr9Wqy3nmHrPnzyf7rXwm53fhOPz2y+C1w4olwjCu3RRtwVJDeh7gr/om78hXyqlaEg/TOjArSk6vH1tDkOoMUk5B1Bn/s3vB+oL8srXcr5mOyf/9+Pv3000hy6V133cWBAwe47777ADPCeu3atdx///1cf/31TJo0iZ///MhePxUVFeTk5ACwceNGLr30UjZs2IDNZuOWW26hV69e3HTTTbz//vvcfPPNrF69GludhT/NWWcQCsCrszqwYXEGJ/+oikm/tT5wrqCggLLt20l/550j6xu2bAEgWFBwZEhp3DhC3btbW0wD9bXn+eetJa4aDYO0mo9xVy7HU7Ecp387BnZ8GaeFrximEErrlrz6kiwV1hm0KSvuyGHPpsYjrBvz9LSG43w7D/Ezea61EdZApBGA2eiif9H/4x//4L333gPg1FNPxeVysW7dOkaNGnVsX2gdAS/847o8tiz3cMbNlYz7RWWrJQ4YWVl4zzkHbzid1b5rV6QxuFavJuPFFwHwn3AC3qIifOPH4x07FiPq+yTagaggvcpOt+H0bsJTsaxOkN7J4bUM50uQXoI12AyUUnla6/3NfWGl1BTgAcABLNBaF9d5vhfwBNAhfMxsrfXy5r5fqqovwjpabYR1Xffeey9Lly7l4MGDPProo9hsNsrLyzEMI2bvgu7du/P111+3qBn4DttY+uM8tr/lZuKdBznlmuQmj4a6daNaKaqVAsPA+dlnkeaQ8eyzZC1ciBGezVQ7hdV30kmQ1vyTAJFibDYC7qFUuoeGg/Q+x125DE/FcnL33EXunrvwuYdTk32+BOklSGNXBn9QSuUBnwP/AtZorQONHB8RzjN6EDgHKAHWKqVe1lpvijrsdkBrrecppYYAyzGnrbZIPGfujQ0TXbEkNSKsZ86cycyZM1m9ejW///3vI3sXJFrNARvPXdmRXR+lcd79+xlxaYolj9psBE48kcCJJ3Lo2mvB5yP9ww8jq6KzHniA7D//mVBmJr6xYyNTWAMDBkiYWjsScA2gynUTVQU34fBtNzfqiQnSGxyJxQi4Bsv/+2ZosBlorX8KoJQagPlL/drw/sfvAS9orUsaed1TgS+01lvDr/E
scBEQ3QwMoPY6Pxdo+WB9imlJhHWtcePGUVlZyWeffcaIESMAYnY2Ky0trR0PPGZVe+zoKzpS/qWTix7ez6CpbSB5ND0d39ix+MaOpfKXv8R24IAZmRFe3+BesQKAYNeusRHdnTsnuXCRKMH0PlR1/BlVHX+G3V8aSVjN2vdnsvfdTyC9b1SQ3nBpDHFq8p6B1vpzzKuDh8Jn/KcB3TDP+BvSHdgZ9XFJ+POi3Qn8Syl1PZAJ1Bt+p5S6FjNCG601BfXMS6+uPraz2cxOwQZnEyVKcyOsDcPgyy+/pH///gCsW7eOsrIyevXqBcAFF1zAk08+GbmBXFNTE2kS0ex2O3a7naysrHq/Z+XbYfG0NA7ugh8tDTBwchbQeGy1FZxOZ731xa2gAPr3hyuvJAT4tm/H/sYb2F5/Hc9//kPGkiUAhIYOxZg0idDEiRhFRZCZ2Tr1tYLju8YC6DYSmI3ftxv7vpewly0lq2we2WV/xXD1JlRwMaGCSzCyT20wSO/4/h6aLJlNpJSaDpyrtb46/PH3gVO11tdHHXMLYNNa36eUGgv8DRimtQ418tJtIrW0JRHWoVCIadOmceDAARwOB263m5kzZ0Y2tNmzZw/XX389JSUleDwe/vCHP3DKKacc9TqNzSba97mTxZd1xF9tY/qTZXQf07Lk0ZawdBZHKETaxo2RIaX099/H5vVipKWZkRnhKwf/iBExEd2tVl+CSI1HswfKcVX9C0/FMlyHVmHDHw7SOy8cpHdaTJBee/8eJnM2UQkQfau/B0cPA/0YmAKgtX4nvIahANhjUU2tZtCgQU3uS9wQu93OC+ENZ+rTuXNnFi9e3NzS2LUujee+m4/dCVcs2UfnIXHdBmqb7Hb8w4fjHz6cqp//HKqrSV+79khkxr33wr33EsrJMSMzwusbgn374lm6lOziYhxff03nwkIqZ8+uf08IkZJCznyqO1xGdYfLwkF6K8JBes+QtX8hQUfH8OpoCdKr1WQzUEpN0Vq/qpTqD9wMLNZav9XEp60FBiil+gKlwGXAFXWO2YEZc/G4UupEwA3sPdYvQMRvxzvpPP+DfNwdQlz2bBl5fRM3LNYmeDz4iorwFRVR+atfYS8rM3d9CzcHzyuvABDMy8NeUYEtaH5/nKWl5M6aBSANoQ0yg/S+Q3Xud8JBem/gqVgWG6RX/m1c6bVBese43L6diOfK4FbgVeA24FHgf4GjxyWiaK0DSqnrgNcwp40+prXeqJSaC3ygtX4Z+AXwqFLqZsybyT/QWrepFXBtyRf/NpNHc3sGuPSZMrK7NTYad3wIdexIzUUXUXPRRWZkxrZtuFatImfu3EgjqGWvrib39tsJdu2Kf/hwjBQalhTxM4P0vk1NzrfDQXpvmVNWy16mY/Dv4SC9SeYVQ9YEDHvjW8q2J/HsdPYe5raXv9ZaX6OUWqm1Prs1iqtHm7hnkAqi7xl0q7iAZTd1oPNQP9MXlZORnzqNIBXHarv16IGtiZ8L/wkn4B85Ev+IEeafQ4dixHlTOtFS8XtYV6rXWJCfQ+WOl3BXLMNd9SqO4P6oIL2p1GRNTnqQXircM/gD8Dvgd+Fx/XebVY1oVQd3Ovj37xw4vLnkl+ZhTzO4bHEZrmy5+GpKsLAQZz33fALdunHw3ntJW7eOtPXrca1ZQ0b4/o5htxMYMAD/iBH4apvEkCEgG/20DfZ0vFkT8GZNCAfpvRtey/BKnSC9qeEgvbxkV5xw8TSDvpirg2tPyWdbWI+wSMhvk0YQp8rZs8mdNQt71JTlkMdD5W234Z0wAe+ECZHH7d98Q9r69aSvX0/aunW4Vq4k47nnADAcDgKDBh1pDiNH4h88WJJZU53NiS9zHL7McRzs8jvSqz/AXbkMd8XyqCC9M6KC9Dolu+KEiKcZbAXuUUrlAv8Almity60tS4jkqb1JXDubKNjIbKJQly4xuUoYBvZduyLNIW39etyvvkrmM8+YT6el4R88ONIcfCNHEhg0SKI0UpXNji/jVHwZp1LR+U7SataFg/SW0WH3bIzdt+HLOC28+vk8y4L0WkPc6wyUUvnAfMzpoK8BD2itV1tYW33knkGcPn5rB4t+tQWHtwP5pRcCiUlkTbSUH0tORH2GgaOkJNIc0tetI23DBuwHD5pPu1z4hwyJGWIKDBjQ4NoHS2q0WKrXeMz1GYYZpBeOxUjzmUm7Ps/o8OrnqQTTeyW3xigJuWeglDoPc2poHuaVwbXhF30JKGpWZakgaJD5SA1ZD3qpus7FoWvc4EjMsnUr9zOYP38+Tz31FNu2bWPhwoWcU3tGKlKXzUawZ0+CPXtSc8EF5mOGgeOrr0hbty5yFeF5/nkyn3gCgJDbTWDYsJghpkC/frKfdKqIDtLrNBOn94twkN6yOkF6U8NBev2TXXGT4jn1GA7cprWOuaOmlLrGmpKs59gaJO+nh3BuDWKvhuw/1eB5wc/++ZkE+7X8h81mszFjxoyY/QzuvvvumP0M1q1bx5o1a7j++utxuVwxaaZz5sxpcD+D008/nXPPPZeZM2e2uE6RRDYbwT59CPbpY05tBQiFcGzdGjPElPH009jDK9dDmZnmIrraIaYRIwj26ZO8r0FEBFz9qXLdSFXBjTh8X0WGknL2/pGcvX/E7xoUbgznp2yQXjzNYDHwa6VUFnAV8EOt9QKt9WZrS2uenDsOk7ap8cVU6WsDEDhyzWSvhrSNQTpPqMB3SsPfEv8QBxVzm553bOV+Bs2Nqk5k7pKwiN1OsH9/qvv3P3J/IhjE+cUXMUNMmU8+ia3GDBUM5eTA6NFkh4eZ/CNHEuzZMyV/2Rwvgum9OdRxBoc6zqgTpPcXsvf9OSpIbyp+94iU+X8VTzP4G3Ad8JDWOqiUuhxYYG1Z1jIywF4n6doGhCxYX5LI/QyORW7PIOfcHsTpPMikSal3r0DEKTwjKTBokLm/A4Dfj3PLliPDS5s2kfXoo9j8ZsZUqEOHI8NLtQ2isDBlfukcT0Jp3TmUfzWH8q/GHtgT2fO5NkgvkNYjMpTk95zcYJBea4inGTi01p+p2n+IkNKb0sZz5u553kvunMPYo/ZwCWXCwd9lUP2dxE77S+R+Bunp6QmtTbRRaWkEhg4lMHQoXH45aQUF7CstJW3z5pgrCNe8edgCZvZUsGPHyCK52kYR6to1yV/I8SXk7MzhvCs5nHcltkA57nCQXmb5QrLKHyHo7EpN9hSqI0F6Tgq2fot078bIa9SG1ftcQ9nX718JrS+eZvCGUmo+UKiUegD4d0IrSIKac9LJvb0aMwUjzGGj5pzE/rK1aj8DIY7ickWuBCKqq0n79NMjM5jWr8e1ciW2kLkCPdilS+wiuZEjCaV4jHN7YcQE6VWYQXqVy8k48CyZ+x+PBOmFnF0xvFuwcSRZ2CAdn2dMwmuKZz+Du5RSw4DXgc2YAXNtmpFjY/enHSx9D6v2MxAibh4P/tGj8Y8ezeHwQ7bDh0nbuNG8ggg3iOwVKyLxG4HCwpiYDd/w4RhR26yKxDMcOVTnXkJ17iVHgvQql+OpeBF76BBHTf632anqdF
[Omitted: inline base64-encoded PNG payloads from a Jupyter notebook added in this diff. This span covers the tail of one "image/png" cell output followed by two further "display_data" outputs, each a base64-encoded matplotlib figure with "metadata": {"needs_background": "light"}. The encoded image data carries no readable content and is not reproduced here.]
PeEEmq++2tpjsW4d/gceIHjnnSQmTCD0+c9beywOP7zQ0Y5OhpOEdzoJ73TCLOq8ONmOK/K2lTRSZSpf62M4m+9J3ybhrCTm62wxMpRlKmFfkiDE/vN6iZx2GpHTTsNoa8P3xBPWZrzbbqPof/+X+NSpGOedh3PhQhLT5RyGQrNWU80h5s/on2maOBL16R3i1ohjK8HGO7LKVHHPgamk0XXT39DNCYrCkRJTDnYfkoI9YzQaG/Fv2IB/3To8f/87hmkSnTWL0JIlhD7/eZITJxY6xCx2fA8zFSS+rmWq1Igju0wVSK+m8o6dR1O8ZkBlqnwbyf/OMgdRAHb/DwX2j7EiGiV0++1WsnjtNQAiRx9tLZtdtIjk2ML/MrH9e2ij+LqXqbamVlM1pG+TcFYR8x1iuzKVnd7HXCRB9I0kiP1g9xgz43Nu22ZNbj/0EO533sF0OoksWGAtmz3jDMwCNWQcTu+hLZkmFaUJWnY9nypTbU2tpnp3H2WqmSTck4esTGX399HOCULmIETeJQ48kNZly2j9zndwbd2a3pBXvmwZps9n7bH4whcIn3IK+Pb/0HdRIIYBnvFEihYQKeram2pb56a/8Fu4w2/ia9mQs0xlraaamSpTjSnQixG5SIIQQ8cwiB96KC2HHkrLFVfg3rgxvcfCv2EDyeJiwmecYe2xOP54GKRd4mKI7Ws1VXhr57kb3VZTZZapMjf9yQeHQsjLT6BSqga4AxgPJIFbtNY35uO5xDBlGMTmzyc2fz57f/xjvM8/j3/dOnyPPELg/vtJjB3bucdi/vysPRZieOp9NVX2pr/cq6lShzWlmhsOZZlqtMrXR7Q48D2t9StKqWJgo1LqCa31ljw9nxjOXC4iCxZYO7OvuQbf00/jf/BBAvfdR/D224lPnGi1Jl+8mPhhh43qcyxGHMMg6aoiUlRFpOjEzsvNOK7oPzuTRvgt3OE38Lf8JX2TdJmqy/4NKVMNniGZpFZKPQT8j9b6iV5uJpPU+8HuMQ5GfEZrK77HHsO/bh3eZ5/FiMeJTZ9uLZtdvJjE1KkFjzGf7B4fDH2MRrItVabqPB7WOumvMX2bhGtcKllYSaNo/Keob6+0bZlqVE9SK6WmAHOAF/L9XGK1TXeCAAAQdUlEQVRkMYuKCJ19NqGzz8bR0IDv4YfxP/QQxf/935Rcfz3RI47o3GMxYUKhwxVDwHQEifnnEvPPzbjQxBGvS2/261iKG2xfg2FG4COYgDNjNVVHmWomCXeNlKl6kdcRhFKqCHgGuFprvTbH9ZcAlwBorecNVTO+fXG5XMTj8UKH0Su7x5jX+HbuxHH//Ti0xvHKK5iGgXnCCSSVIrl0KfRxj8Wofg8Hia1jNOMQeg9XeCvm3tcx2jdjtL2BEd7WeRNHEDN4GGZwFmbgMMzg4ZjBWeAeun06A3kPPR4PDMd9EEopN/AX4DGt9Q19uIuUmPaD3WMcqvic77+P/89/ts6xeO89zNR8RugLXyB8+umYvZwlIu/hwA3HGLPLVJmb/noqU8205jk80/NSphp1JSallAH8Adjax+QgRL8kpk2j9bLLaF22DNebb3busbj0UpI+H5GFC63d2yefDF4vAP61ayletQrnrl1UVVfTcsUVhJYuLfArEUOlb2Uqa/9GukwFmJllKt/M9KqqkVymytccxKeBC4HNSqnXUpddqbXekKfnE6OdYRCfNYuWWbNoWbkSz8svW8tm//IX/OvXkywpIXzmmcQrKii67TYcIesQUFdtLaUrVgBIkhjNDIOkexwR97geVlNt6ZwYD2/OXk3lCBL3zMhKGjHvIZgjYDWVtNrIYTgOm+3GNvHF43ifey69x8LR2pr7ZhMnUvfii0McXO9s8x72YrTGaCRacUW7rqbqqUyVShoZZaqKD07DE3mz2+NGvYexe+rjfY5jWJaYhLANl4vISScROekkWLWKCdOm5fxpctbWUnnKKSQmTSJRU0O8poZE6k+8pgazrEz2X4g001lEzD+PmH9exoUmjvgnGSf8dZSpVncrUxlmDBMnBonOu+Mh6p8/1C+lV5IgxOjh85GYOBFXbW23q8yiIuJTpuDasQPPiy/iaGnJuj5ZVGQljEmTiE+e3C2RmKWlQ/UqhF0ZBkn3eCLu8USKTuq83Izjim6z5jbSZao3spKDdX8HrZXLhjTkfZEEIUaVliuuoHTFivQcBEDS76f52muz5iCM5macO3bg2rEDZ+qPa8cOnDt34nn+eRxtbVmPmywp6Z48Mr4uVMdaYQOGi7j3IOLegwiXfD59cemuywk0349BHBMPbaXnkHRVFTDQ7iRBiFGlIwl0rGJK9LCKySwtJV5aSnzWrO4PYpoYjY24du7MTh47duD64AO8zzyTlYAAkmVlnWWrLokkUVPT63JcMTK1VC4nsHettV/DhqMHkAQhRqHQ0qWEli7t/+SlYWCOGUNszBhiRxzR/XrTxNHQYCWP7duzE8k77+B76imMcDjrLokxY9LJIlFTQ3zSJIzDDsNVVkZi0iRMv5wNPdIk3eNoKz2HYNNdthw9gCQIIQafYZAcO5bk2LHEZs/ufr1p4ti9G+f27Th37rRGH6mv3Vu24Hv8cYxUV4GOXxmJioqsSfPEpEkkJk8mPmkSiUmT5ByNYaq1YhmB5Ae2HD2AJAghhp5hkKysJFlZSWzevO7XJ5M46uoY29JC6xtvZCUS9+uv43vkEYxYLOsuiXHjrNJVRvLoGIkkJk5MbxIU9pJ0jyN+5JMkbbpUWBKEEHbjcJAcPx5z1ixCBx3U/fpEAscnn1ilq+3brfLVzp24tm/H88orONevx0hkLJ80DJLjxmUt3U0nj8mTSVRXg9s9hC9QDBeSIIQYbpxOktXVRKur4eiju18fj+P8+OP0vEdH8nDu3InnxRdxrluHkUymb246HCTGj7eSRY59IIkJE+R0v1FK/tWFGGlcLusX/aRJ8KlPdb8+FsP50UfZySP1tfdvf8Px8ccYGR0WTKeTRHV1zuW7HHGEVb5yOofwBYqhIglCiNHG7bZGC5Mn574+GsW5a1f2PpBUIvE++yyBjz/OuvkEl4vExIndl++mJtGT48bJkbHDlCQIIUQ2j4fElCkkpkwh5wkt4TDO2lpcO3dS2thIaOvWdDLxPfkkzrq6rJubHg+J6moreXRdgVVTQ7KqStqY2JQkCCHE/vH5SEybRmLaNJIVFbR0XYETCuGqre2cA8nYhe5+9FGce/Zk3dz0+YhPnJgzeSQmTyY5dqwkkAKRBCGEGFx+P/Hp04lPn57zaqO9HWeOXejOHTtwb9qEs7Ex6/ZJny979VWXpbzJ8nJJIHkiCUIIMaTMQID4jBnEZ8zIeb3R2tqZPDISiXPHDjyvvIKjqSnr9slgsLOFSY59IH09glZ0JwlCCGErZlER8Zkzic+cSSTH9cbevdnJI2MjoeeFF7p14jVLSqicOLHHfSBmScnQvLBhSBKEEGJYMUtKiB92GPHDDstxpWl14s1oYVK0ezeJd9+1VmE99xyO9vasuyRLSztHH5llrI5GikVFQ/TK7EcShBB
i5DAMzLIy4mVl6U68/ooKGjom0js68WYu3+1oqthbJ96eNhHW1GAGAkP9KoeMJAghxOiR2Yn3yCO7X2+aOPbsyZ4D6Shh9dSJd+zYHtu4xydNgh468frXrk23na/qoe18oeUlQSilbgMWAXVa6xwN9YUQwoYMg2RFBcmKCmJz5nS/3jRx1Nfn3IXufvPNrE68HRKVlZ3JI5VInNu3E/zDH3BErFkWV20tpStWANgqSeRrBLEG+B/gjjw9vhBCDD3DIFlVRbKqqtdOvJlLd9ONFDdtwrlhQ7dOvB0coRDFq1aN/AShtX5WKTUlH48thBC2lerEGx0/Ho46qvv1iQSOjz9m3DHHZPW76uDctWsIguy7gs5BKKUuAS4B0FpTUVFRyHDSXC6XbWLpid1jtHt8YP8Y7R4fSIz9Mm4c1NTA9u3dr6upsVWsBU0QWutbgFtS35r9Ov4xD/p9FOUQsnuMdo8P7B+j3eMDibG//MuXU7piRdaKqaTfT/Py5YT2I9bq6up8hJcmq5iEEGKIdcwzdKxiSoymVUxCCCF6F1q6lNDSpbYc4XTIS5N2pdQ9wN+Bg5VSO5VS/5qP5xFCCJE/+VrFdG4+HlcIIcTQkWOehBBC5CQJQgghRE6SIIQQQuQkCUIIIUROkiCEEELkJAlCCCFETpIghBBC5CQJQgghRE6SIIQQQuQkCUIIIUROkiCEEELkJAlCCCFETpIghBBC5CQJQgghRE6SIIQQQuQkCUIIIUROkiCEEELkJAlCCCFETpIghBBC5JSXM6kBlFJnADcCTuBWrfWqfD2XEEKIwZeXEYRSygn8FjgTOBQ4Vyl1aD6eSwghRH7kq8R0NPCe1voDrXUUuBdYnKfnEkIIkQf5KjFNBHZkfL8TOKbrjZRSlwCXAGitqa6uzlM4+89OsfTE7jHaPT6wf4x2jw8kxsFg1/jyNYIwclxmdr1Aa32L1nq+1np+6j62+KOU2ljoGIZ7jHaPbzjEaPf4JEbbxJc3+UoQO4GajO8nAbvy9FxCCCHyIF8lppeAg5RSBwK1wL8A5+XpuYQQQuRBXkYQWus48C3gMWCrdZF+Mx/PlSe3FDqAPrB7jHaPD+wfo93jA4lxMNg2PsM0u00NCCGEELKTWgghRG6SIIQQQuSUt1Ybw5FSqga4AxgPJIFbtNY3Fjaq7lI71V8GarXWiwodT1dKqTLgVmAW1vLmr2qt/17YqDoppS4D/g0rts3AxVrrcIFjug1YBNRprWelLhsD3AdMAf4JKK11o81i/AXwOSAKvI/1XjbZJb6M6y4HfgFUaq13FyK+VBw5Y1RKXYo1bxsHHtZaryhQiFlkBJEtDnxPaz0TOBb4pk1bhHwHa/Lfrm4EHtVaHwIciY1iVUpNBL4NzE/9gDqxVtkV2hrgjC6XXQE8qbU+CHgy9X0hraF7jE8As7TWRwDvACuHOqgMa+geX8cHv4XA9qEOKIc1dIlRKXUyVqeJI7TWhwHXFyCunCRBZNBaf6S1fiX1dQvWL7aJhY0qm1JqEvBZrE/otqOUKgEWAH8A0FpHC/WJshcuwK+UcgEBbLBHR2v9LNDQ5eLFwO2pr28HlgxpUF3kilFr/Xhq1SLAP7D2PBVED+8hwC+BFeTYrDvUeojxP4BVWutI6jZ1Qx5YDyRB9EApNQWYA7xQ4FC6+hXWf/ZkoQPpwVSgHlitlHpVKXWrUipY6KA6aK1rsT6hbQc+Apq11o8XNqoejdNafwTWhxegqsDx7MtXgUcKHUQmpdTnsUqxmwodSy9mACcopV5QSj2jlDqq0AF1kASRg1KqCPgTsExrvbfQ8XRQSnXULjcWOpZeuIC5wE1a6zlAG4UvjaQppcqxPpkfCFQDQaXUBYWNavhTSv0nVon27kLH0kEpFQD+E/hhoWPZBxdQjlXWXg5opVReW2j0lSSILpRSbqzkcLfWem2h4+ni08DnlVL/xOqQe4pS6q7ChtTNTmCn1rpj5PUAVsKwi1OBbVrreq11DFgLHFfgmHryiVJqAkDqb9uUHjIppb6CNfF6vta64GWcDNOwPghsSv3MTAJeUUqNL2hU3e0E1mqtTa31i1jVgYoCxwTIKqYsqaz9B2Cr1vqGQsfTldZ6JalJQKXUScDlWmtbffrVWn+slNqhlDpYa/028BlgS6HjyrAdODb16TKEFd/LhQ2pR38GvgKsSv39UGHD6S51MNj3gRO11u2FjieT1nozGWW5VJKYX8hVTD1YB5wCPK2UmgF4AFvEKDupMyiljgf+irX0saPGf6XWekPhosotI0HYcZnrbKxJdA/wAdbSx4Itz+xKKfUT4BysksirwL91TBAWMKZ7gJOwPjl+AvwI6xeHBiZjJbYvaa1zTcIWMsaVgBfYk7rZP7TWX7dLfFrrP2Rc/08KnCB6eA/vBG4DZmMtF75ca/1UoWLMJAlCCCFETjIHIYQQIidJEEIIIXKSBCGEECInSRBCCCFykgQhhBAiJ9kHIUYUpdSJWEsHHUAC+C+t9fNKqWbgFcCN1RKiGjhVa/2D1P1+DDyttX4647ECWK1NZqTud4vW+nb6KdXl9hQbbsAUIicZQYgRQylVAfwEWKK1PgmruV0odfVmrfXJwPeweln1xY+AZ1KPdTywbYAhlgFLB/gYQgwZGUGIkeQs4K6O/lmpjryvdrnNa/S94+hxWuvvpx7LBJ4FUEr9GmtT017gfKymjqdqrX+glLoodd+nsTY/NWC1e1gMXAIsVEo9jbXprX7/X6IQQ0cShBhJqrF2waOUOg/4BtbO3sszbrMAeLu/T5DqtBnUWi9INfn7Oj13/C3H6v10LnA21uH0k+3WHkWInkiJSYwkH2ElCbTWfwQuoLPp2eFKqf+HlTRWAWGsFhEdfHSWo3ozDWsuA6weTtPJPmcgswvnFq11EqjFKi8JMazICEKMJBuAB5RSWmvdTPb/7445CACUUlFgjlKq40PSXOC6Lo/3vFLqfK313alGjp/G6i11Wur6+VjHbDYDE1KXHQ68nvq6a+KIYZ1gJ8SwICMIMWKkavo/AR5SSj0F/A7rjPFct92D1db9WawGjQ/kaIT3E+DE1JzB34BpqXbMIaXUX4HzgJuxEkK1UmoDUNlLiB8DY5RSD6TOmxbC1qRZnxBCiJxkBCGEECInSRBCCCFykgQhhBAiJ0kQQgghcpIEIYQQIidJEEIIIXKSBCGEECKn/w+acM5qbeP/ygAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEkCAYAAADNfV1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOy9eZhcRbn4/6nT2+z7lslCkplkkkxCgAAJIYQ9bCoQpdm8bIpwFS5c8SIqIoIg+gO+CFfxigqKgLQYxB1UZF/Dksxk30O2WZLM3ts5p35/1Jmens6sme7Mkvo8Tz8zfbpOnfds71v11ltvCSklGo1Gozm8MYZbAI1Go9EMP9oYaDQajUYbA41Go9FoY6DRaDQatDHQaDQaDdoYaDQajYZDbAyEEHcKITYeymOONIQQW4UQtw+3HL2RKJ8Q4hUhxM8P0bEP2bEOJ4QQTwgh/pmEek4RQkghxIRkyKU5NAghJjv3bVFf5dyHSqBUIYTIA+4EzgQmA63Am8C3pJRrh0+yMcNSwBypx3KMR6WU8pSUSJQCnAbRb6SUdx6iQ96E9gJo+mEsPCDjgCnAHcAxwKeALOBlIUT+cAo2FpBS7pNStoy1Y/WEEMI7XMdOJVLKZinl/uGWQzPCkVKm5AP4gEeBZmC/8//3gY1xZQzgXqABaAN+C9wMmM7vAvgL8D7gidvnn6jWv7uXYxcCEvh0H/K9AXwv7vt3nX3OiNv2KvDDuO9nOscNAjuBx4HCuN+PAf4G1Dvn8z5wdsJxtwK3x30/w7lGtwzgmnqAB4EdQBjYDfw2oczFwAdACNjryJMfJ/8rwD7nmK8Cx/cj3yvAzxO/A98G9jh1PQFkDvS+9nF+gzoWqkcoEz5XOb9lAT9y7lMH8BGwNK7uyU75y4G/Au3A/QO8xpcAHzvXeKtTPjOhzFeA1U4d9cBzceeUKPPkXq7HE6hn/UvANqAFeAEoTih3ZdyxdgDfI+7d6KznIOq90amvA3gRuMKRd0JcmXnAS859bgCWAUcM5f3t6Zno7RmIO87XgM1ABNgE3NzDc303XTqpHrgBpaceQemoncANCfvd5NzrNuf4vwXGxf1+inNNzgRec67VauCshHruAdY4v38C/BTITShzqSN7CHgL1bCVwKK4MpXA74EmR+aXgDkJ9fiBjXH1fCaxnh6vdX8342A/wP9zLvj5wAzUi9ZCd2PwVeci/wcwzfm+jzilARQ7N+l+5/u3nDKT+jj2VOfkT+ijzF3A23HfX3fk/b7zPR31cp3tfD/NuZE3OrIeB/zbeQBE3INxJTALmI56KSPA9J6ULUoRtQGXDfCafhX1cp4CTHJkuDnu96uBKOrFmQUc6TzMRc7vFwIXObJVo16yfXQ3aDH54l/GhO9Nzv2dAZztfP/uYO5rXy/+QI+FUvhPoR74MueTjlIO/3b2X+Q8D19y7sXpzr6TnWdkB/B5p8yUAVzjq1Av4X84+ywGVgJPxpX5rnP+NzjX+pi4e14AbEG9D50yu3q5Hk+gFNczwGxgIUp5/yquzHmABXzDOdbFjnx3J9STaAz6q/d8lMvuq069XwDqiDMGqGeszTnfGcAc4HfAeiDtYN/fHp6J/p63r6AaaF9CPW/XoxThFxKe6ybnfCqB2wEb1RDo3PYNZ9usuP1uQjXYpgAnoJ61V+N+P8W5Jisc2aYBv3aOlRdX7nbgJNRzdzqwNuF6z3OO/T2gCrgApdBjShwoRRmkR51rXYUyZHtxDDlwtFPP953fl6Ket+ExBkCmczOuTdi+nO7GYCdxD62z7bckKA3gVOfB/A5K2S3t49gu4O/Ae4DRR7lTnDpzgAyU4r8FeM/5/UyU8uhshb4C3JdQxyTnIh/Vx3FWoMYvuilbVEumGThzENf1R8DLOManh9+3A/87iPoMlOK4PFG+hJcxUUGvTKjnp3Q3rAO6r728+IM91s+BV3q4tyEObHn9EviD8/9k5959e5DXeCtwfcK2xU5d+c6zHwS+1sd5bgTuHMD9eQLV2vbFbbsN2B33/XUgkLDfTY4M3rh6Eo1Bf/W+ATyVUO/9dDcGT3Bgr8mHajRdELdtwO9vL89Ef8/AJ8T14J1t/w/YnHDf/pDw7LcAf+rhfbihD3mOdq7B+LhnTdK911nmbDurj3ouROkcw/n+FPB6Qpnr6W4M7gTeSSgjiOsJAb8B3koocwMDMAapGjOoQD0UbyVsf6PzHyFEDlAOvJNQ5u3EyqSU/wYeQF2Mn0spl/V0UCGEC2WVp6Nujt2HjG+jlP1ilMXe5ux7tBAiF9UTeE9K2e6UPw64WQjR1vlBdQdBtQYQQhQLIX4ihFgrhGhyylQDRyQc+0uoFsBpUsp/9CFjIo+jWgQbhRA/FUJ8ttPPLYQoASaiuo09IoSYIoR4UgixUQjRgnoZcnuQrz8+Tvi+E9VqGdR9Heqx+uA4wAvsTLhfn8e5V3G8l/C9r2tcjLpWDybU+zdn30rU/U6jj/swSNZIKcNx3xPPvxrVO43nVUeGiiHUO4s+3l+H44ALE67FXufYses80Pe3D/p73ibQ8zWYLITIiNu2Ik4mG2UQVyZsqwdKOrc5EVQvCiE+EUK00nUNEt+ZmIxSyj2o3lrsegohlgohXhNC7HKu01OoZ7TMKTKL/t+Z44B5Cde7FdWw6bzes1AuuHgS71uPpCqaSDh/5RDLqIJKyZ+IusCVQgghHZMXV8aL6vbOBU6RUu7oq04pZVgI8RaqyxYBXpZSNggh1qKs/WkoP2knBvAD4Mkeqtvj/H0C1Vu4FdU1C6JaxIkDk2+jWktfEEJ8mHgufcj8sRBiCqrXciqqFXu3EGJBfLE+qvgz0IjqVn+COu83epCvPyKJotEVjDDg+5qEY/WGgep1HTeA+trjv/RzjTuPexPKDZXIDpRrrlPOZNDT+YsetsUzkHtwMPUmYqDeh/t6+G1vTJgBvL/9MJBnoLdrEE+0h3162mYACCEmodxIT6Lcyo0ow/NPDnxnEmUkrp75KPfZ94H/QfU+FgC/SqhnINf7X6iWfiLNzl8xgHp6rTwVbERdnBMTti/s/EdK2QzsQvnh4lnAgdyJau2fiPKtfT3+R8f6/xFlFRdLKbcPUM6XUUr/NNRF7tx2oXOcl+PKLgeqpZQbe/i0OWUWAz+RUv5RSlmDGnyc2sNxa1CKZinwMyFETw9uj0gp26SUz0sp/ws4FpgJnCylrEcpo7N62k8IUYi6PvdJKV+UUq5GuVJKeip/sAzyviaDCMo1GM9yIA/lt068V/0+G31c4zqUEa3q5TkIoXqLIXq5D33IfLCsAk5O2LYY1RDZPIR6V3Pg+5v4fTnK+G3q4VrERy/dSR/v71CQKvpsBz1fgy1Syo4hVH8cagzqZinlm1LKdfTfK+2JRUCjlPJ2KeW7Usr1
KKMSz2r6f2eWo3qCO3u43g1OmVX0f996JCU9AylluxDip8D3hBB1wDrUANQMVDeskweA7zqt8fdQg2FLiLNsQoiTUQM7n5ZSviuEuBZ4RgjxipTyHSFENsp6T0ANetlCiM6uV7OUMtiHqC+j3DUWXS29l4HnUC2G+G7aHcBLQoj/h7Loraiu2UUoH2PQOc/LhRBvoF72u+jlpZdSrnLO7WXgcSHENf24tRBC/A9K0X6M8ste6si+3inyXeBR55o/hzL2p6J6J/tQ3eJrhRCbUBFXP0QpjWTT731NIluAi4QQ1agBzlbUNf0nsEwI8XWUeyAf1RgJSSkf662yAVzjbwG/EEI0AX9APSczgXOklNdJKduEEA8AdwohgsA/UArlXCnl9+NkPtFpeXYA+/q7933wfeBPQojbUJE8R6GU7wNSyp5aqwPlAeB3Qoj3UO/XItSgeTz3ou7vb4QQP0I9X5NRg58/klJu7u/9HYJ88XwfeEAIsQE1xnAa8J+oHvBQ2IB6Zm8RQjyF8jrccRD1rAOKhRBfQOmZRcCXE8o8CLwvhLgL5fefgRrDhK735n9RevQPQojvoRomE4BzgL9IKd9CjZW8L4S4B6WnquPq6Zu+BhSG8kG9AP+H6r40Az+j59DS76O6X50hiN8EWp3fC5wTfiCh7v9DvVC5dA3g9PS5qh8ZXY5sK+K25aEGu/7ZQ/mTUEqmFeVeWAM8hBMih/I1v4VSsFtRN/yfwBNxdWyl+wBtJWrg9yl6iSqJK3sdKmy0ha7Q1fMTylyOUn5hVFf9LzhRDajW0wpUy3Ud8FkSBjN7kO8Vegj3TDjm7cDWgd7XPs7vYI5VgFJWzfH3HPX83ec8JxGUK+/vqHEa6BpAXpRQ/0Cu8QWohkKHU+5j4I643wXKlbTOOXYd8Lu43491jhFkAKGlCds+D8iEbVeinsUIyp9+DwMILR1AvTc59QVRz/GVHBhaOgcVlrrfKbcR9a4XMID3dwB6ZCDPgEC5X7agjPNmeg4tvT1h2wED+agon/iQ806XahDlUj3buQanOL+fknhNnO0mcfoHFdZah9Ibf0U1Mrrde7pCS8PO8+V3ysyLK3MESlc0OOW2oYzHlLgyl8TV8y6qkdzvAHJnSOSIQQjxS2CulHLecMuiSR76vmo0g0MIcQVdc5maUn28YU1HIYQoR/nn/43qin8aNbGlpwESzShB31eNZvAIIb6Gemf2ocYrfoDqUabcEADD2zMQQpQCz6IGodJQ3bZHZB8+3bGMEGIVvYd5/kZKef2hlOdg0fdVMxCEEJejXEa9MUsOPBhk1COE+DUqiq3TvfY88B05tEHwgR9/pLmJDmeEEEeg0iH0RItUEUMazZjACf7oKzpnq5TyUCVJPOzRxkCj0Wg0YyJrqUaj0WiGiDYGGo1Go9HGQKPRaDTaGGg0Go2GEbbspd/v/yVqQYf6QCAwu5+yk1DTrfNQM4lvCwQCf029lBqNRjP2GGk9gydQ070Hwu1AIBAIHI2afv2TVAml0Wg0Y50R1TMIBAKv+f3+yfHb/H5/BfBj1IpJHcC1gUBgLSrXRo5TLBeVXEyj0Wg0B8FI6xn0xM+AGwOBwDzU6mCdPYA7gc/7/f4dqMRPNw6PeBqNRjP6GdHGwO/3Z6HSDv/O7/d/jJq6Ps75+VLgiUAgMAE4F3jS7/eP6PPRaDSakcqIchP1gAE0BQKBo3r47Qs44wuBQOBtv9+fBhTRfb0EjUaj0QyAEd2SDgQCLcAWv99/EYDf7xd+v3+u8/N21JKV+P3+maiEaA09VqTRaDSaPhlRuYn8fv8zqMUiilALQXwHtWrVoyj3kAf4bSAQuMvv988CHgOyUIPJtwYCgWQtQq7RaDSHFSPKGGg0Go1meBjRbiKNRqPRHBpG0gCy7qJoNBrN4BHJqGQkGQN27RoZ88aKiopobGwcbjH6ZKTLONLlg5Ev40iXD7SMyWAo8pWXlydNDu0m0mg0Go02BhqNRjPctNUZvPiNHB5fUjRsMowoN5FGo9EcTrTVGbz2XRfv/6oUKcGKJMX9f1BoY6DRaDSHmLY6g9d+mM3q32cgJdjm8BmBTka0MZBS0t7ezqGeCxEMBjFN85AecyBIKUlLS8Pr9Q63KBqNZoBICa27DOpXeahb5aF+lYdNL/uwwiPLSz+ijUF7ezs+nw+PxzPcoowIpJS0trbS2tpKUdHw+RY1Gk3P2Cbs3eSm3lH6dbUe6le5Ce53qQJCUjDFYsriMB17DepqvCCG1z3UyYg2BlJKbQjiEEKQk5PDxo0b2bhxI3l5ecMtkkZz2BLpEDSsdndr8Tes9WCGlGJ3+STFM6JMOydEaXWUkuooJbNMvJldno62eoMPf1rM+78ykLYeM9AMEpfLxfLlyznjjDOGWxSN5rCgvdFwWvmqpV9X62HfZjdIpbzT8mxKq6McfUU7pbOV4i+sNDH60bBZJTZLH7Y45voG3nooi53Lh88FrI3BKGUkjmloNKMdaUPTNldM8Xe2+NvqXLEyuRNNSqqjzLwgSOnsKKXVJtnlFuIgGvV1huBid5RHyiRL7m1J4pkMnjFnDNKXLSP7vvtw7dqFVV5O6223EVy6NCl179u3j5tuuomtW7fi8/mYPHkyP/jBDygsLIyVuffee/m///s/brrpJr761a/Gttu2zXXXXcfatWvx+XwUFRVx3333MXnyZACuueYatm/fjmEYZGZmcvfddzN79uykyK3RaA7EDEPj+i6/fl2th4bVHiLtamDXcEsKp5lMPilMSXVUtfhnRUnLS15Ay33ZPt4UkoeyvNzbEk5avQfDmDIG6cuWkXvrrRjBIADunTvJvfVWgKQYBCEE//mf/8nChQsBuPvuu7n33nt54IEHALj//vtZsWIFb731FjfeeCM+n4+vfOUrsf0vuugizjjjDAzD4PHHH+fWW28lEAgA8NBDD5GTo5Z0fvHFF7nlllt48cUXhyyzRqOBUJOgfrWnW4t/7wZ3LKTTm2lTUh1ltr+DkmqT0tlRiqZFcaclT4ZGQ1DjMVjpcVHrcfGRx8VutzI8z2Z6ubktQok9fCnaRo0xyLnjDjyrV/dZxvvBB4hIpNs2Ixgk75ZbyHj66V73i86aRctdd/UrQ35+fswQABxzzDH8+te/BuDHP/4xmzZt4sknn8Tr9fL0009z44038thjj3HttddiGAZLliyJ7Ttv3jx+/vOfd52fYwgAWlpaMIyRFXam0YwGVBinizqnpV+/ykPjGg/7t42LlckqtSipjlJ5RijW4s87wkIk6ZWTKPdPjcdFjcegxuNipdfFHlfXASabFl4kLimxhMCGYe8djBpjMCASDEG/24eAbdv8+te/jin4+B4AQFpaGo899liv+z/++OOceeaZ3bZ97Wtf49VXX0VKyVNPPZV0mTWasYRtwt6N7rgQTtXiDzU5SldICqaaHDHf5sjL25Xir46SWWwnTQYJ7HIpxb8yTvk3OIpfSEmFaXNC2GJONMKcqMXsqEVQCE4ozcJyBhoiQgx772DUGIOBtNxLjj8e986dB2y3xo9n73PPJVW
e22+/nczMTK6++upB7/voo4+yYcMGfve733Xbfv/99wPw3HPP8b3vfY8nn3wyKbJqNKOdSLugvocwTiuslKk7TYVxVp0XjIVxFs8y8WZIJyto25BlkMB2V2eL3xVr+e9zFL8hJdNNm1PCJnOiNnOiFtVRi8wedPv3s70H5Owf7t7BqDEGA6H1ttu6jRkA2OnptN52W1KPc9ddd7FlyxaeeOKJQbtzHn/8cZ5//nmeffZZ0tPTeyzzuc99jq9//evs27ePgoKCZIis0Ywa2uqNhElbHvZtcXUP45wdZd7VqrVfUh2lsKL/MM7BYANbXAa1HoOVXqX4az0umg3H+EhJlWlzVshkdtRiTtRmlmmRPsBG/QdeN5GE8KOIECz3ugFtDIZM5yBxqqKJAO677z5WrlzJk08+ic/nG9S+v/nNb/jNb35DIBAgPz8/tr29vZ2mpibGjx8PwEsvvUReXl63MhrNWEPasH+LK9bS7/zbXh8XxjnJpLQ6yqylHbEWf3a5fVBhnL1hAZvcjm/fY1DrKP42R/F7pWRm1OZTwShHRi3mRC1mRG0G9/Z356XG9tj/I2W9hTFlDEAZhGQq/3jWrVvHI488wtSpU/nMZz4DwKRJk/jFL37R775tbW3cdtttTJgwgUsuuQQAn8/Hn//8Zzo6OrjuuusIBoMYhkFeXh5PPPEEIplPvEYzjJghaFjn6d7iX+0m2tEVxlk03WTKyeHYpK2SWVHScpPrP48CGxzFX+NxUeM1WOV2EXQUf5otmWVafC4YZU5E+ferTJvDIQ/CmDMGqaSqqoqdPYxJDISsrCx27NjR42/FxcX8+c9/HopoGs2IIbhfdGvpd4ZxSstpaWepMM4jL+mgZLYa1C2cZuIeSlO7B8LAeo/BFsPirdw0aj0u1ngMQk4jK8OWzI5aXNYRifn4K037sFWKKTlvv99fBTwbt2kqcEcgEHgoFcfTaDSHHilh/zZY/3patzQNLTu71EpWmUVpdZRpS0KxFn/epOSFcXYSBNZ6jG6Du2s9BlEhAItsw8PsqMWV7SqiZ07UZopp4+qv4sOIlBiDQCCwDjgKwO/3u4CdwPOpOJZGo0k9VhT2bVTKvrPFX7+6M4yzQIVxVpiMPy7CMVepFn/JrCiZRckL4+ykQ8Aqt3LxdCr+9W4jFqaZZ9vMidp8qS3C7KjF4qxcchr36mUd++FQ9IhOBzYFAoFth+BYGo1miITbBA1rPNTVdsXwN67zxDJqutMkxTOjzPhUkKnzfWRO3k/RDBXGmWxaBayKi+Gv9bjY6DawHcVfaNkcGbU5M9TZ4reYYEniR9uKsgTDPzw78jkUxuAS4JmefvD7/V8CvgQQCAQOyNEfjAsR1XRhGAaGYYzoNQ3cbveIlg9GvoyHQr6W3bBrhWDnCsGuFYJdHxs0bupSpRmFkvFzJTPOtBk/V1I+16ZoGrjcAB7cbgPTzE2KLPuRfCwkH3V+DJsNcVq9XMJRUuC3BUfZBsdIQTkgDAE+6C28R9/ngSFSuYqY3+/3AruA6kAgUNdPcblr165uG1pbW8nOzk6VeKOWLVu2sG7dOs4+++zhFqVXRkq4XF+MdBmTKZ9tqTDO+IHd+lUe2hu6vOZ5R5ixWbqdaRqyyvoO4zxYGffF5enpjOHf5u5y5Iw37VhLv3Nw92Bn5o7l+1xeXg6QlLDDVPcMzgE+HIAh0Gg0SSIahMZ1jtJ3fPwNa+LCOD1OGOepYScFs/Lv+3JS0zCsN0S3GP4aj4udcYr/CNOOi+pRyr9gGBO2Ha6k2hhcSi8uIo1GM3SC+8QBk7b2bowL48xWi64ceWnXpK2i6SauFKyhIoHdhqDWSczW6eOPT9A21bQ4NmJxTbsa3J0dtUhiRmjNEEiZMfD7/RnAmcB1qTpGIi8ULWG/d9UB2/Mj1Zzf+NKQ60/legadPPjggzzwwAP861//YsaMGUOWWTM2kBJadri6onlqPdStctO6KyGMc3aUaWeHYi3+3InJD+MEpfh3uARvCJs3sn1Oi9+gMS5PT6VpszBscmRcnp5srfhHLCkzBoFAoAMo7LdgEimJzKPZswFbdGUpNaSXksixSak/lesZANTU1PDhhx/G0lJoDk+sKOzd4GbrNoNN7+TEWvzhFicTpqHCOCfOj1BS3e60+E0yCpMfxglK8W+NT9DmtPqbDAMwcbm9TDdtTgubHBmxmB21qTYtUhBcpEkho2ay3bs5d7DP0/d6BhYRbKLdttmY7PXU8rfCz/W6X0F0FvNbhnc9g3A4zDe/+U1+/OMfc9FFF/Uri2ZsEG7tDONULf36Wg+N6+PDODMomWUy83wnG+fsKMUzTDwDzYg2SGxgs9voNri7yuOixUnX4JGSGVGbc4Mmc6IWJ2VmU9awj55TLmpGE6PGGAwEF17SrRKCrnoQEqQg3SrGRfIdpMlez+D+++/ns5/9LJMmTUq6rJrhR0poqzMOyL3ftLXrFcwotCiZHeXYL7ZRUm0yY1Em5DdipGiarAlsjOXpMWKKv91R/D4pmRW1OT8uQVtV1O72NhVlGDqG/yDwnHs82xe28Ma322n32lCutifLpX0wjBpjMJCWO0CHUcfvS0/AIowLH59u/DsZdknS5UnmegbLly/n448/5pvf/GayxdQMAyqM0x1r6Xf6+Dv2xoVxTlbZOOf4O2It/qzS7mGcRUWZJCsiMgqscxvdBndXu12EHMWfbkuqTYuLOyLMdnz80w6TBG3DQfEKi/w1GRz5eAYrr+7gjW+3EyxzJ82lfTCMGmMwUDLsUirbL2Zd5pNUtl+cEkOQ7PUM3nnnHTZt2sSCBQsA2L17N5dffjkPPvggJ598ctLl1ySPaFDQsLb7alsNa9xEg11hnMVVUSrODFFabcaycfpSOJIapitPT+d6u2s8Rix/fpaToO0/4hK0Veg8PSklJPbR7NlAk3s9Te71XAi4HVfgUY8po1BzVYT866+A4uGRccwZA4C5bTfT5FnP3Labk153KtYzuOGGG7jhhhti3+fPn8+vfvUrHU00wujYZ1Bf66ZudVf8/r6NbqTtuFVyVDbOuZd3xCZvFU5LTRhnJ0FgdZybp8brYp3bwHQUf66j+L/QHnFSMttMsWydpydFhIx9NLnXOUp/A02edTSzgfSt+yiudVNS42FWjQ/i+lwuU+Ay4eif+Yh8/AB7f9+7ezmVjEljkGGXcs7e3ye93lStZ6AZPtrqDN58KItdH3i5+iXlk5ESmrerRVc6W/v1qzy07u5qO2eXm5RWm1SdF4rF7+dOtJK66Eoi7U6envgJXBviErTlO3l6To3L0zMpIU+PZuhIJCFjr6P0N9DsWc9+93qaXevx7N5PyYduims9VNWkUVqbTv4aN66wau5LIbAmTwS2x+ozPRLphnb/pwjd/F/DdFZj1BikilStZ5DIu+++e1DH0AyctjqD177r4r1flSItsE3BP+/IiQ3sRlq7wjgLp5lMPCEcU/ol1SYZBakJ4+ykRcAqYfNGpjc2wLvJbSAdxV9sKffO2SEzpvjLteJPKhJJ0Gig2b2eJs/6rta+ex3G/i
aKaz0U17qpWJlOaW0mhavS8LR0uaXNcaWYMyoIXl1BtKoCc2YlZuVkZHo65ePnIb0eLMPi46vb2HPLuRyTPrBx0VShjYHmsKJ1j8E/v53LhhfTkBbEp3VZ+XQGxbNMqi8MxhZdKaqK4klx3OR+gcrP4+3KzrnV7QJMyE1jnGUzJ2JxQTAaG9wt0+kakoZS+nVK2Xs2xFr8TZ71yI5mila7Kalxc0RNJgtqMimszSatvstFbOflEJ1RSWRpJe1VFZgzKohOr0Dm5fR6zEj1dCLHzqXhq+ezevadLKz/uorrHUa0MdAcFoRbBKuWpfPKPTmxHD2J3LxuT8rCODtpdBK0xS/C8klcnp6JToK2izuinJiexRF791OkFX9SkEg6jD1xrfz1NHtUa980mylc56a41s2EmiyOW5lN4ap8sramxfa309MwqyowT62guVPpz6jELilisP7BxpdUlh4fcIn5Lxrt4Q/Q1cZAM2aREnYu97LiqQzW/ikNM2RQOD1Ken6UPR97kZLY5C4g6YZgj5OgrbZzApfXxe64PD2TTYujohZXdESY7QzuFsRlES5KM2jUhmDQKD9mkPQAACAASURBVKW/i/2e9TS71xNyfcKeohU0uTcQpZW8LS5KatyMW5nNvNocCmuLyN6QiWGqprl0uzAryjGPrKDFX6mUflUF1qTxMMjowdGENgaaMUdwn6D2uQxWPJPB3vUevJk21Z8NMvfyDsqOjCIEtNUbfPjTYt7/lYG0uxuFwSKBXYZw0jR0De7WO4pfSEmFabMgbDEnqgZ3Z0ctUpQk9LBBYtPu2hVr5Xe2+JvdG4iKNrJ2GxR/4Ka8Jo+qlTkU144jZ20urg4zVoc5KZ/ojEraz+hq6ZtTjwDv4TfDQhsDzZhA2rD9LS8rns5g/d/SsSKCcUdHOPv+JmZ+Jog3s7vmzSqxWfqwxTHXN/DWQ1nsXD6w+E8JbI/P0+P4+PfFJWibbtosjkvQNitqkaUV/0EjsWlz7ThA4Te512MaHaTtFxTVuClfkc+cmjyKaieStzqIZ38oVodV4sOsqiR4WQXRmZXK3TN9KjIzYxjPbGShjYFmVNNWb1AbUL2Apq1ufLk2R32+nbmXdVA80+x3/6wSmyX3tvT4mw1scRnUegxqvF0TuJqdWbtuKakybZbEInpsZpkWKUobNOaR2LS6tjvRO10Duc3uDZhGEHcHFK1xM35lIbNq8impqSB3dZC0XW2xOuxsi+iMKUTOq6R9RgVmVQU5C4+ncbhHZ0cBY9IY1BmCL+en8+j+4EGvjtQTqUxhPX/+fHw+X2wi27e+9S1OOeWUpMk+lrAt2PKqj5VPZ7DxH2nYpmDigjCLvtrK9HODA4r+qTMEF7ujPGIISmyJhUrQtjJuAletx0Wbo/i9UjIzavOpuDw9M6J2bystavrAxqLNtb3bQK5q7W/EMkIIEwo2uJiwspiqFQWU1FaRtzpE+uZmhDOmIn0dRKdNwTyhkhbHvROtqsAuLz1wMLeogKTl9RjDjElj8FCWl3e9Lh7K8nJvSzhp9aY6hfXPfvYzPeu4D1p2ulj52wxW/jad1l1uMgotjr22nbmXtlNQYQ2qrrtyfLwpJJcWpJMFrHK7CDqKP82WzDItPhuMOimZLap0np5BY2PS6trWzbWz37OOZvcmbBEGCbnbXExcUcK0lUWUrKomvzZCxob9GBHVq5NGE+aUiZgzqmm9QLl3olUVWJMngHtMqq9hY9RczTtyfKz29B/uEQE+9LqQQvBkppdaT/85S2dFLe4agNFIZQprTc9YUdj0zzRWPJ3B5n+rdvjkxWFO+04L05aEBpXqYbch+GO6h0C6h7Ve9Syt9bg4KtK55KLy8Vea9uh5MUYANlFa3NuUeycuRl8pfbW2SEa9wcQVZcxbUUxJ7VEUrIqSuXYfrrYQaiSmAXN8GWbVdNpPdpT+DDVJizTd/zoUjLlnfkdc6J50vk+1ku8vTHYKayCWn+i4447jtttuIzc3N8lSjx72b3Wx8pkMagIZtNe7yCqzOOG/2jjykg7yJg28F9As4K/pHp5P9/CW00gotGxcUmIJgReYO8DGwOGOUvpbukXvtLk3s2/cemyh1hHxtggmfVDOUTUllNYeR0FtlKw1+3E3tqJGYeqw8nMxZ1QS9J+EWaXcO2bVVGRO9rCe3+HOqDEGA3lZ6wzBCaVZsSn7UgiaXfCTxuSOHUByU1gDLFu2jPHjxxMOh/nOd77D7bffziOPPJJMkUc8ZhjW/z2NlU9lsu1NH8KQVJweZu5lTUw9LYwxwKc1BPwrzc0f0j38K81NWAgmmxb/3RZhUcjk0qKMWD6fiBA8m+nl5rZI0p+R0YpFhBb35m4zcpvdG2h2b0YK5b5xhQSTVk3gyNXjKfzoBApWWWSvacL7yV7AAnZjZ6RjVlUQPvNo2qsqVUt/RgV2UcGgJ2lpUs+oMQYD4aEsL4mvs+1sT+bYQbJTWAOxpS59Ph9XXnnlQRmZ0UrjBjcrnspg1XPpBPe7yJlgctL/tDDn4g6yxw2sV2cBb3tdPJ/u4a/pHloMQbFl8/n2CEuDUeZGbQTwjRzfIXlGRgMWYVrcm1WStbjB3Bb3FqRQvS9hCsZvmMisj8dRVruYwlU22aub8W2uR1hRYCvS48asnEx03rGELq8kWqWUvjVh3JiepDXWSJkx8Pv9ecDPgdkoj801gUDg7VQdD+ADrzuWs72TiBAs97pRWd6HTipSWHd0dGCaJjk5OUgpeeGFF6iurk6KvCOVaFCw9k9qLGDn+z4Mt2TaWSHmXt7B5JPCA1rEXQK1HoNl6R7+mO5hj8sgy5acE4qyNBhlYdg64AE/FM/ISMMkRIt7U/dka551tLq2dSl926Bs+yRmrChnXO1UCmslOatb8K3fjRGKANtUxs0jJhCdUUXbuecSraoka+GxNOZlg0cPr492Utkz+BHw90Ag8Dm/3+8FUj6746XG9pTWn6oU1g0NDVx77bXYto1lWUybNo177703pecyXNTVulnxdCarn08n3GJQMNXklNubmX1RkMyigfUCtroEzzvjAJs8LjxSclrI5M5giDNCZp/r8cY/I0VFRTSOoZBDkyDNMaXflV5ZKX11bYV0UVw3maqV4ymrmUFRrSBnTSvpa3djNLcCmwGwyoqJVlXQccUix71TiTltCjKj+9XNKirSYZtjhJQYA7/fnwMsBq4CCAQCEVSgz6gmVSmsjzjiCF56aXjWPT0UhNsEa/6QzoqnM9izwovLJ6k6L8jcyzqYuCAyIPdxgyH4U7qbZekePvKqx/aEsMl1TUHOC0bJO4zc/aYIOpOxulr5Te4NtLq2qbW/ASHdFDRPpmL5EZTXHklRDeSubid97R5cdY3ABgDs3GyiMyoJnn+WGsh14vVl/uEbvHC4kqqewVSgAXjc7/fPBT4AbgoEAt2a7n6//0vAlwACgQBFRUXdKgkGgykSb3RjGAaGYRxwvUYSLpebjs3FvPtLg48DBpF2QVm1zfkPmBxzmU1GgRvoPcUvQCuSPxo2zxg2LwuJJeBIW3CvaeC3DSYKr
+pvHmSf0+12j+hraLtDmMXbaBSr2SvWsNf520yX0jekh8JIJVWrZzKhZiFFtQbZq9rxrdqJ2PIJQq4FQKanIWdOQy45BbO6Cul8KC9FCIEHDmoexUi/hjDyZRwp8qXKGLiBY4AbA4HAu36//0fAbcC34wsFAoGfAT9zvsrELrtp9p9O4HDEtm1s2x6RLo5Qk2DVsgxqn81mT60HT7rNjPM7mHtZB+XHqCRxHTZ09CJ6BHjF5+b5DA8v+dyEDMFE0+bLwSgXBqNUmV2upKGe/UhxE0VFm7NoynonPl+tnNVu7KBzfUpDesmNTGXCpuksWDmfolo3eauCZKytw71pGyL6EQDS5cKcOolwdRXRpec6ydemYU0qB1cP83T27h2S7CPlGvbFSJUxfdkysu+7D7FrF0Z5Oa233UZw6dJB1VFeXp40eVJlDHYAOwKBQOeSXc+hjIFmDCIlfPKOShK37i/pWGHBhGNszrqviZkXBPtd/N0G3ve6WJbu4c/pbpoMg3zL5uJglAs7ohwbtcbECl4R0RLLtdPp2mlyr6PdvStWxpA+cqNTmbT9SCrWn0XOBzZ5q0JkrK3HvW4zRsfKWFlzYjlmVQWhM05y3DuVmBVHgC+Fiy5rkkL6smXk3norhuP9cO/cSe6ttwIM2iAki5QYg0AgsMfv93/i9/urAoHAOuB0YHUqjqUZPtobDWp/l87KpzPZt9mNN9vmyEs6mHtZO9Wn5NHY2NHn/mvcBs+ne/hDuoedboN0W3J2yOSCYIiTw+aoTf8QFs0HJFtr8qynw7U7VsYl08g1K5hQfyzjawspqfWSvypMxpp63Gs34dr/YaysVVSAWVVBx6UXOLn1KzGnT0FmZw3H6WkGg5SIlhZcu3erz549GLt3k/XoozFD0IkRDJJ9331jyxg43Ag85UQSbQYOn8D5MYy0YevrPlY8lcGGl9Kwo4Lxx4U598ZWZnw6hKeflJ07XII/OAZgjceFS0pODpt8ozXEkpBJ5igaCA6Lpu7J1jwq/06Ha0+sjMtOI8+cxoSm+YxbXUJpjY/8VREy1jbgWbsJ9673YmXtrEzV0j/3NMyqCjLmz2NvWZGapKUZedg2RmNjNyXviv842xKVPnDAXJdOXLt29fJL6kmZMQgEAh8Dx6aqfs2hpXW3Qc2zGaz8bQbNn7hJz7c45iqVKrpoet9jO/uE4M/pakbwuz71yB0bMbmnKcinQyaFh3Dm7wtFS9jvXdW1wXG55keqOb+x54iukNhHs2cD+52ZuJ0unqCrPlbGbaeTa06nvP1EyteXUVqTRv4qk8w19XjWbsa19T2E7ayk5fVgVk4hsmAeHc4qWuaMSqzxZd1m5qYXFWGPQF/3YUEkgquuDtfu3V1Kfs+emKI39uzBVVeHSBjXlG43Vmkp9rhxRGfNwjr9dKyyMqxx47DLy9X/paWULFqEu4fIRCuJYwCDZUzNQNYkF9uEzS/7WPF0Jpv+5UPagiNODLP4Gy1MPzuEu485dx1IXkhTA8Gv+NxEhWBa1OLWlhAXBqNMsoanC1ASmUezZ0MsgRqowdmSyLGEjL1OK3+d4+JRLf6Qq0shu+1M8szpjA+dTNmWCZTWplFQa5G5pgHPus24N76PCKu6pWFgTZ5IdEYFwfOXKKU/sxJz8kSdcXMYEW1tqtW+a1eXgnf+Gp3/92CE7fR07HHjsMaNI7JgAZbzvz1uXJfCLyoa0Kzr1ttu6zZm0Fl/623DN7Q6pp/ItjqDNx/KYtcHXq5+aegtrFSuZxAKhbjzzjt5/fXXSUtLY968efzwhz8csswHQ/MnLlY8k0HNsxm07XGRWWwx/8sqSVz+lN6TxJnA6z6VEuJFT5S2ggzKLJsvtEe4sCNKtWkP60CwRDKj/So2ZP6223Ybky3pL7Au61exbR47izxzOhNDZ1C0ZwJlNVkUrrKV0l+7Cfe6DzHaXo+Vt8aVEp1RQXjxfKelX0G0cgqkp6E5REiJsW/fgS35piYKtmzpUvitrQfsauXnYzsKPTp3rlLszvdORS9zcpKWU6lzXCD7vvtw7dqFdZDRRMlkTBqDTiNQ+2zmAYueD4VUrmdwzz334PP5eOONNxBC0NDQkBSZB4oVgQ0vqfQQW19TTf6pp4Y583vNVJwRwtXLaK4EPvS4+EO6mz+me2h0GeTaEr9tcM7+VhZELFKdncbGImQ0EnTV02HUEXTVEzTq6HDVEzTq6XDVETTqCboauOqYbM48wccb3zZpH2eDhDS7gImhJRTuP4Ky1TkU1lhkrtmLZ90m3OtqcDW80nWsvFyiMysJfu68WEs/Or0CmaszbqYU08Sor+/mj4+5azq/79mDCHdPKSINA8rKMEpLMSsrCS9a1K0lb5WVYZWVQfoAVkRKMsGlSwkuXTpiQl9HjTH45x051K/uO77EikDzDhft9U48tewyAk9/rrCXvaBkVpQz7up56cN4UrWeQXt7O8899xzLly9HOC2P4uLifuVJBvs2uVjxTCa1gXQ69rrILjc58b9VLyBnfO+9gI1xkUBb3QY+KTkjZLI0GOLUkMn4oiIaI4NbcCYRizBBV0OPCj7oqqPDqCfoqidkNMbSLcTjs/NIt0pJt0vIjUwl3Sql7ONlFK/2MfeXGWxfFGHfTMnM9SfiW7MW9ycvx/a109NUxs3TToxl24xWVWCXFOmMm8kmGOzdXdM5EFtfHxtz6UT6fDGlHjn66JgLJ17R2yUlFJWVjQhlO9IZNcZgIOxd7yHcKuAQOCOSuZ7B1q1byc/P58EHH+Stt94iMzOTW2+9leOPPz4lspshWPdXlR7ik7d9CJek8swQcy/rYMopYYxe1hDaYwhecHIC1XhdGFJyYsTiv1rDnBOKkjPAYYCo6CDoKPiO2F+l4GMteVcdYaPpgH2FNEizi0i3SsiwSyiMziHdLiHDKiHdLnW2l5JuFeOKW5TS2Lsf3+vvAstwRWxAMPVfPqb8C+zcNYQXHNMVujmjEmtiuc64OVSkRDQ3d1fqPQ3GNh14n+2cnJhSN6uqYv75mG9+3Djs/HxtmJPIqDEGA2m5t9UbvPVQFjXPZiLt7u6hy54b2kzLRJK5noFlWWzbto3Zs2fz7W9/mw8//JCrrrqKN998k+zs5LkfGta4WfFMBqt+n0GoySBvssnJ32hh9kUdZJX2nCSuWcDf0jwsy+haHGZuxOLO5hCfCUYpdSKBJJKwaI4p+HojSH3mppiLpsPVpeyjRtsBxzGkh3SrhHS7hBxzCqWR+T0o+BLS7EKMgTy2oTDe99/D9/o7+F59B2/tuh6LCcBoacXV3ELTTV8Y8LU87LEsjMZGxLZtpK1Z0+WuifsYe/b0GFZpFRdjlZVhTpxI5LjjelT0MjNzGE7q8GbUGIOBkFVis+TeFhbe3NarUUgGyV7PYMKECbjdbi644AJAuZ8KCgrYvHkzc+fOHZKskXbBmj+ms+KpDHZ/5MXllUw/RyWJm7Qw0mOq6BDwcpqb5+MWh5lkBvlCcCMnR1dQKNYTdNezOree5XH+eEuEuleUq0IuOxV6QXQW6dap3VryGZb6zSfz
EUPp0UmJe+1GfK+9i++1d/C+8yFGKIx0u4gcdxQtt36Z8OL5FH/qyq5dvF4wBO0Xf4a2m7948Mcea/QUVpnop48Lq+ycBSHdbuWaKSsjOns21hlndLlrysvV/yUl4NUzpEciY8oYdJJoFHYuT97Dl4r1DAoKCli4cCGvvfYaJ598Mps2baKxsTEWaZRIm+sT3nbdw55y1ZJNjJGXEvas9LDiqQzWvJBOpM2gcFqU077TTPXngmQU2NhE6TAaYi6aNlc973kz+KdnGu+6j6JDZJAtG5jHcxzJs0xwf4Bww3bUx2vnxlw1JZHjDnDVjM+pIrTXi0dmDU3J94FR3+go/3fxvf4OrnrV+4tOm0LH5RcSXryAyAnzkJndM9lJrwcMA/tKPw3Xf16NAxwm9BhWGe/CGUhY5QknxFrxmdOn05SVpRT+AMMqNSMTIeWImfIpdyXMvmttbU2qm2SorFu3jtNOO42pU6eSlqZCBgeznsGMGTOYMGFC7Jw61zMA2LZtG7fccgv79+/H7Xbz9a9/ndNOO63Humq2v8Zz677FlCs3Y0gv09ov5YSWewm3CGqWKVdQY20mrnST8gtWUXr1v/AufFcNwjot+ZCxDykku5jLCi5mJZ+llXJ8spWj5assMt/jGHMP2VZRbBC2S9kX4+5z1YDUJAcTwSDedz+Ktf49azYCYBXkET5pPuGTFxBedDz2+LLe5VpyKZFj59J28xcpmDVjRA8sDuoa9hZWmRBx019YZfwnPrSyt7DKkRIJ0xcjXcahyOckqktKa2tM9gxSRarWMwC1psFzzz03qDqt3WW03HUH7/zjPD5e+A6tz52NDGbgPuojsn/8GGmXPU0kr5kd0k26rZR6ljUBK7qEj4zTeM1zNNtd+bilzcnhdj7b0cqSkCSdxajlKIYZ28a9egNpr72D77V38b73ESIcQXo9yvXzjRsIn7yAaHXVgFukjS89k2KhU4BpYtTVHRhxE9+yr6vrMazSLilRg7DTphFevPiA2HmrtHRYwio1Iw9tDEYjlovQHz9N4xd/CKYbMDDriii75D2mXFFD6ZEmGfJEMiIXkr6nlDS7gL2Giz+mq3GAD9O6Fof5SqtaHCZ/hHQQjd31+F57B9/ryv3j2rsfgOiMCtqvuIjwyQuIzD/6gBW3RisiGDwwjNL5625ooHTHjt7DKh2lHpk3r3vLvnMgtrhYz3TWDBj9pIxCrB0TsT4+CsyusZCbPmzGmzkdmK5GgIE2AX9JU6Ggr/tcWEIwK2rxrZYQ53dEGX8IcwL1hugI4n37gy7Xz3pn2cWiAuX2WbyA8EnzscsOzbyLpJEYVpk4Qaq/sMpx42DiREKVlQe25seNQ+qwSk2S0cZgNCIh0U3oddJ99rQ4zATT5j/bIixNWBxmWLBtPDVrVev/1XfwLl+BiJrINB/h44+i4+JPEz5pAebMypQNRnYuKuLatYuSg0kD4IRVHjBBKm5Qts+wynHjVFjl8cd3nwmbEFZZVFRE8wj2dWvGFtoYjEYE2K7urfp3e1gcxh+MsjQY5djI8C4O49q5W7X8X30H7xvv4drfDEB01nTav3gZ4cXzCR931CHJ49PvoiLhsAqr7CMlsau+vudslZ25bWbPxjrzzG4KXodVakY62hiMQuzxNg1H2xTvkAgLRESwtCiTdFtyVsjkwmFeHEa0tSPe+ZicP7+kXD+btgFglRYRPuMkpfxPmo9d3HuKkKRj2xh795Jz9909LiqS99WvknPnnbh6WAbSzsiIRddEFi7sasmXl8dcOHZhoQ6r1IxqtDEYhbS7BVvPtXjnRx0cf5eHaW8aPLK/g7OGa3EYy8KzYjW+V9XAr/eDlQjTwpXmI7LwWDo+/1nCJy/AnD41+X7uUAhXQ4OKtqmvV8nMEv82NGA0NCCsPnIlRaOEzj67x7TEMjtb++c1Y54xZQyKllxKZN6RtN38RezS5A84piqF9SeffMI111wTK9vS0kJbWxurVq2iJzr1fbjUpuT/a+PO5hAc6J5OKa7tO5Xyf+1dfG++h9Gs4tcjc2bQdv1/kPbps2iYNvng1uOVEtHU1KXkGxqUUq+rw2hoUH87lXxz84G7GwZ2URFWSQl2SQnR6moVYllSQvaDD+Lat++Afazx42keppThGs1IYEwZA++q9Xg2bCHz2T/RfvGnk24UUpXCeuLEifzjH/+Ilbvjjjuw+mrFOlhC8McMD99oDVOS4sgg0dKK7833Y1E/7q1qzoQ1rpTgOacRXjyfyKLjsQvVzGpfUREkDn5Go2rgtbcWfKeyb2g4IGYewE5Lwy4txSopwZw+nfBJJ2EXF6uVpRxlb5eUKJeNq+dsezI3d8QtKqLRjARSZgz8fv9WoBWwADMQCAxpCcycO+7Hs3p9v+VEJApA5pO/J/PJ32OVFGJNKAdv7x706KzptNz1tX7rTlUK63gikQjPP/88Tz/9dL/yANjAQ1le7m05UHkOCdPE89Gq2IQvz0e1CMvCzkgnsvBY2q+5RLl+KiYjOjow6upwb1iH8aZS7q62NvK2bu2m7I19+xA9zHi38vOxS0uxi4uJTJ2qlHunki8uVkq+tBSZlTVkd81IXFREoxkJpLpncGogEBiW2LhOpeOqa8ToCBGdXZXU+pOZwjqel156ibKyMubMmTMgOSJCsNzrBoZoDKTEtXUHvlffVhO+3nwfo7UdKQTm1ImETj0euzQP6QFXYwNpf/kdmb/8X6XkOzoOrM7jwVtcjF1SgjlxIva8ed1a71bn/8XFhzzCZqQtKqLRjARGjZtoIC338vHzYv93JiPrzEiZ7GRkyUxhHc+zzz7LJZdc0uf+k3fs5Nt3/4DPf+u7g2/Vdg641tfj3rwV77sf4alZj3vLTox2NVtNGoAMAiGEjODZtBvPJrW7nZ0da7VH5s7FLi6OuW7ilX1BZSWNPfjmNRrNyCSVxkACL/n9fgn8XyAQ+FkKj9V10BQbAUh+CutO9uzZw9tvv82PfvSjvisxTYSU3WLkQ6ed1n9UTX0DRnM74AO8gAcQSGzwCqxxWZhHlGFNGo9VWhpz08QreznQPDY6zFKjGVWk0hicGAgEdvn9/hLgH36/f20gEHgtvoDf7/8S8CWAQCBAUVF3xR3sYQZnX0Sqp8cyUqYqLXEqUlh3EggEOP300ykoKOhh754xgkHybryxx0ll0pcGhSVIbxYi6IKOLCANKQRy+hTkScdjn30q8oyTY8nKBOqhGOqD4Xa7D7ifI42RLuNIlw+0jMlgpMh3SFJY+/3+O4G2QCBwfx/FDusU1gCLFi3i7rvv5tRTT+2zru2vvca6b32LKzerPD4SaLnjDjUIm5aBe/sePDXr8b7zIe5ddQCYkyd0pXleeFzKF3AfDf74kS7jSJcPtIzJYEynsPb7/ZmAEQgEWp3/lwB3peJYh5JUprAGeOONNwZYW/d7bxeVY+yLkP6H5/DUrEVIiZ2bTfjE42j7ry8QXjwf64gJByW3RqM5PEiVm6gUeN7v93ce4+lAIPD3FB3rMMQA0oB8JF5cjTZZP32SyDFzaL3lOsKLFxCdO1O
nL9ZoNAMmJdoiEAhsBoa2eK+mH9xIfFjjimm59SuEzzkVmZ013EJpNJpRig75GIVIZ7axAFx7Gsl89o/aEGg0miExov0IUkqklAidJCyGlBKzrQ3pciHTfLEQWo1GoxkKI9oYpKWl0dHRQaaz2MfhjpSS3Str2fGrZ+CoauoC/5OyEFqNRnN4MaKNgdfrJRKJsGnTpkFP7hosoqMDY/9+iF9r1jCw8/ORGRkHlDdDguB+A9tUq4yl5dmIJIgoQmGMphaIRpFeLzIvB+nzIqXE3rSVLa+8TvOnTmfijCptCDQaTdIY0cYAVEimaZq8+eabRCKRlB7LvWYNvjfewGhtxc7OJrxoEebMmd3KRNsFm1/10bjGQ3qBRcUZYXInWrB9aMd21TXie+0dXNt3YufnEl50HOb0CtjdvZw4bi752dksWbKE5h7SN2s0Gs3BMGhj4Pf7RSAQOKRLqOTl5XHeeeel/kAXXgj0PAlE2rDimQxeuTcHd1BwwY2tLPhKG+7BTUI+ANf2nWT/4Mdk/OFFrII82v77Wto//9k+s6wCeDzDtY6ZRqMZixxMz+BB4L+TLchIpnG9mxe/nsuO93xMOiHMkvuaKKzsf72BvhD7msh++JdkPvEs0uWi9cZraPvyFcickTPjWqPRHD4M2hgEAoHDxhBEg/D2w9m8+2gW3kzJuQ/uZ7Y/OLSU+sEQWb/8LVn/+ziirYOOiz9N6y3XY48rSZrcGo1GM1j6NQZ+v/8XgUDgC87/AngsEAiM+VjGNS/4+Put+UTaDGZf1MGp324ho9Duf8fesCzSf/9Xcn74KK7ddYROX0TLN2/EnFGZPKE1Go3mIBlIz2Bq5z+BQED6Ipz9IgAAE8RJREFU/f6KFMoz7DSsdfOL69w0biykc7Xh8x5qOvgKpcT3ytvk3PMwnjUbiMydxf6H7yKycEgLv2k0Gk1SGYgxaPT7/V8E3gJOAPamVqThoa3O4K+35LHl3/EjwkOb7OapWUPO9x7G98Z7mJPGs+8n3yf06TN0rn+NRjPiGIgxuBK15sBXgHXAFSmVaBgItwmeOKuY9oaeF1EfLK5PdpH9w5+QsexvWPm5NN/1Ndr/43P9RghpNBrNcDEQYxAG9qAWtn8UOBp4P5VCHUp2vO/hLzfl095oUDo7wt6NHqQNVmTwvQKxv5nsh39B5hMBMAxab7iatq9cqSOENBrNiGcg/orfAHnApYFAwAS+n1qRDg1WBF67L5unlxYhJVy+bC9XvdjIdW/XcfzVNu40ics7wOkUoTCZj/6a0hPPJ/Oxp+lYei51rz9P6zdu0IZAo9GMCgbSMygOBAI/9TuLE4wFGje4+fONedTVeJlzSTun39mCL1sp/qwSm6UPWxxzfQNvPZTFzuXe3iuybdKX/Y3sH/4E9849hE47UUUIzZx2iM5Eo9FoksNAjEG93++/GEj3+/0XckCChNGDtOGDxzN59d4cPBk2F/5iH9PPDvVYNqvEZsm9Lb3W5Xv1bXK+9zCe1euJHDmTxge/Q2TR8akSXaPRaFLKQIzBNcAXgQ+BCcC1KZUoRbTsMvjrV/PZ9rqPijNCnHN/E5nFg5834K5dS849D5P22ruYk8az/8f3EPzMEh0hpNFoRjUDMQYzAoHA//r9/hLgKmAysDaVQiWbNS+k8eI38rAjcNYPmph7ecegZxG7duwm+wc/If35vyFzc2i+8xbar/gc+PpwI2k0Gs0oYSDG4AHgdNSC9q8Cj6PmG4x4Qk2Cf9yey+rnMyg/JsKnHt5P/pTB5RQSTS1kP/JLMn/5WzAM2r58BW1fuRqZqweGNRrN2GEgxiDD7/f7AF8gEHjG7/dfP9DK/X6/C1gO7AwEAp86WCEPhq2ve/nrf+fT3mBw0v+0sOCGNow+zrZoyaVE5h2pVg0rKlIRQk88S/bDv0S0tBG86FO0fO167PFlh+4kNBqN5hAxEGPwG+AF4Dt+vz8N2DKI+m8C1gA5ByHbQWGG4NX7clj+WBYFFVE+/8d9jJsb7Xc/76r1eDZsIfPZPyEXHkvJmvW49zSoCKFv3Ig5S0cIaTSasUu/xiAQCPwY+HHcpqsGUrHf758AnAfcA3z1YIQbLHW1bv50Yz5713s45uo2TvlWK570gS+9ICLKaIh/v4kQguCZi2n+wTexS4tTJbJGo9GMCFK50tlDwK1Ar851v9//JVSqCwKBAEX/f3v3Hh5Vfedx/J0wCSCgBKIr42V1vVVF64VtLbqK16XqYp9t96ugq6ul1MvWemGLUC/1ca0utlZqkT550IWKVb4iXeyWWlwtouulK9iKW8u2VR/lIhCIKAiEkNk/zomMIZNMMvPLOYHP63l4kpnMnPORR/KZ8zvn/H61XVvGsXk7PHV7Jb+e3IvKXjD259s44pxqYHCXtgdQkcvR55nn6X3trTQ983iXtxNKJpPp8t9Xd0h7Pkh/xrTnA2Ush7TkK1gGZlbj7g1d2aiZnQ+scffFZjai0OvcvQ6oix/mWq8uVowVS6r42Vdr2LQmAxU5mpsqGHzCWjq1qeZmsnkPc9XVUFnBpgtHsfG6sTR3IVdoba3GliZpzwfpz5j2fKCM5VBKvmw22/GLitTekcFdZlYD/BFYALwYT0dRjJOBUWZ2LtAH2NPMZrn7JaXF3WHj6koW/dsAls7OW6w+14VZRnM59rp5cvRtr0qoqqL5MmPtlZdowXkR2W0ULAN3vxLAzA4DzgbGxYvbvALMdffl7bx3IjAxfv8IYHw5iwDgyatreO+VakqaZjqXY8Bd99Nv5uNsH1zD5vPOZOP1X2PQUZ9J5dGAiEgoxZxA/iPR0cED8aWinweGAAXLoDuMmtbAi/f1Z+nsfl2eZbT/Dx9iwNSZbLr0K2z47k2Utp6liEjP1akTyO6+nWiRm868ZyGwsDPvKUbL3EHDr9vYpVLoN/2n7Dn5AT7+8nlsuHOCikBEdms9fkKdllL4+kurOXb0JvY5urHD9/R9bB573fZ9Np97Bh/ce6vmFRKR3V6HRwZmNtLdnzKzQ4Hrgdnuvih8tM7paJbRFn3mLWDg+DvYMuILNPzoTsiEvLpWRKRnKOYj8fj46ySiu5G/Hy5OWL2fXkTNtTfT+LnjaJh+jyaZExGJFVMGA8zsQGC7u78EbAqcKYjqF37DoK9PYNvRR7B+5n3k+vZNOpKISGoUUwZ3Af8K3BPPTfRy2EjlV/Xq6wy6/AaaDj6AdbPuJzegf9KRRERSpZgB84OBm9x9Zfz4poB5yi7zxjIGX3otzfvUsu7RB8gNGph0JBGR1CmmDN4CJpvZXsDPgTnuvj5srPLI/OltBo+5huZ+e7Bu9jTdUSwiUkCHw0TuPi++e/gy4CzgHTN73MxOCZ6uBL3eXcHgC6+GykrWPTaN7fsPSTqSiEhqFXNp6ReBi4AaoiODcURzQMwDTg2arosq31/L4IuuomLLFurn1LH9kL9MOpKISKoVM0x0DDDJ3VfkP2lmXwsTqTSV6xoYfNFVVNY3sG72NJqO1KI0IiIdKaYMZgO3mFl/oqGiy919ursvCxut8yo+/IhBY64h89
5K1s26n23HD006kohIj1DMpaUPEi1Uk43nJhodNlLXVHy8mUGXfpOqZX9m/fTv0fiFE5OOJCLSYxRTBr3c/Q+dfE9wteeMZs+Jd1G5ei1s2cqgK26kevFSGqZ+l62nD086nohIj1LMMNGzZvZjIGtmU4CnA2cqyo4F7J9k+96DySxfRcMPvsOW885MOpqISI9TzKWldwA/Am4hGjJ6IHSoYlU0bqNiayOZ5avIZTJUvfZGdKQgIiKdUrAM4nsJqgHc/Q13fxzYSrQEZupUNDXR7+EnqLl6UtJRRER6nPaODB4D5pvZQAAz+1vgYeDS7gjWGbnqKnJ9erPp0q/QMO2upOOIiPQ47a2B/ISZrQJ+YWa/AoYD57j7B92WrgO56iqorGTThaPYeN1YTTchItJFBcvAzO4AcsAK4AZgGnCDmeHut3ZTvoIajz6cxmGfVQmIiJRBe1cT/Vf89Rlgamc2Gk91vQjoHe9jjrvf1qWEBdQveLScmxMR2a21N0z0XAnb3Qqc4e4bzawKeMHMfunuPW4tBBGR3UGQBYDdPQdsjB9WxX9yIfYlIiKlq8jlwvyONrNewGLgUGCqu09o4zXjiGZBxd1PbGxsDJKlszKZDE1NTUnHaFfaM6Y9H6Q/Y9rzgTKWQyn5qqurIZpFumTByqBFfGnqz4BvuPsb7bw0t3LlynZ+3H1qa2upr69POka70p4x7fkg/RnTng+UsRxKyZfNZqFMZRB8nqH4UtSFwMjQ+xIRka4JUgZmtnfezWp9iVZI+0P77xIRkaQEOYEMDAFmxucNKgF39/8MtC8RESlRqKuJXgeOD7FtEREpv1SsTSAiIslSGYiIiMpARERUBiIigspARERQGYiICCoDERFBZSAiIqgMREQElYGIiKAyEBERVAYiIoLKQEREUBmIiAgqAxERQWUgIiKoDEREBJWBiIigMhAREQKtgWxmBwA/AfYFmoE6d58SYl8iIlK6UEcGTcCN7n4kcBJwjZkdFWhfIiJSoiBl4O6r3H1J/P1HwJvAfiH2JSIipavI5XJBd2BmBwGLgKHu/mGrn40DxgG4+4mNjY1BsxQrk8nQ1NSUdIx2pT1j2vNB+jOmPR8oYzmUkq+6uhqgohw5gpaBmfUHngPudPe5Hbw8t3LlymBZOqO2tpb6+vqkY7Qr7RnTng/SnzHt+UAZy6GUfNlsFspUBsGuJjKzKuAJ4JEiikBERBIUpAzMrAJ4EHjT3e8NsQ8RESmfIJeWAicD/wgsNbPfxs9Ncvf5gfYnIiIlCFIG7v4CZRrHEhGR8HQHsoiIqAxERERlICIiqAxERASVgYiIoDIQERFUBiIigspARERQGYiICCoDERFBZSAiIqgMREQElYGIiKAyEBERVAYiIoLKQEREUBmIiAgqAxERQWUgIiIEWgPZzB4CzgfWuPvQEPsQEZHyCXVkMAMYGWjbIiJSZkHKwN0XAetDbFtERMovyDBRscxsHDAOwN2pra1NMs4nMplMarIUkvaMac8H6c+Y9nygjOWQlnyJloG71wF18cNcfX19knE+UVtbS1qyFJL2jGnPB+nPmPZ8oIzlUEq+bDZbthy6mkhERFQGIiISqAzM7FHgJeAIM1tuZl8NsR8RESmPIOcM3H10iO2KiEgYGiYSERGVgYiIqAxERASVgYiIoDIQERFUBiIigspARERQGYiICCoDERFBZSAiIqgMREQElYGIiKAyEBERVAYiIoLKQEREUBmIiAgqAxERQWUgIiKoDEREhEBrIAOY2UhgCtALmO7ud4fal4iIlCbIkYGZ9QKmAl8EjgJGm9lRIfYlIiKlCzVM9DngT+7+lrs3Ao8BFwTal4iIlCjUMNF+wHt5j5cDn2/9IjMbB4wDcHey2WygOJ2XpiyFpD1j2vNB+jOmPR8oYzmkIV+oI4OKNp7LtX7C3evcfZi7D4vfk4o/ZrY46Qw9PWPa8/WEjGnPp4ypyVcWocpgOXBA3uP9gZWB9iUiIiUKNUz0P8BhZnYwsAK4CBgTaF8iIlKiIEcG7t4E/DPwK+DN6Cn/3xD7CqQu6QBFSHvGtOeD9GdMez5QxnJIRb6KXG6noXwREdnN6A5kERFRGYiISMDpKHoiMzsA+AmwL9AM1Ln7lGRT7Sy+w/tVYIW7n590ntbMbCAwHRhKdEnxFe7+UrKpdjCz64GxRNmWApe7+5aEMz0EnA+scfeh8XODgNnAQcA7gLl7Q8oy3gP8HdAI/Jno7/KDtOTL+9l44B5gb3evTyJfnKPNjGb2DaLzrE3AL9z9W92dTUcGn9YE3OjuRwInAdekdBqNbxKdmE+rKcBT7v4Z4LOkKKuZ7QdcCwyL/zH2IrraLWkzgJGtnrsJeMbdDwOeiR8naQY7Z3waGOruxwL/B0zs7lB5ZrBzvpYPeWcD73Z3oDbMoFVGMzudaIaGY939aOB7CeRSGeRz91XuviT+/iOiX2L7JZvq08xsf+A8ok/eqWNmewKnAg8CuHtjUp8U25EB+ppZBtiDFNwD4+6LgPWtnr4AmBl/PxP4UreGaqWtjO6+IL56EOBlonuKElHg7xDgB8C3aOPG1+5WIONVwN3uvjV+zZpuD4bKoCAzOwg4Hngl4Sit3Uf0P3Zz0kEK+CtgLfDvZvaamU03s35Jh2rh7iuIPnm9C6wCNrj7gmRTFfQX7r4Kog8qwD4J5+nIFcAvkw6Rz8xGEQ2n/i7pLO04HPgbM3vFzJ4zs79OIoTKoA1m1h94ArjO3T9MOk8LM2sZa1ycdJZ2ZIATgGnufjywieSHNz5hZjVEn7gPBrJAPzO7JNlUPZ+ZfZtomPWRpLO0MLM9gG8DtyadpQMZoIZoaPpfADezsk0zUSyVQStmVkVUBI+4+9yk87RyMjDKzN4hmgn2DDOblWyknSwHlrt7yxHVHKJySIuzgLfdfa27bwPmAsMTzlTIajMbAhB/TWT4oCNmdhnRSdGL3T3xoZg8hxCV/u/ifzP7A0vMbN9EU+1sOTDX3XPu/huio/7a7g6hq4nyxG38IPCmu9+bdJ7W3H0i8Qk6MxsBjHf3VH2qdff3zew9MzvC3ZcBZwK/TzpXnneBk+JPjZuJ8r2abKSCngQuA+6Ov85LNs7O4kWsJgCnufvHSefJ5+5LyRtaiwthWJJXExXwH8AZwEIzOxyoBro9o+5AzmNmpwDPE11u2DImP8nd5yeXqm15ZZDGS0uPIzrBXQ28RXS5YWKXRLZmZrcDFxINa7wGjG05eZdgpkeBEUSfCFcDtxH9knDgQKIS+wd3b+sEaZIZJwK9gXXxy1529yvTks/dH8z7+TskXAYF/g4fBh4CjiO6RHe8uz/b3dlUBiIionMGIiKiMhAREVQGIiKCykBERFAZiIgIus9AdjFmdhrR5XqVwHbgFnd/0cw2AEuAKqJpE7LAWe5+c/y+7wAL3X1h3rb2IJr+4/D4fXXuPpMuimdzPSOFNzOK6MhAdh1mVgvcDnzJ3UcQTey2Of7xUnc/HbiRaG6nYtwGPBdv6xTg7RIjDgT+vsRtiAShIwPZlZwLzGqZTyqeefa1Vq/5L
cXPrDnc3SfE28oBiwDM7IdENwh9CFxMNKHhWe5+s5n9U/zehUQ3Eq0nmhLhAmAccLaZLSS6gWxt5/8TRcJQGciuJEt09zhmNga4muiO2PF5rzkVWNbVHcQzSvZz91PjCe6upPDMtjVEcyGNBr5MtPD5gWmbQkQENEwku5ZVRIWAu/8UuIQdE34dY2a/JiqIu4EtRNMotOjDjiGl9hxCdO4BojmNDuXT8+Tnzzb5e3dvBlYQDRGJpJaODGRXMh+YY2bu7hv49P/fLecMADCzRuB4M2v5QHQCMLnV9l40s4vd/ZF4EsOTieZaOif++TCipR43AEPi544BXo+/b10S24hWVhNJHR0ZyC4jHoO/HZhnZs8CDxCtad3Wa9cRTVW+iGhywjltTAJ3O3BaPMb/38Ah8RTDm83seWAM8GOiX/5ZM5sP7N1OxPeBQWY2J17fWCQ1NFGdiIjoyEBERFQGIiKCykBERFAZiIgIKgMREUFlICIiqAxERAT4f3FrZSn+QHfyAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Managed Memory Plots\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYEAAAEkCAYAAADJiI15AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOydeZgUxfn4P9Vz7n3NshwLcsqpIqeoeKGAJsYrtOaXGDWRJH4TYqKJUZOowYSYaDwSCUaNoHjRKsYYLyAaFVEOEeQ+5L522QPYa86u3x/dOzt7XzPswtbneeaZ6e7q6rdrut+36q2qt4SUEoVCoVB0TbSOFkChUCgUHYcyAgqFQtGFUUZAoVAoujDKCCgUCkUXRhkBhUKh6MIoI6BQKBRdmJPeCAgh7hNCbO9oORRtp+5/KIS4UQgR7ohrtyOfC4QQUgiRb2/3tbfPbea8XUKI37T3+i2QL25lat/Xd+KRl6J5hBD/E0I83dbzT3ojkAiEEJlCiEeFEBuEEBVCiENCiNeEEEPame/TQoj/xUnMk5kFQK+OFqKVLAN6AAc6WpBGOBHLVBEHlBFoGz2AfsA9wCjg60Aq8L4QIivRFxdCuBN9jc6MlLJKSlnQ0XK0BillUEp5SEppdrQsDXEilqkiPpxURkAI4RFCzBFCHBVClAoh5gCeOmk0IcQsIcRhIUS5EOJlIcTPqpvCwuItIcRKIYQr5pwlQohPhBBOKeUmKeUVUsrXpJRbpJSrgP+HZRwabd4LIVxCiIeFEPuEEAEhxEEhxMv2sfuA7wPn281pKYS40T4mhRA/FUK8KIQ4Crxg7x9sy1puf94UQgyMud6NQoiwEOIcIcRqIUSlfV+j68h1sRBinRDCL4T4UghRLUOLmvRCiCuEEF/Y+R8RQqwQQpwZc3yAEOIVIUSJneZLIcTX7WNZQojnhRB7hBBVQogtQojbhRCiievVcl0cj/sUQvw/IcQO+9wlQoh+McfquYyEEOfaefe1t2u5gxq5xhlCiGX2NbYKIfQWyNXSez9LCPGRXcal9rPUrW4+bcj3Qrssq8v0wgZkzBNCzBPWO1dmv0fnxRz/lf3c9I3Zd68Qorip8opJe58QYrsQQhdCbLNl/ZcQIl0IcbX9TJUJIV4VQmTEnDdKCPGOEKLQfn9WCiGm1sl7lxBiphDiMfv5LRBCPCSEcMSkuURYLpkSYemeD4UQ4+rk008Iscgupz1CiB+LOm4cIYTTvpeddroNQogf1snnFCHEu/b/uEcIMaO58mkWKeVJ8wEeAQqBK4AhwEPAMWB7TJrbgHLgemCQvV0ChGPS5AL7gYfs7V/bafo0ce3+gAQmNJHmNmAfcAHQBxgL/Mw+loql3JcB3e1Pkn1MAsXADGAAcCqQBOwG/guMtj8fANsBt33ejYAJfARMtMtkEfAV4LTT9AIqgaeBYcAk4HP7mt9pQZl3B4LAHVito6FYBvG0mOMFwBIsAznA/n8uizn+K6wWVT/gO/b/c1PMNe6r8x/eWOf/Sth92teuAJba/9dYYDmwFhANyWfvO9fOu6+9fYG9nW9v97W3z7W3k7CeubeBM4AJwEpb5t80IV9L7r071nvwInCaLduXwMftLNOedtnMtcv0EjvfaJna97UReA0YAwzEep8CwFA7jQDeAz4FnPb1QsA3WvjeV/9HbwGnA+cDh215q8tzItZz+KeY8y4AbrBlPxX4PdazfGpMml1AKXAnlr64FghT+/m8Cphm5zEc6xkrAXJi7m8N1nMzDhhpy3UUeDomn3l2+U3GeheuBY4A34/JZ7X9XIy381ls/7dPt6SsGiy/jlLY8f4AKYAfmF5n/ypqK5D9wP110rwc+wLY+y60/+x77Qfy6iau7QDeBVYAWhPpHgPex1YeDRx/GvhfA/sl8M86+76PpSB8MfvygCrguzEvsgRGxaQ5y9432N7+g/2gO2LSTKXlRuBMYpRdA8fvBw4BKa34Lx8DFsds30fzRiAh92lfWwIDY/adau+7uCH57H2tNQI3Yxm/rJg8RthpmjMCzd37/ViVD3dMmjPsNOe1o0x/j1URccak+Tq1jcCN9rWddeR+H3g0ZrsbcBD4O7AXeKwVz8t9WO9q7LswG4gAuXWeq1XN5LUW+HXM9i7g33XSvAu81EQeGpbh+La9fUkDz1A2dqXE3u6HZXSH1MnrHmCN/ftiO59YI5WL9c632QicTO6gAViun2V19i+t/iGESMeqvXxWJ82ndTOTUn4A/AXrAXtaSrmwoYvazcLnsBTD1bJpn+9crJrYdiHEE0KIa0TL/fsr6mwPBzZKKYtiZC4AttjHoruxHuxq9tvfefb3MGCllDISk6ZeeTTBl1i1uPVCiNeFELcKIXrHHB8NLJNSVjR0srBcbXcKIdYIIYqEEOXAj4BTWiEDJPY+D0spo+4eKeVWoMjOM14MAzZJKUtjrrMeq7bYHM3d+3DgMyllMCbvtXbesc9Ka/MdBqyQUsaOKlpKbcZitUSOiBq3ZTlWzXxQjDyFwPeAW7BavXc0IVdD7I99F7AqHoeklIfr7It1geUKIf4uhNhsu6PKscqj7rO3pu61qCmDalfPfNsldQyrZp4Rk88woKjOM1SC9a5WMwarpr+qTjndTU05VeezNSafw3XyaTXO9pzcyaj2Ict2prESWsr9HKzaxEAhhJC26Y1J4wZewqpVXSCl3NdUnlLKNcLyJV+C1dJ4DLhfCHGWlPJYMyI1pEQbug9RZ79ZR/FVH9Ma2NdUvg0ipYwIIS7FetkvBq4BHhBCTJNS/qcF+d0O3IXlKlsNlAE/B77WUhlsEnqfDRDbZ2HW2QZwtSG/tsrTlntvbn9L8m1I5rrbGrAJy2VSl8o62+djvW95WEq0sAnZ6hJqQI6G9sWWyTwst+wdwE6sGvXLQN2KWbDOdt18/oNVKfgxVismiGUM3XXOaYrq/M6mfrlUn9ueZ6TZC58MbMcq/HPq7D+7+oeU8ijWEL0JddKc1UB+92HV7s/Bqs3+KvagECIZ+DeWdT5PSrmnJUJKKcullK9LKX+KZf2HYj382PI7Gj25NhuA4UIIX4xMebbMG1qYB1j+2rGxHV3UL58mkRYrpJSzpJTnAR8CN9mHPwfOEUKkNHL6ecC7Usp/Sim/sGtLgxpJ2x7ac5+5QogB1RtCiFOBHCzlBpay6lYn71GtlG8DMEwIkRlzneFYyrC9bAAmxLY6hRBn2Hm35llpKN/xde677sCIVVj9ZceklNvrfKLDZYUQFwO/AL6B5WJ6VojGBwfEifOAv0sp/y2lXIfljurfmgyEEDlYO
uABKeV7UsqNWG7pbjHJNmI9Q7GDNrKw3tVqPre/+zRQTl/ZxzbY+QyKycdXJ59Wc9IYAdvd8ATweyHEN4Q1cubPWB1asfwF+JkQ4ttCiEFCiJ9hdcRELawQ4nys2ukNUsrlwHRgphDiLPt4GpYLZDBW540phOhuf5Iak1EI8Uv7usPtFsH3sGo+1c27ncAQ+7hPCOFpLC+sTr7DwAJ7lMNorFrMfqwx3y3l71g1rzlCiKHCGt3xB/tYS1pMZwshfiuEGC+E6COEmITVObcxJn8NeENYI036CSG+brcewGrKXiCsUSanCiF+j9XpFW/ac5+VwFwhxGghxBjgWWAdVmc3WB3yyVitugFCiGlYtcLW8CJWK+h5YY0SOgt4Bqt22l4eB9KBeUKIEcKaoDYfWCql/Lgd+c7B8kk/aZfpJGrKtJoXsJ7rt4QQk4U1SW68EOIuIcSVYLllbHkeklK+DXwLq/J2WztkawlbgG8LIU4TQozEatW3tBJWTSnWezjdfn4n2PnE/m9LsNxqzwkhxtoGeD5WP4bV6WdVfp4BnhJCXC+EGGg/B98TQlRXQP9r5/O8EGKcLfMLdj5t5qQxAjZ3Av/CKuAVQCZWB1Esj2K9FI8BX2C1Av6CZb0RQmQDz2N1TL0DIKV8Dcuf/5KwhpiNxqrx9MX6Uw7GfK5tQr5jWA/2p1hK5CrgGilltU/vn1g9/8uwHqxvNZaRlLIKy3gFsEZwfIjlMpoa6/ttDinlfqza19lYvs/HgOoZqv4WZHEUq0b9BrAN60F+AaszEinlQayyKsMaEbEBS1FU1/Lut2V/A6tcsoC/tlT+ltLO+zwIPIk1wuUTrBf8qmr3oP3/TQeuA9ZjGfe7WylfJXAZVgtjBVYZVo92axd2X9FkIB/r+fqPLec17cx3P3A51oiX6jK9rU4aP1ZLdxXWO7QVWGifs9uu7c/Dqv3/1j5nJ1a/0Czb6CaKm7B04AosvfEuVvm0GLsPcBpWn+SXWPfyKNYzU51GYr3rFcDHWOX/DpYRin32foD1n/8aqxL1X6zRSzti8rkS6537yM7nbSw3apupHuLWpRFCPAOcIaUc3WziLoCwxnB/CJxuN5NPSrrKfSo6H7Y3YR/WyK+/daQsJ1PHcIsQQvTEssofYLliLge+C/ykI+XqSIQQt2C1aA5g+TcfAZafbIqxq9ynovMhhPgGlttmE1Z/wb1YriCjI+WCLmgEsBT/NCw3hBerQ/kWKeVTHSpVx3IKVh9IHtYwusXYHeFCiLtpwrUhpUw9HgLGiUbvU9E5EUL0oaZ/qSF+KKV84XjJ0w6Sscb898VyC32ONUekw0N1KHeQoknsPpLsxo7Hjn1WKOKNEMKJpTgbo0BKWXacxDkpUUZAoVAoujAn2+gghUKhULQCZQQUCoWiC6OMgEKhUHRhlBFQKBSKLswJN0RU1/VnsMLVFhqGMaKZtKdgzWDNxYrv/R3DMJoM8qZQKBRdiROxJTAPKw58S3gIeM4wjNOBmcAfEyWUQqFQnIiccC0BwzA+0nW9b+w+XdcHYMUIysUK9jXdMIzNWLNCf24n+wArPohCoVAobE7ElkBDPAnMMAxjNFY42r/b+9dSEyTrKiBN1/WcDpBPoVAoOiUnXEugLrqup2JFhnxF16PrcleHYP4F8Liu6zdiRd3bTzvDrioUCsXJxAlvBLBaM0cMwxhZ94BhGAeAqyFqLK4xDKMly/UpFApFl+CEdwcZhnEM2Knr+jQAXdeFrutn2L99uq5X3+NdWCOFFAqFQmFzwsUO0nX9JeACwAcUYIVkfR9rlaMeWGu7vmwYxkxd17+JNSJIYrmDfmwYRqAj5FYoFIrOyAlnBBQKhUIRP054d5BCoVAo2s6J1jGsmi0KhULRNkRDO080I8CBAwc6WoQoPp+PoqKijhajUTq7fND5Zezs8oGSMR50dvmgfTL27Nmz0WPKHaRQKBRdGGUEFAqFogujjIBCoVB0YZQRUCgUii7MCdcx3BBSSioqKjjecx6qqqoIhztfKCIhBCkpKR0thkKhOAE4KYxARUUFHo8Hl8vV0aJ0CgKBAPv371eGQKFQNMtJ4Q6SUioDEIPH48Hv9/PKK69w7NixjhZHoVB0Yk4KI6Coj8PhwOFw8MUXX3S0KAqFohOjjMBJjBCiU/ZZKBSKzkOXNQJJCxfSbdw4euTn023cOJIWLoxb3iUlJVx//fVMnDiRiy++mJtvvpni4uJaaWbNmsUpp5zCww8/XGu/aZpMnz49eu51113Hrl276l3j4YcfplevXmzevDlucisUiq5HlzQCSQsXknHHHTj370dIiXP/fjLuuCNuhkAIwS233MLHH3/MkiVLOOWUU5g1a1b0+EMPPcTatWtZtmwZS5cuZfbs2bXOnzZtGh9++CFLlixhypQp3HHHHbWOr1u3jtWrV9OrV6+4yKtQKLouJ8XooFjS77kH18aNTaZxf/45IhistU+rqiLz9ttJfvHFRs8LDRvGsZkzm5UhKyuLs88+O7o9atQonnvuOQBmz57NV199xfz583G73bz44ovMmDGDp556iunTp6NpGpMnT46eO3r0aJ5++unodiAQ4O6772b27NlMmzatWVkUCoWiKRJiBHRdfwb4OlBoGMaIJtKNBT4DrjUM49VEyNIgdQxAs/vbgWmaPPfcc1HF/uMf/7jWca/Xy1NPPdXo+XPnzuWSSy6Jbj/00ENcc8019OnTJ+6yKhSKrkeiWgLzgMeB5xpLoOu6A/gT8F48L9ySmnq3ceNw7t9fb3+kVy+KX42vLfrNb35DSkoKN910U6vPnTNnDtu2beOVV14BYNWqVaxZs4a77747rjIqFIquS0L6BAzD+AgoaSbZDOA1oDARMjRF2Z13YiYl1dpnJiVRduedcb3OzJkz2blzJ3PmzEHTWlfUc+fO5fXXX2f+/Pkk2bJ+9tlnfPXVV5x11lmMHz+egwcP8u1vf5sPP/wwrnIrFIquQ4f0Cei63gu4CrgIGNtM2h8APwAwDAOfz1cvTVVVVauuX3X11QCkPfAAjgMHiPTsSdmdd0b3x4MHHniAL7/8kvnz5+PxeFp17vPPP8/zzz+PYRhkZWVF9//kJz/hJz/5SXR7/PjxPPvsswwZMqReHpqmoWkaqampDZZZZ8HpdCr52omSsf10dvkgcTJ2VMfwo8CvDMOI6LreZELDMJ4EnrQ3ZUOLKrRlLHzV1VfHVenHsmXLFv72t7/Rv39/vvGNbwDQp08f/vnPfzZ7bnl5OXfeeSf5+flcd911gDUD+D//+U+rZDBNE9M0KS8v79SLZXT2xTw6u3ygZIwHnV0+SNyiMh1lBMYAL9sGwAdcput62DCMf3WQPHFl8ODB7G+gz6ElpKamsm/fvhalXb58eZuuoVAoFNV0iBEwDKNf9W9d1+cB/zlZDIBCoVCcSCRqiOhLwAWAT9f1fcC9gAvAMIwnEnFNhUKhULSehBgBwzC+1Yq0NyZCBoVCoVA0T5cMG6FQKBQKC2UEFAqFogujjIBCoVB0YZQRUCgUii7MSRdFtCX4dkzGHdhQb3/Q
M5yi/ovanX9JSQm33noru3btwuPx0LdvX/70pz+Rk5MTTTNr1iz+8Y9/cOutt3LbbbdF95umyQ9/+EM2b96Mx+PB5/PxwAMP0LdvX8CaJezxeKKzkH/9619zwQUXtFtmhULRNemSLYFg0mgk7lr7JG6CSWPikn+i1xN48sknWbx4MYsXL1YGQKFQtIuTriWQfugeXIGm1xNABoFQnZ1hXIH15Oz+ZqOnhTzDONa9Y9cTUCgUinhy0hmBFiHcRBzdcEQKEUgkgogjF4S7+XNbSbzXEwCiQeTGjh3LnXfeSUZGRpylVigUXYWTzgi0pKYOoIUKyPtqAsgACA9F/d/FdHaLuzzxXE8AYOHChfTq1YtAIMC9997Lb37zG/72t7/FU2SFQtGF6JJ9AgCmK4+KjGuRCCoyrk2IAYj3egJAdF1hj8fDDTfcwMqVK+Mqs0Kh6FqcdC2B1lDu+xmuwFbKc38W97wTsZ5AZWUl4XCY9PR0pJS88cYbDB8+PN6iKxSKLkSXNgKmK4/ivq/FPd9ErSdw+PBhpk+fjmmaRCIRBg0aVGvUkUKhULSWLm0EEkWi1hM45ZRTWLSo/fMYFAqFopou2yegUCgUCmUEFAqFokujjIBCoVB0YZQRUCgUii6MMgIKhULRhVFGQKFQKLowXdoIVGoFvJNzDZVaYVzzLSkp4frrr2fixIlcfPHF3HzzzRQXF9dKM2vWLE455RQefvjhWvtN02T69OnRc6+77jp27doVPe73+7nzzjs555xzmDRpUr0IowqFQtEaEjJPQNf1Z4CvA4WGYYxo4Pi3gV/Zm+XALYZhrE2ELE2xNvVRCtzLWZv6KBOOxW/SVXUo6epIovfffz+zZs3iL3/5C1A7lPSMGTPweDy1AstNmzaNiy++GE3TmDt3LnfccQeGYQDwhz/8AY/Hw9KlSxFCcPjw4bjJrVAouh6Jmiw2D3gceK6R4zuB8w3DKNV1/VLgSWB8PC68PP0eSlzNhJIGIgQpcq8GIdmSMp9i13ocNB1FNDs0jPHHOi6UdEVFBa+++iqrVq1CCAFAbm5us/IoFApFYyTECBiG8ZGu632bOL4sZvMzID8RcjRFhSN2Vq6kwrGP9Ej/uF8nnqGkd+3aRVZWFg8//DDLli0jJSWFO+64g3HjxsVdboVC0TXoDGEjvg+809hBXdd/APwAwDAMfD5fvTRVVVXR3y2pqVdqBbyWNwGEtHYISdBxlPOL/k6yGd9oovEMJR2JRNi9ezcjRozgt7/9LatXr+bGG2/kk08+IS0trda5mqahaRqpqakNlllnwel0KvnaiZKx/XR2+SBxMnaoEdB1/UIsI3BuY2kMw3gSy10EIIuKiuqlCYfDrbru2tRHkcha+yRm3PsGqkNJz5s3r82hpBcsWBANJZ2fn4/T6eTKK68ELDdTdnY2O3bs4Iwzzqh1vmmamKZJeXk5DZVZZ8Hn8yn52omSsf10dvmgfTL27Nmz0WMdNjpI1/XTgaeBKwzDKG4ufTwpdH+OKYK19pkiSKF7VdyuUR1K+plnnmlzKOmXXnqpVijp7Oxszj77bD766CMAvvrqK4qKiqKL0CsUCkVr6ZCWgK7rfYCFwPWGYWw93te/oiixkTgTFUoaLONy++23M3PmTJxOJ3/961/V8pIKhaLNJGqI6EvABYBP1/V9wL2AC8AwjCeAe4Ac4O+6rgOEDcMYkwhZOoJEhZIGK5z0q6++2lbRFAqFohaJGh30rWaO3wzcnIhrKxQKhaLldOkZwwqFQtHVUUZAoVAoujDKCCgUCkUXRhkBhUKh6MIoI6BQKBRdGGUEFAqFogujjABQoJVwV/o/mOy7LS75JWo9gb1793LJJZdEP+PHj2f48OFxkVmhUHRNOkMAuQ6jQCvh0dRXWJDyARKToGhdDKLGSNR6Ar1792bx4sXRdPfccw+RSCQuMisUiq7JSWcE7kn/Jxtdu5pMEyTEPsdhCh1HAIkUNce+mfPbRs8bFurLzGPfb1aGRK0nUOsegkFef/11XnzxxWblUSgUisY46YxAS9jq2keZqATRfNr2Es/1BGJZtGgR3bt357TTTouvwAqFoktx0hmBltTUC7VS2w30PmYdN9CrxffHVZ54ricQy4IFC6IB5hQKhaKtdMmO4W5mFrOO/YBPC+bwrYqL8Uo3bhl/e1i9nsCcOXPavJ7A/Pnzo+sJVHPo0CE+/fRTrrrqqniKq1AouiBd0ghUU9cYDA/2jVveiVhPoBrDMJg0aRLZ2dnxElehUHRRTjp3UFuoNgbxIpHrCYBlBO6/P75uK4VC0fl4fGQeFYcdMXusFcJSciP8ZE1BXK6hjEACSOR6AgBLly5tU94KheLEouKwA4HkAoq5hCIW4eNDcuoYhvahjIBCoVAkADMM4aDADFrfkaAgEsT+rvkdbmR/JCjIJcCN7CWXIB4kl1HIGI7yLPlxk1MZAYVCccIiJZghiIQEkUAjyjbmmBkShAPU2h8JCrxujWOlqUQCwk4H4UD1+Xb6QM3vcMBOU32NmGPV15dm28agOzBJIUIyEW5jB17MaOetB0lP/NzKTspIj0sZKiOgUNThePhhTzSkxFKYIWErQSyFGhS19tdKE8RWjoIkl8bRkpSadDFKOhwEs5kacbih/SFL+caPdIRD4nRLNDc43RKHW+JwY3/X/PakmzjdoLkkTo+93yVxxP52mSRJk6RwBG/QxBuK4AmauP0mHn8Ed5WJq9LEVWHiKpc4y00cxySOKtmklA7gIB5S43TXyggoFHVozN8aTz9sY5gRGnUZ1Kqdxhzb69E4UpxU41oI2cq3kd9RZWsr0ZbUds1QPJRtRvSX5rKVrQtbiTagcF3gSjFr0rltJeuy0zTwO6q4PbYirvPb6YnJK+Z6eT1zKD1WhNbQXywlohK0UhOtVCJKJVqpjG5rR+ztw2bN71KJOCoRjehzqYGZKZCZAjNLYPbTCGc5CVZvZ2mYWYLPf+TlPErwUJORH43lZDGJyjj8J4lbaP4Z4OtAoWEYIxo4LoDHgMuASuBGwzBWJ0IWhSKefPlyUrR2W10jjtZKY2q+kSC2Em2kFltXwdt+Yxlpq7KtP5QYsGum0laC2EpQ4nTXHHN6JZ70OgoyVsHWqw3beTVRU65bm87tns3R8uKochcdNTg9KGsUd4EZVdieYAkZ+ytrK/UYJS+CjWdpphJV2maWINxbQ8Zsm5k1St20lbxMF6A1/18v+3U25xSXQowRMIEdOcmd2wgA84DHgecaOX4pMMj+jAfm2N8KRafmndvrK1uhNVDjdINmK0OHrXDdKSaOLFqsSC33AnZNWOL0NKxsc7plUlZZWusch1uiOUEch9AoLSHVB/7GqsVtwbRq2lppzOeIGfNb1q65V29XNJZhJakeaint8ABHPSUuG1DquBJXyNO/LOIIGRwBfD4fRUVF1n6K4naNhBgBwzA+0nW9bxNJrgCeMwxDAp/pup6p63oPwzAOJkKeukz2pTA6GOZn5UHyzDg+mDYlJSXceuut7Nq1C4/HQ9++ffnTn/5ETk5ONM2sWbP4xz/+wa2
33sptt9WEsDZNkx/+8Ids3rwZj8eDz+fjgQceoG/fvgAsXryYBx98ECklUkpuu+02LrvssrjfQ1dEmrDlbW+TaW5ZUVBf2SbeS9QkPh9oRSdoNNmmXC1HYpV8K10tGbbCzhKY3TXCQ0RNzTxLi9bILcWukTXQR1FlceexmseRjuoT6AXsjdneZ++rZwR0Xf8B8AOwJkn5fL56mVVVVbXq4hvcDra5NBakuLm2Ihh3Y5CoUNJSSm699VYWLlzIkCFD2LhxI1deeSVTp06tF5ZC0zQ0TSM1NbXBMussOJ3ODpdPStj0tuC93zk4sLZpP0XPoVm4ndDKKCAJpTOUIQBBCcURRGkEik0oiSBKIlAcwXGkhLwie7vEtNLZv0Ww8XdPpmmQrSFzHJDlQA7UkNkO+6NBjvWbmG0ytFquFo3mQyM4nU58KbnxKYcEkaj/uaOMQEPmtsEnwTCMJ4Enq9NUN4diCYdrAsDdk+5ho6v5qlnQtvjzU9zMT3HTLSLJj5i4mzhnWCjCzGOBZvNOZChpIQRlZWUAHDt2jG7dujUYl8g0TUzTpLy8nIbKrLMQ28Q9nlQKP4dEKVuWwdY/noJ/lQ/Zv4SSp9/D++PrSGlg1EmFBzK+Yz0hTk3ickrcDnA5JS4HuJ0St1PicoLbIevvt3+77HPcDonbSa18au2rlWBBwPgAACAASURBVNY+t9Y1rbR5uZmUHyupl5dTa2PFtq6rJca10jZXC0i3gCyIVNfE+wrMkQ7MLGeNq6VOp6iZIcDd2A1IIGJ/YogAJa2/5Y56DltDe2Ts2bNno8c6ygjsA3rHbOcDBzpCEGm/JQUOqNQ0RoTMuOYfz1DSQgieeOIJbrrpJpKTk6moqODZZ5+Nq7wnOlUEKHSUUuAo5ZBWQoGjhEKtlEOOUgocJRRoJRQ4Skn9dCATfvND8v83mrLeh1jx9wfZPjCI+Pgy/FOb0JzXzyY1mE6PQHe6BbvhC/hIC2YQDguCEUEoLAhFIBgWBO3fRys1QhEIhQUBe18omh5CEStt++heb4/AJAtJXiRMbjhMt3CE3FAYXzhCTihCdiBCVjBClj9CRiBCpt8kvSpCqt9Ea6RybgqoStGoSNXwp2kE0gSBgRqhdI1QuiCcqRHJFERshU62hswW+HpnUll+JGq4XI4ag9aZWlVdkY4yAv8GfqLr+stYHcJH49Uf0JKaeq+eNZMs3FKiQdQt1C3OfQTxDCUdDod5/PHHmTt3LmPHjmXlypXccsst/O9//yMlJSWucnc2AoQojFXsjlIKtFIOOUps5V5KoaOUI1p5vXPd0km3SBZ5ZjanrZjA5JlX4Fw0CK1bFa5bd7E1N4Ud/7udwHsavXLC7Nekpe3q4gpz1aVDOKRtZZvrNbY5tgPlJJteRoVOZVxwCGODQxgVHEyqTKp/fhNISYxxsL9jfof9EkokohREqYmjVOI4InEdNUmpckGRH3eZifeYibfcJKncJLnCxNnEYnkVbkGZ18GRJAdHPA725rgodTso9jgodjkpcjkocjoodDkpcDg5qDk44nREK071KLM/exs+DHkN7u0Mrapu5YKKMqedrn5ebW5VxYFPtrj55V/CPPvZtXx3whM8dJuDcwY3MVyplSRqiOhLwAWAT9f1fcC9gAvAMIwngLexhoduxxoi2noN2U4SrfyhJpT0vHnz2hxKesGCBdFQ0hs2bKCgoICxY8cCMHbsWJKTk9m2bRsjR46Mu/zHgyAh9jsOU6CVRpV5XcV+SCuh1FFW71yndNjKPYsB4Z6cHRxBXiTL+pjZ5EWsT5ZMpWizi6UPpbH1nSTcGSb+KwO8lupm/66+eApM8s4OEbowxP4RwAYH2kwPP99Ywp3bivjjqT4eHZqNeU+I15PHY9VbrgfAJSM4OMZqVyFL3ftAHETIDfSKCE4PZjAhmMdF/r70NW1fboJcLWaWPbIlW8PsLzCznFRlCWRmzFDFLIGZGTOqxXa1pNufPjU5AiH7U5uICUG79WK1bGp+B2NaNTW/rW9vUjpFpWV2Wmq1mgIhUWe/3ZKq01KqblUFY1pboQYMZqjNw2y7NXpEiPpuuOrvasNRY7BsY2X/ttLZhstVd3/NcXedtC6nZPshJ39+M51HNtzFucXL+dX6R7hhziyevaUkboZASBl/5ZdA5IED9b1GZWVlpKWltTiTyb4UxtijgxKh/MEKJb1q1aoG1wNojueff565c+diGEatEUWFhYVMnDiRt956i4EDB7Jt2zauuOIKPvnkk3ohp3fs3M7r2+9lmHMal154ZVzuqTWEiXBYO2Ip85gafFSx266ZYsexeuc6pEaumUl3W4nnmVl0i2TRParYrd9ZZhpaM11+JTscfPJwGhv/lYTphY0jTZZmOQh6JJwZgQvDMDZMb4fJaaEII0IRFhY4MC4vYtD2EKkRSblDsGugyc9fe4H7fT/lkEPjoIASv6D8KFQdhXCpiXnEqqFnlEpyis3oJ7skQk5xGF+xSWYpjbpapKgzgajO2PJa29kiOhY9p7ePouLiePxtCeN4+txjW1W1jFQDBqP625ucTlFJWb20oQiWC6+W4bGMU6COEarrBmzsmqGIJZeUzRuryjf7kmTW925UaR5K9y5rcZnYfQINXrBLGoFEs2XLFi666CL69++P12sNOWxNKOkhQ4aQn58fvafYUNILFy5k9uzZCLtt+otf/IKpU6fWy2fz7tW8tO1n5CedwfRz/havWyNChCLtaG2fe6xrxva5F2lHkXXG8GlSI9fMqKXY+3vySSv3Woo9kk2emU22mYaDto27lMA+h+DzQy42PZgKL3uIaLC2H3w+QBAYHCHnvCBjxwcYkxJmhK34s2JEdY48SnaxidOsnS9OCA/QokMVm5pAFE4Bf7ZGaY7JoZwQB3MiHMiBQp+L4pwUSrJTKM5xUJyjUZKtUZyj4U8X5CHpHrE/pmn/tr57mCbdIrLe4IWTvVPzeNAR8lmtqvqtpqjxCgu+P9PkofW/48pD75ISqaLCkcTrPS7lF8PvZfUzTfj66tCUEVBhIxJAIkNJX3311Vx99dXN5hMSFYBkj2cV1/Q4n72kc1/Qgdf04TVzSDJ9eCM+69v04TazqZRuSkWEAseRRn3uhdoRTFG781xIgc/MiLphTg8NsN0xWeRFsuluZtMtkoXPzMBZR7n7fD6KKtv28pnADofGBpfGOreDdS4H2wudDP6lhxELnEgJ6/oK1o8xGXaRnwfGVXFZ9wCp1Qq/ERfLrqFOun1UW8MLwEyGcP+aCURNzQqtdrU4sMY+9wIkkr2OQla6V/Cpeytb3AXscAZB+BAyF5/ZH2H2oUjLZa8jjSKHm0ADjmifbRSqjcQALUJ6kquW0ciQx2UJbUU7cGiQ5JYkuaGRwZGUpvXgmCsNb8RPlebBG/FzzJnGkbRcGhhR3yaUEThJMYEtZLKbJPYTIiIEG4VGoWsfxdo2SkWEY8JJGW7KcFGGKyZWYQ3pUiPbdOOTyYwKZ5AX6UePSDd6RXqRH+5Dz3B3cs1MXAl+lMLANqfGOpeD9S7re4PLQYU9HjxtA0
[base64-encoded PNG payloads omitted: this stretch of the notebook diff adds several matplotlib figure outputs ("display_data" entries with "image/png" blobs, empty "text/plain" placeholders, and "needs_background": "light" metadata); the image data itself is not recoverable as text]
NnxzlMj3W8XaPUHmy6oMvb9dhP79CpxJT+XO/7SAzCIjUPUCyffyyt80MPPcTDDz/M73//e6BxPv/bb78dl8vVKNvn9OnTOf/889E0jZkzZ3L33Xfj9/sB+M1vfoPL5eKjjz5CCMHBgwdTInNnowNb7VqcJW8q+9I4a744ajA8YnBuMMwwPcrwiMGAJNZ8IrVBwaebnfXW/aZ9ZobToqwoUw2dXpsNQusc2Fwawy4PMObGWp67uGuLZXqckoHFUQYWJ38IpYTDtYLllXYWVTtZVuVga4Wd/YcEBw7Y+XKTg9BhgZSNFWF+lkHPwig9882OIbFzKM6P4jiSxZ2xmPXExUcxq91S7NrB1mPWQ2dpVkijsCJhTAUvc5pX6tlFHqJltUcg+InNbStL6793VtbRY0b535/7T9Y5drR4TJgIX9sOcsBWAUjin79vdbmv2fOGR/ryYNV3W5UhXfn8a2treeWVV1i+fDnCsny7dm1ZSWUihwWWcrexzabzZZGXTQ6NkHVPTikZpBucHWfND9cNurQxxDJqwFc7HfXW/RfbnUSiArdDMn5AiG8NDdBzncbe1z3U7FP5pRYAACAASURBVHfh6qVzxi+qOPnqOrIKTc13tBaXEFCYLZmWHWEaEQACAj5y2nnPbeddl509QoNyQd/9kiH7JMX7QRzS2FtuY89hG8u3O6mobTwvowlJcZ5hdghWxzCsGHpU2+kdidAjoJN3ONqg4EvbELPew5xuiQxz1Mep18esd9cwuqQmZl1xbHLMKP+2sMnxNdWijmZDM1JIKvP579ixg4KCAh5//HE++eQTvF4vd999N+PHj0/fDRwFUWCbXWNd3LTNOoeN/fHWvDQYGpHcVBtmmKXoB+oG7X37wM4yG0vWu1iy3sXHG11UBsxrjDwpzM2Ta5g4NMRJlZK1c7xseDyHTRFB33OCXPDbCvpPCaEl6Pl0WFweCVNDOlNDOhLYaNfMjmCwnUUjbUSFIN+Icl4wzA9DEc6p0/HukRzeDNXbILwL5D4D+1qDrEMG+ZU6XWuidAk37ZCCNkFZjp2qAjuhrhrRkRq2XgJ3H8gdAPZegmixBm6l1BUtc8wo/7ZY5ge0w9Z0z3sYCdM9rxx6KKXypDKffzQaZefOnYwcOZL77ruPFStWcOONN/Lxxx+Tk5OTUrnbS4WA9ZY1v95S9hsdGkHLmrdb1vyZIZ0REYPhujk3P6ywC2XlVe2/Xp3g442msv9wg4udZWYT7Vmgc9HoAJOGhZg4JEyuXbLuNQ9f3pnLZ2ucOHMMTr2hljEzaikc0HHzpolo1ZKRpRFO2Rfirv0GkVJJ6UFJ7UGw7zMo3hul+/4oDh36xZ0nNTC6WguOBpkO08qSfHZpYfa4HeywO9gkHGwJOthTYWdvuY3SKs2cXtqH+fkMCrxRehYYlBTq1pSS+b1ngUHPAp3iPAN7x+UOU2Qwx4zybwvdjAIervo+d9ZMb7YTSAWxfP6zZs064nz+c+fOrc/n36tXL+x2O5dffjlgTicVFhaybds2Ro3qmOSoUWC7TWuImXdorLfb2GNvuL8uUYPhusGMOGt+kG608pLMlgnr8MW2hnn7r3Y6MKQg221w5uAQN0+u4exhIQZ0iyIEVOy08eUfs1n1UhbBCo2ioRGm/baCEVcGcHrb6FQ9kpdohM20Adr+5E7TmONUSzL9XZRrpQ0o1jh4lp0PTnKzrJeDz/s42NvTRqS7YGSe5LxolLNDOrHbKCoqpEtZGV2QnEIYc3VDA5Eo7K9ocErviTmoD9vYfcjO55sbRkoxbJqkOK+xzyH2PfYp8MqW0tIojhOOK+UfI7ETWO7ckLKy05HPv7CwkDPPPJMlS5ZwzjnnsHXrVsrKyuojgZrjgHaYv+bOb3c0U1XMmrdr9Vb9BrtGwArBs0nJQN1gfDjK8LpwfaRNN0Me9YyalLBpnxmCuWS9i8+2OKkLadg0yal9I9x5UQ2ThoUY3Tdc7wCVBmx738WXs7xsfc+F0GDwRUHG3FjLSaeH26WobNuiFNxSi31bFBGAnMeCZL0cpupnHrCJJk7T+miYsiRpA5zUz6NHhtuITnY0OErjnKYyq0FAGzDc+lykRXnfJXjXbWeBy8EcTeCUkglhM5T1SmSLUf4OG5zUJcpJXZof6dQERX3nEOsYYt+/2unk7ZU2wnrjCvQ4jXpHdLxzOv7j6bjkk4o0IWQLoXMdjNy7t3FOi+rq6k6f9ohn48aNTJ48mf79++N2u4H25fMfOnQovXr1qr+nWD5/gJ07d3LXXXdx+PBh7HY7P/vZz5g8eXLSsrZs38qf187irRmbMZzmyGbP3nlNjjOAHfHWvKXsd8dZ8wWWNT8sYkbZjLCs+fZ1a02Jn08/UKnx4QYXSza4+GiDi/2Vplbv301n0rAQk4aGOGNwiFxP47YYrBCs9mfx5bNeDu+w4+0aZdS1dYy+rpacHkcWt118SgXa4aaRL4lEiyzlXdyQtTHmKI0WC4weVsx6ikzkCLDMaeNdt533XHY2WT1fX93sCKaEdE4PRY/6d0nEMOBQjdaoc4jvLPYetlFa2XSeqEu22Qn0626nyFtndhaFDR1Gt1wDWycsnkrGCfAO3yNqhEr5H4Ms2bWM+1Y/ztYZIaRlgf24agYG/ajVenJQK2SX3ctGu506y5rXpGSAboZUDrfCKYdFonRPgTWfSCAsWHegiDc/D/Hhehfr95pu3gJvlLOHhpk0NMTZQ0P0asZiPbDWzopnvax91YMe1CgZF2LMjXUMuTiA7Sgtzi7fqsb1adNpwMgQjYpHvWZK3q6i02PWd9kEn3fJ53U9yCcuO0EhyDIkE0N6fdqJjkpEF9bN6aU95U07h9IqJ7sOQnWwsaa3a5Lu+QkhrQnf8zwdM72klH9yjstpn+OfrsDZQF+gF9CfP+T2jNtfBazFbXxNiV7GSXo1A6JRekfz6RHtQs9oET2iXSg0uiDaHX/TlKgBa3Y7zKicDS6Wb3MS1gVOu51xA8L8/PIqJg0NMaJXhOZcJNEwbHrbzYpZXr5e6sLuNhh+RYAxM2opHpk6n03dNU4cq/RGc/OGF2p+5CZyWuY8Dr2jkjGGjenlAQICPnHaedcKJV3oMX+zYdZK5/NDOqeGo2l7mJ126F0UpXdR0846priqAgnTS3Ejhy+2OXmzwkYk2lhHeV1Go86hR2yayZpa6lEQxX30zVPRDJnT2hXtQAA9gCswExav5idVeXSLlpMt9xEW+9hnK2Of7RB7rb9vOQ9RodU0KakomtfQIRhd6Bnt0qiD6B7tgitJB7H7kK1e2X+00VUftz6sJMJ3zqnlkgkuhnUrw+Ns2Tqt3q/x1fNeVs7JovaAjfw+OufdX8kpV9Xhzk+9ZRuc6iTv3gDE50y3CYJTM3cS2yNhimXxS8yEdu+6zM7gqWwnf81xkW9Izg3qTAlFODcUpbCD01PneiS5Hp2hPZN31IYBB6u1Bue01THEVlGv/drBwaqm00tFcSumexQ0dVR3zTGaNSgULaOU/zGNAOkF4eWumiiQZ32GJj26TgTZqzV0CLG/+2yH2
GUv5TPbWiqThKsURfMoruqNc/UEAl+NonTNIA6XmlkSi/LDTDm5jnOHRjh7aIiuueZkumkRJldAUsLXS52smOll09tujCj0Py/EmBsr6H9eKK2JtmSuYP/6/DgZM3c6IBkCGKwbDNbD3FobpkrABy5zgdl7LjuvZTkQUjImEmVyUOf8oM6IFrKSdhSaBsV5BsV5Bqf2jSQ9JhhpOr0U6xy2lNr5YL2L2lDjxuGwSXrkW47ouFFDSZz/Qb2xLTlK+R+j2ACXNOgTWYNkNvBAq+dkSTcDoyUMjJY0e0ytCJidgTzE5zs0lq/LY8vanqzd2ssMRncHYOQXcMlSGL2Usl47eFXAkmg+PeNGDAO1PuR53HEjiEJknYN18zysmOXl4HoHrjyDsTfVcuoNtRT067zY/GOZXAmXBnUuDeoYwCqHxnsuO++6Hfwu183vcs30GZMtp/HZIZ3sjHHzNcbtgL5do/RtZrW1lFAVEI2d03Hfl2118vphG3pCptVcj6RHfteG0UNC59AjP4rzBNSEJ+AtH/sIJL3YyjNluxggBtAWxd8aUsKW/XaWbChiyfoSPt3spDakoQnJqD4Rrr2wlklDQ4zpFybs6ME+20T22oayr6LxSGKbfS8fuVZTrdWBFc2at/kkTnniSkbMugRnZTahk7+Gv3xJzpX7qHbls8Uwp5qKo4U4VJM8YjRgdMRgdCTMT2rCHNQE71vTQ295HLzodeKICyWdHNQZEO38UUFbEQLysiR5WTrDeyWfXooacKBKa+R3KA9ks3WvboW3OjhU03R6qVtuNHnnYI0kio7D6SX1pB2DFMrD9OEt9mUdYkDgt0dcTlm1FYJprabdV2GFF3bVuWJ8gElDQ5w5JER+VmNT0Sk9DNJ7MUjv1WzZ9gIP7756gPUzC6h8rxvYowQvX8PmWxeydeJS9trLqNECjc4RUtDNyK8fLZi+hy70iBbRM1pET8PsIOyoJaptoash8QUi+AIRIsByp80aFdj5VZ6bX+VBX91gcjBSH0rq7myhjxKbBj3yDXrkG4ztZ04vFRV5KCs7XH9MIAz7Kho7pWPfN+218/5aF4FwY03vtEvTCZ3feNQQP82U3c7MrR9vdHL937oQighcjh7M/uEhzhrSSurZFHJchXpOK/IyNqxzZ004LS/lTldK5927d3PTTTfVH1tVVUVNTQ1r165NKsfqXUvwr76P/jfsYFr1c/QMT0K0wX4LhGHZVhcfWI7adV+bjtz8LIOzhoTqY+6TRXW0lUC5YNXcLFbNyaV8hyC7e5TR19Uy6tt1ZBc3Dq6vErWNfA97bYfYl+CTqE3IWqZJjW5GflznUNTkb7FR0KYOoqioiLXlm1Ka9juVpNMnsdsm6juCj512gprAY0gmhvX6UUFJG56hY8Fv0l4ZpTTTjMR3DomO6v0VNqJNppeMpiGtcQvlusdlbv14o5MZTxY26mQ8ToNnby1vVweg4vwtSnrm4pRm3PpVteGUdwKHDx9m/fr1jVI6V1RUNErpvGzZMh5//HFuv/12pkyZUp/wzTAMFi1a1Cil89tvv12f0jme+++/n2g0ym9+85ukcqze9SH+1ffS74ataC5Jrt6ffoHL6Be4nHx9YP1xhmG90GS9myUbXCzd4iSkCxw2yWn9w/XK/uTekaNekLN/tYMVM72sX+BBDwr6n21w8rUVDLowiO0Iw/UkkipRxz5bmdkx2BKc1ZrZYdQ100Ek6xhiowkJ/KPrWzyrvV2f9jvZQrnOpKMUawD41NUQShpbBBgLJZ0c0hnbTCjp8aj824IeNaeXko0eYquoD9c2NkCEkBTnGvQoiNanMUnE5ZBs+9O+NstxQsT535/rYl0bEp7HXgYy2+tkttdJt6ikV7Tl/DPDI1EerAq1Wna6Ujo3kj8cZv78+bzwwgstSGJ1aAI06cAd7cJX2X/kq5w/wL4plC3/IZtWncOn6/Mpt+Y3h/aMcMOkWiYNC3H6wDBZrqPvFPUQbHzLw4qZXvaucOLwGIz8Vh1jbqxl2Nn5lJUlyTXcDgSCPOklT/cyVO+T9BiJpLLJCKJhJLHesZN3XV8Q1MKJJyKAJM/fCYcHmBwylfyvMd+mliyU9ByrIzgvpLc5Dffxit2GlSzPAJJHL9W/GKjczt7DGntifw/bkyp+gFCk4xpkWpW/z+ezAcuBPX6//5J0XisRaXUCpTao0zRGRlL7GrdUpnSOZ+HChXTv3p2TTz652XMrDvZhyby7mf2fq7jm7hvoETiXvV++xeKNgt37zMC27IJ9jBzzb84ZonPZwIH0ySlotrz2UrVHY+UcL1+9kEVdmY2CfjpTflXJyOl1uPM6VikIBPkym3w9m2EtdBAVooZ9tkPcVvAHNtl3I0WjSH8AHsidyQXB8YwLDz1h/QoCGKQbDNLD3GKFki6JCyVdYIWSjrZGBd8SBiWYzmZFY1p6MVD/O3okVfQuR8c9P+m2/O8A1gO5R1tQWyzzkp4Nl3FKiUbD9E+3FFsqqUzpHM/cuXO5+uqrWzw/9pYoPeLmuYfnAwKP0+CMQWFuOrOSsSN2YB/wEjuyFnDYsZ7FUqN7+Cz6111O7+BFuGReu2WWEnZ94mTFLC+b/+NGGjDw/BBjvlNB37PTG5t/tAgEBTKHAj2Hlw49EJfxVRIWDVbbs963eSb7DfKNbKYEx3JBcDznhkbjlZ4WSj++yZVwSVDnEiuUdI2jYVTw+xwXjwmdbsXZjUJJc07sQUGbmP3DQ83O+XcUaZvz9/l8vYBngd8AP2mD5Z+yOf90Kn0wUzqvX7+eWbNmtTuz58yZM3nxxReZO3duo8yeAPv372fixIksXbqUwsLm8zkuWb6L+55czVbnDKRw4rBJNj6+D1eSufUK+ya2eV5ju2cB1fYdaNJJSfA8+gUu46TQVBwyq0V5QzWCta94WPGsl0ObHLjzDUZ9u5bR19eR37t5x3CmzwUf0A7zVNc3eFb7v/q03xv3Pc9i15f8x72M99xfUKHV4JIOzgqdzLTgOKYFx1FstJRnM7Vkeh2WaYLlRfksiAT5wGWnUhM4pGR82FpgFtIZkAELzDK1HhtH+8gjivbJSIevz+d7BfgtkAP8NJny9/l83we+D+D3+8eGw41vfPfu3fU579vCtCIvp1nRPulQ+mCmdF6+fDmzZ89ul2xgpnSeOXMmfr+/UYRQjD//+c+sXbuWp59+usVyEpU/QMjfcqORSErFCtZrc9movUKN2INdZjHQuJShxnT6yQuwxXlGDmyAT56ysXyORqhaUHKqwVk/NBg93cDRhtu22+3oemrfo5Bq7HY7X+ulPGx7ls/EGpbq/6rfp6PzsVjNG9pHvKF9xA5hOuHGGcO41JjIpXIiw2TfNkVZHY18x0Id6rqOjuRTIfm3ZvC2kKzVzOevn4QLDY2LDI1zpMDdCV1Bptfj0cjndDohk5S/z+e7BLjY7/f/0OfznUszyj+BjM/qmc6UzgATJ07koYce4rzzzmuxrETl394IAYlBqfNztnsWsMP9JiHbYZxGHr2rL8G+4BZ2/OM0dn7oxuaUDL0kwKk31tJz
TKRdGRgz1dqKp60ySiQb7LtY6F7GQvdSVjq3ANBX787U4Li0+QmO5TrcYxP100MfWaGk7rhQ0ikhnZJox8wPZXo9HlcpnX0+32+B6wEdcGPO+c/z+/3XtXBaxiv/TCFe+btd9nbHBsdjEGFb9VKWzQ2x55+TiO7qja3XHnrf9BkTrvLSO//kI7JuM/2BgyOXcZ92iHfcy1noXsrHrtWEhU6+kc35wdOYFhyXMj/B8VKHQeBTl60+7cROK5R0aFwo6WlpzEqa6fV4XCn/eI4nyz9T+PCLXdz7t9V87b2B535UfcSKf++XDr6c5WX96x6iYUHvs+o46eaPCV/xZ77OfgdDhPDqvegXuIz+gcso0Ie3uSPI9AcOUiNjjQg06ye4IDieqcHTjthPcDzWoQS2xoWSfu60oQtBniGZFNLrO4NUhpJmej2qfP6KNtOrMMpdl0W56KL9OBztW0GlB2HDG2ZytX0rnTi9BqO+XcepM2opGqwDI6D6acI11ex2/4dtngWszX6KNTlPkBcZVN8R5Eb7p+fmjjGypYdLgmdySfBMIugsda63poeW8V7+U/wMODU8iGnW9NBg/aS0+gkyHQEM1A0G6mF+UBumWsCHLnNx2XtuO294YqGkBlOCESaHdE6OGCqUNA0cVyt8TxS2b9/O6tWrueiii9qs/Cu/trFydhZfvZBFoNxG4cAIY79Ty4grA7haic0LauXsdL/FNs8CSp2fgZB0CZ9irSq+FK/RNEtopltbkF4ZY36C/7iX8o572RH5CU60OjSAtQ6NRVZH8KXDhhSCblGD86y3l00K6eS2U2Vlej0et9M+7UAp/zbSVuUvJez80IzN3/KO6aAedEGQU2fU0mdi+158HqNW28cOzxts9yygzLkSgOLQBPoFLqNv8BLchhnFlOkPHHSsjMn8BAXRHKaEzPUE54RGNfETnOh1eEgTLLZGBYvdZiip3QoljTmNB7YhlDTT61Epf6X820xryj9ULVjzchYrZmVRvtWBpzDK6GvrGH19HbklqcubX2XbznbPArZ7FlDh2ISQNnqEzqZ/4DJO9V5LdVnyZe+ZQmcphbb6CTJdaUHH1aEOrHDa6n0F661ULyfphvmWs6DOGSGdZG72TK9HpfyV8m8zzSn/gxvtfDnLy5pXPETqNHqcGmbMjbUMvSSAPY25eiWSCvsGazHZ69TYd2GTLkqCk+kfuIxewfOxJ30sO5dMUAqJfoJd9lLA9BN8UzuXieUjMtpP0Fl1uEcT9SknPnTZCVihpGfFhZL2skJJM+F3bgml/JXybzMrl+ziuftW03XrDDTpxJVrUDwiwq5PXdhckmGXBRhzYy09RnW85S2RlDm+ZF/hf1iPn4DtAHbDS+/gBfQLXEZJ6By0FLw0PhVkmlKI9xMsdC/lK+dWwPQTmCuMMy/vUCbUYRD4LC6UdIcVSjokEmVySOcKl5dBB8szpNU15ZhR/j6fT/j9/nT0GClR/qlYMt0c6crnD/DOO+/wu9/9DiklUkp+8pOfcPHFFyeVI1H5A+T20jn1hjpOuaaOrMLUJrE7EoqKijhQVkqp8zNzMZnnLcJaBS4jnz6B/6Jf4DKKw6ejdaIiywTF1RKhIsncuoXt8hN0NJlWhxLYZtPM9NRWKGlECHLjQknPC+l0zaCspMeS8v+D3+//8ZFcrBWOWvmn6gUJzZGufP5SSkaMGMG8efMYOnQo69at4/LLL2fDhg1oSd4dl0z5/8+uvWiZYxA2adBRwux1fcB2z+vscv8bXavDEy2mX+BS+gUupygyusOnNjJNcSUSL1+NCPC+60szhDTF6wlSJWMmUiPgq6JC5ofreM9tp9R6ccXocLQ+lPSUTg4lPWbi/NOk+Fvl/pdz69881RyfbXHWZ7yMEQhrXPXnLpw+sHnlP7xXhAenV7UqQzrz+QshqK6uBsw3eXXr1i2p4m+OTFL8ybDh5KTQVE4KTUUXAXa73mG7ZwEbvM+xLvsf5Oh96Bf4Bv0Cl1OgD+1scTOObOnh0uCZXBq3nsCcHlrGe/kr6tcTXBAcz7TguIz2E3Qk2RIukxpnVQaRlbDW3jAqeDzHxe9z3XSNCyU95whCSY9VWlX+Pp/vn36//7vWdwE84/f7v5d2yY6ARMXf2vajIZX5/IUQPPXUU3znO98hKyuL2tpann322ZTLnCnYpYd+wW/QL/gNwqKKne632e5ZwOrsv7Eq5y/kR4ZaHcFl5Eb7dra4GYcDO2eFT+as8Mn8quqmRn6CR3Kf55Hc5+v9BBcEx3NahvkJOgsBjNQNRtaEuaMmTLkmWOyy8a7LwUK3A3+WE7uUjIsLJR2UAVlJ00VbLP/6pZx+v1/6fL4BaZSnWdpimbf0goRXfnwopfKkMp+/ruv89a9/ZebMmYwbN45ly5Zx6623snjxYrxeb0rlzjScMpdBgasYFLiKgFbGTvebbPMs4MvcR/ky91GKwqdaHcE3yDK6d7a4GYdAMEzvw7CaPtxZM73ReoJZ3rf5e/YbGecnyBQKDckVAZ0rAjo68KXTVr/A7Nd5bn4N9LJCSScHdc4K63iOo1FBW5R/mc/n+x7wCXAGkFotmkI66gUJDz74INu3b2fWrFntmpoB0+KfP38+c+fOrU8JvXbtWkpLSxk3bhwA48aNIysri82bNzN69OhWy8wuzNw51/bgMYoYWncjQ+tupMa2hx3uN9jueY1leb9iWe6DdA+fTr/A5fQJXIxbdvz89rFAD6MLN9RdwA11F1At6ljsWslCa5XxK1mLM8JPkInYgXHhKOPCUf5fdYi9muB9653GL3scPOt14paSM0MNoaQndVBW0nTRFuU/AzPn/o+AjcANaZXoKDhrSJhnby1vFO2TKmdvjEceeYRVq1Yxe/bsdr/IZc6cOcyZMwe/39/oRS49evRg3759bNmyhYEDB7J582YOHjxInz7JX0tY2H0HU656lBln/wCH3UFt/jVU8fBR3VemkR0tYWTtLYysvYVK2xa2e15nm+c1Ps3/GZ/l/YKS0CT6BS6nd/ACHDK7s8XNSHJkVpv9BBcExzNI76X8BBY9Dcm1dRGurYsQAj53maOCd9123sv38AtgsBVKOiWoMy4czdhQ0uZoNdrHeg/vdKAr8CRwqt/vX5YGWTI+zj+d+fznzZvHE088gbByLvz0pz/lwgsvTFrWrk1LWL3oPmacvRWHw0XpwE8x7N1ScYspIx1RIBJJuX1t/ariWvsebNJNr+AU+gcupyQ4GTttX82W6ZEq6ZKvtfUE7fETZHodQupljIWSvuey85nLRlgIchJCSdvzMqmMDfX0+XwvAh8AN/j9/jN9Pt8iv99//pFcrBUyXvlnCrs2fcjqRfeayt/uoLbgWuryr0F3j+hs0epJt1KQGBx0fME2zwJ2eN4gaCvDYWTTO3gh/QKX0zM0sdXFZJmuuDpKvn3aIRa6l/GOe1m71xNkeh1CemWstbKSvmeNCvZboaSjwrFRQYRRrYSSZnKoZ1e/3/+Uz+fzHckFFOlDCieB7Ml4K14g+/BMwu5R1OVfQyD3cqTt+O40BRrdIuP
oFhnH+KoH2O/8xFpM9n9szXoFV7SQvsFLrMVk4xEqKXCz9DC6MKPuQmbUXdisn2Bi6JR6P0E3o6D1Qk8QvBIuDOpcGNSRlbAuLpT0T9lO/pDjoosVSjrFCiXNk+YrZ8dar5wt6iTZ22L5vwAsAO4EHgWu8Pv916dBFmX5t5GdW1awZuFdfOvS0wmc9FtE9DBZlfPJqngBR2g9hvAQzL2UuvxvE/acxhGl7zxKOssijBJij3sx290L2O1eiK4FyIr2qA8d7RI5BYGgTivlk27/zZkH/kKWkVlTZjE626pO9BPsth8AGvsJzsgfzaGyjI0BATqvHsuF4AO3GUr6vttGhaZhs0JJP3PZsUuJDZhh2LjlYCXFR7DqON3TPm7ge8AwYANmnH/wSC7WCkr5t5Ht27aw9v1fctG3f4fN07Nhh5Q4gl+RVfECnqrX0IxaIs6B5mggbzqGvelL49NFZysugIioZbf7HbZ7XmOPazGGiJCr96Nf4DIqbdvY4XmTIbXXc0ZVZjrLM6EOY0gk6+07699jHPMTDJAlTKkdk9HrCTKhHqPACoeN96xRwVpHQz1pEhxIrqoNc2dNuF2dQLqV/2i/37/S5/N1A24EXvf7/RuO5GKtoJR/G2lLPn9h1OKuehNvxQs4A8uROAjmTKMu/9uEvGeDSO9DmgkPXDwhUcFOz9ts97zGPufHIMx2r0kHlx/4gNxo8siqziTT6jCevVoZ77iX837uShaLFUQsP8H5odO4IDiOc0KjyZJpTCXbDjKxHkt65jbZJqRkQjjKq4fq2lxOuuf8fw9MAR7EdPzOxIz3V2QwUvMSyL+KQP5V2EObyKp4EU/ly3iq30K3l1CXiCs51gAAIABJREFUfzWB/KuIOpq+het4xCXzGVx3DYPrruHDvDvZljUPKaIYIsL8bucwtPZ6BtddR4E+pLNFPSboaRQxo+5C7sq6ju2HdtX7CRa6l/Jy1vvKT9AOnBK0OMu/o2iL8s/y+XwuwOX3+1/0+Xy3pFsoRWrRXYOpKv4lVV3vwV2zkKyKF8kpe5ycsscJec+lLv8agjlTQTg7W9S0U6eVsiPrdaSIf6lNlA3e2azP/hfFoQkMrruOPoGL2xU2eiLT0nqCd/O/ANR6gmQ4pUSjYc6/PeGhqaAtyn8OpsP3l9b8//b0inTk/HV0MbUHm05neLtGuW1l6VGXn86UzosWLeJ3v/sduq6Tn5/PH/7wB3r37n3UMjdCcxHMvZRg7qXYwrvJqpxLVsVLFO75PlFbFwJ5082QUdfA1F43g/gq+49IGj9kAjv9675Jvj6Yjd7ZfFhwO0tz72dg4CqG1F6rXlbfDhLzDq2376x/j3FD3qEeXFC/nmAItgz0E6SbEeEop1nRPsMLu1DWCSmmj6uXufxvSc9m9/1sz95m97WVdKV0rqioYOLEiSxYsIABAwbw6quvMm/ePJ5//vmkchzJC9ybRUZx1X5AVsWLuKsXItAJecabo4HcS5HakeWBycR5VoAFRdM47FzbZHtBeASXlS1EYrDP+TEbvbPZ5f4PUuj0CJ3NkNrr6R2c1qEvosnUOoynPTLG/ATm+wnWdJifINPrMWMXeXUgLSr/RffncmBdyw/e7k+bT7dw0hmhZvd1Gx7h/AdbTxyXyFtvvcVzzz3H3LlzeeKJJ1izZg1/+tOfcDqdBINBbr/9dsaPH8/NN9/c5NxVq1Zx66238vHHH7Ny5Up+/OMf8/777wNmJzNy5EhWr15NYWHT3CspVf5xaPoBsipfIaviBezh7RhaDoHcy6nL/zYRzyntKivTHzhoXcY6rZTNWS+xKet5au178ES7MajuGgbXXUt2NP2+kuOhDpsjfj3Bu+4vqNRq0+YnyPR6zLhFXj6fr8Dv9x8+IolOAFKZ0rl///4cOHCAlStXMnr0aObPnw/Anj17kir/dGHYu1HT5YfUFN6KM/A5WRUvkFX5Mt6K2YRdI62Q0W8ibXkdJlNnkmUUM6rmDk6uuY09rvfZ6J3Nquw/szr7L5SEJjOk9jpKQpM79W1kxyqJfoLPnetZ6F7Kf9xLeTf/C4QUnBoZVJ9uQvkJUk9Lc/6/9fl8BcBmYCHwid/v1ztGrKa0xTJvadrn269kbkrn3NxcnnzySR544AFCoRDnnXceeXl52O3tftdOahCCcNbphLNOp7L4ITxVr5F1+AXyS39B3oGHCOT+l7WAbEKnLCDraDRsnBQ6n5NC51Nj+5pNWS+wOesl3u1yI169hMF132ZQ3TVkGcWdLeoxiQM7E8MnMzHBT7BQ+QnSSlvi/AcBU4EzMYcXnwPz/H7/1ymWJePn/GM8+OCDrF+/nlmzZrU7s+fMmTN58cUXmTt3bqPMnvEcPHiQCRMmsGbNGrKysprsT9e0T2s4AqutBWTz0YxqdGc/6vK/TV3edAx710bHZvpQG45ORoMIu93vsDFrNnvdSxDSTu/gNIbUXk+P8MSUpJM43uuwLaTCT5Dp9Zhx0z4x/H7/Zkzr/29Whs8JQA+gWeVvRQUtAVzWNV7x+/2//P/t3Xl81MX9x/HXd6/skWNDLg7lkqNQbwURgSqCeFC1iIMI1NuWX9UqioBV61HxptoWpfUGBB0RBc8KKvVGBbwAERBQQHJvNsnuZq/v74/dQAghJGE3+yWZ5+PBA0iyu+98A7Mz8535TEsCNocrL7Lf1T6JkoySzgBFRUXk5+cTjUa57777mDhxYoMNfyqFHEdR4bgXb8Ht2L2v4/QsJLPoHjKK7ieQMRKfezw1rlOTvoHMCExY6RY4m26Bs/Gat/CD63k2Ol5gm+NNMsLd6Vs9iV5+gV3Vyz8otfsJ6tcdqt1PYNdtDKk5mjMCA9R+gmZKyg3f+HGPLilllRDCCnwE/FlK+VkjDzP8Dt9klnS+6aab+OKLLwiFQgwbNow77rhj92vUl6qef0PMNZtweV7AUSExR0qJWDrhc48jrcdkSqqMXWc/0T3CMAF+crzFBuc8CtNWYtJtdPePpq9vEvnBAc2eszZ6jxVSl7H+fYLtluLd9wlq9xP0CndBQzP8dWyzq32EEE5ijf9kKeXKRr7U8I2/URip8d9ND2KvXI7Ts5C06tiqpRrX0NiS0fRRYGreKKk1JLNRKLds4AfnfDY5FxEyeXGH+tK3ehI9/WNI05t2w9zojRYYI2Nt3aHa+wTf7D6fIHafQKSNpHdxR8PeJzBs4y+EOFNK+bYQohdwA/CilPKDAz1xfIpoFdALmC2lnHaAh6jGv4kM2fjXYQ7tICf4Oux8Ckt4BxFzNv6ssfENZMYpn9AaDVdY87PFvpQNrnmU2NZgjtrp6T+fvr5J5ISOaXQ0YISG9UCMmDFV+wlaysiN/3Ip5QghxNPAE8A/pJQDmvoCQgg38ApwrZTyu3qfu5rYEZFIKU8IBveua/Hzzz/vPudW2WPbtm18++23TJgwwZCNP4DFYiEcqkHzvIdp1zOYSpei6SGiGYOIdryMaN5YMKd2WshisRAOt94CtkJtDV+bnmC96QVCWjUF0eM4JnoVv4qOw8a+16K187WE0TN6qWa55UuW6P
/jbdNneLQq7LqN4fqJ/DY6hHOigykgtfdlDuYa2mw2SGLjv5LYMY63SSmvEkKskFKe2pwXEUL8FaiWUj7UyJepnn8TGb3nD/v2ZkzhUhwVi3B6FmINbiRqcsU3kI0nZD+2XZ05ENQq+dGxmA2ueZRb12ONpnOE/wL6VE+kQ7h/yvM1x6GUsTn3CVKRryUOpufflPVo9wJ/Ax6Mr+Jp7KYtAEKIvHiPHyGEAxhB7CwAJQHMv/yCedYs8ocMwbF4carjNEnUkkN1zh8o7vk+xd1eJZAxGkfFYvK2jiZvy0hcZU+hRdrHnkKbnsGvfJdwbvEyzi5eQtfAmWx0vsjS/JG8mXsemx2LCONPdcw2p3Y/wV3eK/isaA7LimZxY+U4wkS4N3M+p+Zfx9D8a7gr81lW2tYRIXGrBI2oKT3/2nn+Ji+UF0IcDTwHmIm9wUgp5V0HeJjq+TfRTx98wLe33cYlmzdjsdupeOAB/GPGpDrWXprSm9EilTi8S3B6FmILfIWupeHPOBufezxB58mgJffoRSP1Wmu0cjY5F7HBOQ+vdTO2qJuj9N/TteQCsiLGLbRnpGu4P03J2NB9gg6RTEbUnMAZSb5PYOQ5//OITftkAa8RW7Nf1pIXOwDV+DdR3cbfpuuEu3Sh6PPPUx1rL839B20JrMXpeQFnxcuYohWErd3xuS+KbSCzdjRExtago7PL9mm8sNxbRLUQHWsGxwvLnYkZY5XdNuI1rK+5GSs1H++nreEd++e8Z19Nhal6936CUYEBjAwMIC/qTlm+ulplqacQogMwBzgT+C/wqJTyo5a86H6oxr+J6jf+uqbxy/ZEb7g+OC3+Bx3146h8C6dnAWm+T9ExE0g/PbaBLH04aIkreWH0hsuRG2Fl4HF+cM6nyvIz9kguvX0X0cc3gYxIgst9t5DRryEcXMb93Sc4PtRnd92hg71PYNg5fyHEWUKI54BngWVAV2IrdIx58GlEx/W4n4IjPbjm+CGSuH0MZWVlTJo0iaFDhzJixAiuvPJKSkv3rhk0c+ZMunXrxqxZs/Z5/IwZMxgxYgQjR47k7LPP5sMPP9z9ueLiYsaPH8+QIUMYMWIEq1evbnowiwXLDz+0+PsyFJMDf9YYSrstovCID6nKmYzN/xU52y+jYNNAMoruwxzcmuqUrcJFAUdXXcMFRZ8wonQ+ecET+C79MV7OH8yyDpP4Ke0dohh3pU1bUP8+wTtFD3Nj5ThChPe6T3B35nOH3H2Cpkz73Aw8L6XcUe/jfaWUGxKY5aB7/uYfI2T/sRrLjxFMfog6INzTTPkcF5GeB7/B42Dq+QN4vV4yM2Nnd65du5Zx48bx7bffomkaU6ZMoWvXrlx//fV8/vnn3HDDDXz00UdoDayCqdvzt1os6FYrWjhM5XXXUXXNNWCAFUAJ7RHqIdKq3sfleZ60qvfQiFLjPAWf+2L8GWeCqWVzsUbvtTaUr9q0kx9cC9noXIDPvAtnpBN9qifQ23cRrmgnQ2Q0mmRl3GkqiR9o/wWf1LtPMCowkGE1xzTpPoFhp32EEN2AGUA6cAlwmZTyyZa82AE02vhn3u7Duq7xd1XbF2EI730ldAALBAfsf7og1N+M967m19E5mHr+n332GVdddRXffPMNmqbRu3dvVq5cubuE8/Dhw5k1axbHHnvsPo/96ZNP+PaWW5jo9xOYNo2aYcPIuu02HEuXEurXD8+sWYSObl79/URL1n84U+gXnBUSp+cFLKGfiJrc+LIuiG0gs/czRMZEaSxflDDb7e+ywTmPHWkr0DBxeGAkfX2T6FwzLCGF5Q42o1G0RsaDuU9g2MJuwFPANcBjUsqIEGI8kIzG/6DpTjDVq/ysAdEk1EdraT3/Bx98kFdeeYWKigqeeOIJNE2jrKwMXdf3qt3fpUsXdu7c2WDjH+nUiciUKRTVWedf/vjj+M8/n6wZM8gdPZqqP/6RyhtugDa2SS5q7URV7p+pyrkWm+9jnJ6FuDzzSC9/iqD9uNiZA5nnoad4A1mymbDQNTCKroFRVJq3xctML+Qnx9tkhLvRxzeBXr5xOKK5qY7aLmToTs4NnMK5gVP2uU+w3P0lmj6H40N9GBUYwBkN3CcoNJXxSPpLrLJt4J2SfaeMk6Epjb9ZSvm9EKL2763TpainKT1zx8s1ZM3wYare87GoCyr+5sR/QWJry7S0nv/UqVOZOnUqH330Effcc8/ug1sSITBqFDWDBpF5991kzJ6N48038Tz8MMGTTkrYaxiGZiLoGkrQNZSKcBlO72KcngW4d91MZuEd+DPPjW0gc5zQ5s8cyIh044TKGRxbOYWf7G+zwTWPVZkzWZPxIN38Z9PXN4mC4CB1GEorqX8+wTrL1t3TQzMz5zMzcz49wp0YFRjIgJpf8Zl5PXML3kYnSlBrvXs4TWn83xNCzAE6CyEeJXbT15ACI21k3eqHugd0mzUCIxO7PO6uu+5iy5YtPPvss5hMLXsvHDJkCJWVlXz//fccHZ+iKSsr293737FjR+2Qrln0rCwqHnoI/3nn4b75ZnLHjKH6kkvw3nILenrb7A3rlg5Ud7iS6uwrsAbW4PQsxFHxKq6KFwjZ+sRPIBtL1NKB3B/PwFaz5wzf2iscTPs1JT3fSc03kCBm0ugROI8egfPwWDbxg3Mem5wvscW5hKxQL/r6JnGEbyxpeuKWKSqN09D4dbgHv67qwQ1VYvd9gtfsn/Bv11LmpC+JNVcpeF8+YMslpbwb+BdwG7EpoMeSHaql9EyNXevd7NyRvfvXrvVu9MzEXdnaev5PP/10s+r567rOpk2bdv/966+/prS0lK5dY0v2Ro8ezdy5cwH4/PPPCQQCu98UWiI4dCjF775L1RVX4Jw7l7zhw0mLnxHcZmkaIcfxVHR6kMLea/B0egjdnE5W0Z0UbDqB7O1/JGrphF5vrbyOjaDjxBSFTg53uBcDvXcidq1iSPnfsemZfJ71V2THE/jQfT1F1lXoGOb87najczSXS31ngVbn2qdoQLbfG75CiJeACVLKYJ2P9QXmSSkHJiGL4df5H0w9/2g0ytixY/F4PJjNZux2O1OnTmXYsGFA7DCXa6+9lu3bt+NwOLj33nsZMKDh+nnNre1j/fJL3DfdhHXjRnxjx1Jxxx3o+zlFLFGMdCPQEvgeZ8VCnBWLMEU8+3S0dM1OYa9PiVryUxWxQYm+hmWWtWxwzWez42XCpmqyQ/3pWz2JI/xjsOotGxUa6ee8P0bMWGQq55H0l3jR9R5RdIJaaPfnduxsesmWpKz2EUJcAEwGxkopPUKIUcDdwO+llMmo02P4xt8oWlTYraaGjEcfJX32bKJuNxX33ENg9OikZTTifziiNdir3iaz8G4s4V8A0NGocZ1K2eHPgJb6JbJ1JesahrQqfnS8ygbXXMqsa7FEXfT0/46+1ZPICR9piIyJZOSMRaZy5uS9xnOmN4nG5/xT3vgDCCEGAw8S29E7GLhISulpyQs1gWr8m+hgqnpavvsO9003Yfv2W/xnn03F3/5GtCDxB48b+T+cKVRIweZBaHpw9yggYs7D5
xb43BcRsfVMdUQg+ddQR6fE+hUbXPPYYl9CxBQgN3gcfasn0SNwLhb9wCvFjPxzrmX0jLm5uawr28gj6S/xpe37Zq32ScoOXyHE3cRKOewApgBrgClCiAMVaFMMLHzkkZS8/jreW27B/u675J92Go4XX4Qkn+hmJFFrAdVZF6Fjoto9idLDniPoOIH00jkUbB5KzraxOCpehmjbrqypoZEXOo4hnlmIwlUMrLiLkFbFx9lTkAUnsDLzdjyWjamO2S7kR7OZ6b261ZZ5QuOrfZbHf38XmN0KWZTWYrFQ9ac/4R81CvfUqWRPmYJjyRIq7r+fyOGHpzpdq6jKvR5n9Eeq8qYQteRTkzECU6gQZ8VLOD0Lyd55HVmmW/FnjaHaPZ6wvXnTIYeaNN1N/+or6Fd9OYW2lWxwzmODax7r05+ioOZk+vom0s1/FmaMdxyn0jJJP8O3GdS0TxMl9DCXaBTn3LlkzpwJuo73llvwXXIJtHAJay2jD7WhkYy6js33aWzJaOUbaHoNQfvR8Q1k56ObM1Obr5UETKVsckg2uOZTadlKWqTD7sJymZHuhsjYFEbPaNjCbkobZzLhu/RSit97j+DAgbhvvZWcMWMw11mW2u5oGkHXYDxd/smu3qvxFPwNTQ/h3jWDgo3H4d55PTbf521+qswezeHI6smMKfqQM0oX0jE4iLXp/2ZxwSm80+FittnfUoXlDmGq8VcAiBx2GGXz51P+979j3biR/DPOIP1f/4JQ6MAPbsN0sxtfh8so7rGM4u5v4s8ai73yLXK3/Y68H0/FVToHU9i4vcpE0DDRuWYYp5U/wYWFn3Oc9yY81h94v8OV/MfamzUZD1Ft2nHgJ1IMpU02/qbCYnIuuApTUWL/UyazpPOcOXMYOnQohx12GMuWpWgTtabhF4Ki998ncPrpZN57L7mjR2P57rvU5DESTSPkOIaKTvdT2HsN5Z1mETVnk1V0NwUbTyR7+9WkVa0A/dAp6dsSzmhHjqm6gbGFn3F66TPk6UfzdfojLCoYxLvZl7E97V2ih1BZ4/asTTb+6Y88iW3lGtL/vm9htYOhaRqTJ0/mww8/ZPny5XTr1o2ZM/cca/DQQw/x9ddf88knn/DRRx8xe/be98lnzJjB8uXLWbZsGQ8++CCTJ0+m9p7LoEGDmDt3LoMGDUpo5paI5udT/sQTlP3nP5gLC8k75xwy7r8fAoFURzME3eTE7x5HafdXKeq5guoOl2PzfUrOzxPI33Qy6cWzMIfadk/YhIXDa87ggvASLij6lKOqrqHYtoblOb/n5fzBfJP+D3ymolTHVBqRuGORkizz9oewrmv8wBLbZ6vR6szDps9dRPrcReiaRnDQ8ft9XKh/H7x33XTADNnZ2btr+QMcf/zxu0syzJ49m82bNzNv3jxsNhsLFizg2muv5Yknnthd0rm2lj/EbmbXrdXfUPXOVAuccw41gweTdeedZPzjH9jffBPPQw8R2s/O4/YonNYbb8HtePOnY698B6dnIRkls8gomUWN61R87vEEMkaCZqzjFxMpI3I4x1dO45jKG/jZ/g4bXPNYnXk/azIeplvgTPpWT6Jj8BRVWM5gDpnGvymCxx+FZdvPmMo8aFEd3aQR7eAm3C3xyxcTWdLZyPTsbDyPPBIrF33zzeT+7ndUX345ldOmobtcqY5nHJqNQOZoApmjMYe2x84j9rxAhx1XEzHn4M+6MHbmQJpxD2M/WGZsdA+MpntgNBXmzfzgep5NzhfZ6nidzHBP+lZP5Ajfhdj1Dgd+MiXp2txSz6zpM3HOXww2KwRDVE+6AO+9MxKZE4BbbrmFXbt28eSTT7aosudHH33EvffeyyuvvILNtqdXOHbsWP7whz8wcuTI/T42oUs9m0GrqiLz3ntxPfss4cMPx/PAAwTjtYnqM/ryOmiFjHqEtOr/4fQsxF75DhphahwDY6OBzNHopsbLlLeFaxgmwDbHG2xwzqMo7QtMeho9/KPpWz2JvNCJrTIaMPp1VEs9E8RUUkb178dS/NpzVP9+LObi0gM/qJlqSzo//vjjCSnpfKjQ09OpuOceShYvBouF3PHjybrxRrSKilRHMybNTE36cMoPe4LC3l9SkX8r5kgJ2b/cQMHG48n6ZTpW/zdtesmoBTtH+C/g7NJXOa9oOX184/nJ/g5v5p3P0ryRfO98lqDmPfATKQnX5hr/8icfwjtzOuFf98E7czrlTz6U0OdPVknnQ0nwpJMoWraMymuuwfnSS+Sfdhr2t99OdSxDi1ryqM6ZTFHPDyjptphAxigcFS+Rt/Us8raMwln2LFokWWWzjCE73I9BFfcgClcz2PMQJt3GZ+6/IAuO5+OsqZRYv0l1xHalzU37JFMySzo//vjjPPnkk5SVleFyuUhLS2PFihUNfv+pmvZpiPWbb3DfeCPWdevw//a3sUJxubmGH2pD6qcDtEgFDu+rOD0LsQW+Rdfs+DPOxue+mKBzELl5eW3+GpZYv2aDcz5bHK8QNvnJCR5DX98kevjPw6on5vzVVP+cD8SwB7i3IsM3/kZhpMYfgFCI9NmzyXj0UXSnk4q77sJ19dWUlCZ+yi2RjNQoWP3f4qxYiKPiFUxRL2FbD+h8JSXWsw13zkBdibqGQc3LZsdiNrjm4bF+jzWawRG+sfT1TSQ7/CtDZEwWNeevHLqsVqquv57i//6XcM+eZF93HZbzz8e0o22vdU+kkOMoKjrOpLD3aso7P0rEUoBl618o2DiA7O1Xklb1bpveQGbTM+nnu5TzipdzVsmrHB4YyQ+uBSzJP503c37HZsdiwqh9JomUlJ6/EOJwYC7QEYgC/5FSPnqAh6mefxMZrudfVySC65lnyLz/fnSTCe9f/oJv4sSDLhSXDIbvETpLqdk6B6dHYo6UELF0xOe+CF/WRURsxqi+msxrGNDK2Ox8iQ2ueXgtW0iLZNPLP46+1RPIjDT9zAXD/5zbWM8/DNwopewHDAL+JITon6TXancMNFW3L7OZ6iuvJLRqFaFjj8U9YwY5QmD+8cdUJzv0OPtSmf8XCnt/QVmXJwil9Se95FHyN59Mh5/GY/cuhWhNqlMmjV3vwK+r/8Dvij5kVMmLdAqewjrXkywuGMp/c8ax1f46Udp37amDkZTGX0r5i5RydfzPlcB6oEtzn0fXdWM3dCmg6zqRyCEw/O/Zk9IXXsDz0ENY164lf+RIXHPmQFhVgWw2zUYg82zKus6jsNdKKnNvxFKzmQ47JlOw6QQyC+/AUrMh1SmTRkOjU3AIp5b/mwsLv+B47zS85q2s6PAHXioYyOqM+6kyb091zENO0m/4CiG6Ax8AR0opvfU+dzVwNYCU8oRgMLjXY6uqqigrK8PpTMxd/7bA4/Gwdu1aysvLmThxIhaLMTdpWywWwrUN/c6dWK69FtPrrxM98UQi//43+pGpPxxlr4wG1Gg+PYLmeQ/TrmcwlS5F00NEMwYR7XgZ0byxYG7ZgewJzZhEUSJs1d7ha/MT/Ki9hY5OT/1MjolcRQ/9TEyYU56xqQ4mX3yDqPFW+wgh0oH/AfdI
KQ90KvE+c/4QewMoLCxs8WaqljCZTESj0VZ7vabSdZ2ysjK2bdtGdnY2w4cPT3Wk/dpnHlPXsS9dStZtt2Hyeqm69loqr70WbKmredNW5oJN4VIcFYtwehZiDW4kanLhzzwfn3s8IfuxkMQSIka4hlXmHWx0LuAH50L85kJc4c708U2gt288zmiBITI2ps0t9RRCWIHXgf9KKZtyMGWDjT/Eersff/wx9UcGyeJ0OvH5fK3yWi3hcDgYM2aMoTPu7x+0qayMzL/+FefixYT69sXz8MOEjjsuBQmN0XA1ptn5dB2r/0tcnoXYvUsx6X5Caf3wucfjyxqDbs5OfcYkihLiZ/syNjjns9P+PzTdTNfAKAZY/oSr+Gg0gy5ubFONvxBCA54DyqSU1zfxYftt/Fubkf5B74/RMx4oX9ry5binTcNUVET1VVdROXUqusPRigkP/WvYGC1SicO7JL6B7Ct0LS2+gWw8QefJoCWmITTqNfSat/CDcwEbnS9QYy4jI9ydvtUT6eUX2KM5qY63l7bW+A8BPgS+JbbUE+AWKeWbjTxMNf7NYPSMTcmneb1k3nMPrvnzCXfvjufBBwnWKZmdbG3hGjaFJbA2VmW04mVM0QrC1u7xJaMXErV2NETGZIlQQ1n+R3wZeYzCtM8w6Ta6+8+hr28S+cGBhigz3aYa/xZSjX8zGD1jc/LZPvkE99SpWLZupXrCBLy33oqemfxD0tvSNWySqB9H5Vs4PQtI832KjplA+un43OOpSR8OWvMXDxj9GsKejB7LD2xwzmeT8yVCJi/uUB/6+ibR03cBaXpWyvO1hBHX+StKkwUHD6Z4+XKq/vAHnAsXkn/aaaQtX57qWG2PyYE/awyl3RZReMSHVOVMxub/ipztl1GwaSAZRfdhDm5NdcqkcYf7cJL3LsYVruaU8llYdBcrs26LF5a7kRLrV+gYpjOcdKrxVwxBdzjw3n47JUuXEnW7ybnkEtzXXouprCzV0dqkiK0nlfkzKOz9OWWHPU3IfhTppbMp2HwKOdsEjopXIdo2yylYdAe9/eMYXfI6vy36L0f4x7LFsZTX887h9dyz2OCcT0irTnXMpFONv2IooeOOo/itt6icMgXHa6+R95vfYF8IjvyyAAAVBklEQVSypE3XvE8pzUogYxRlhz9HYa/P8ebdjDn0M9k7/0THjSeQuet2LIH1qU6ZNDnhIxlccT+icDWDPPcS1cJ86p6GLDieT7NmUGZZl+qISaMaf8V4bDYqb7yR4rfeItK1Kx3+7//IvvxyTLt2pTpZmxa1dqIq988UHfExJV0XUpM+DJdnHvlbRpC7ZTTO8ufRIlWpjpkUNj2DX/l+z7nFyzi7eCldA2exySlZmj+SN3LPZZPjJcL4Ux0zoVTjrxhWuF8/SpYsoeK227B/8AH5p52Gc8ECNQpINs1E0DWM8i6PU9hrFRUFd6DpPty7bqZg43Fk7bwJq39Vm/w5aGjkh05gqOcRxK4vGVBxB0Gtgo+yr0d2PJHPM++gwrzpwE90CFCrfRpwKK1gMKpE5zNv2YJ76lTSPv2UmiFD8DzwAJFu3Q7qOdvbNTwouo41sBpn+UIc3iWYdB+htL5oXa6kxHwmUYtxD2U/2Ouoo1No+4wNznlsc7xJVAvRsWYwfasn0jVwFmYObpe6WuqpGv9mMXrGpOSLRnE+/zyZf/sbRCJUTptG9eWXg9l84Me2VsYEMmo+LVKFw7sUp2cBtsAadM1GIONMqt3jCTqHJGwDWaIk8jr6TSVscr7IBud8qiw/YY/k0tt3EX18E8iItOxIVtX4q8a/WYyeMZn5TDt24J4+Hft77xE8/ng8Dz9MuE+fZj9Pe76GiZLn2EXNlsdwel/GFPEQth4e30AmiFo7pzoekJzrqBNlZ9oHbHDO42f7MnSidKk5lb6+SRwWOB0TTd8zodb5K0oTRbt0oWzuXMr/+U/MW7aQN2oU6Y88AiFV27216a4j8Xa8i129VlHW+TEi1m5kFj9IwaaT6PDz77FXvg162/u5aJjoUnMqw8ufYmzhZxxbOYVy63re63A5iwpO4qv0WVSbfkl1zEapxl85NGka/jFjKF6xgsCZZ5L54IPknXUW1m++SXWy9slkJ5B1HqXdXqTwiE+oyrkGa2AtHbZfQcHGAWQUzcQcbJsH+riinTm2agpjC1cyvOxpskP9+CpjFosKTuK97CvYnvY+OsarEqwaf+WQFs3Npfzxxyl7+mlMZWXkjh5NxsyZ4G9by/IOJRFbNyrzp1HYayWlhz1D0HEc6aVzKNg8lJxtY3FUvAzRtvfzMWGha2AUI8vmc0HRJxxZNZki25csz5nIy/mn8E36v/CbjDONpxp/pU0IjBpF0fvv4xOCjNmzyT/jDGwrV6Y6VvumWajJOIPyw5+JbyCbjjn0C9k7r4tvILsVS2BtqlMmRUakKydUzuDCwi/4TdnjpEcOY3XmvbxUcCIrsifzi+0TdHR8pkJesJyOz1TU6hnVDd8GHAo32oyeMZX5bB98gPvmm7H8/DPVl16Kd8YM9PR9T7ZS1/DgNf/MgSg236c4PQtxVL6JptcQtB+Dzz0ef+b56OaM1GdMEo9lEz/EC8sFTR4yQ0eQFs2m2LaKvtW/52TvzGY/p7rhqyh1BIcNo/i996i64gqczz1H3vDhpK1YkepYCsQ3kJ2Cp8u/2NV7FRUFd6PpQdy7plOw8TjcO2/A5vuiTW4gc4d7MdB7B2LXlwwpfwSL7qI47UvQdDa5Xmz13r9q/JU2SXc68d51FyWvvILucJAzYQLu669HKy/HsXgx+QMHYrXbyR84EMfiA50wqiSDbs6musPlFPdYRnH3N/BnjcFe+Qa5284n78dTcZXOwRQuTXXMhLPgoJf/QvKCx6LpsSWhOlG+Tn+kVXOoaZ8GGGWY2BijZzRUvkCAjEcfJf2xx9AdDrSaGrQ6R4JGHQ4qHngA/5gxKQy5L0Ndw/1IdEYtWo3d+zouzwJs/i/RsRLIOAOf+2JqXENBa/6GPiNeR5+pkJcLTiai1ez+mFm3c0Hhpzij+U1+HjXtoyiNsdupnDaN4jfeQAsE9mr4AUx+Pxn33ZeicEpdusmF3z2Oku5LKOr5PtUdLsPm+4ScnyeQv+lk0otnYQ7tSHXMg/Z1+iP7nB3Q2r1/1fgr7Ub4yCMhHG7wc+YdO0j/xz9IW7YM044dbXLO+VATTuuDt+CvFPZaRVmXxwmnHUFmycPkbzqJDj9NxO59A/TggZ/IgIpsq4hqe2ePakGKbF+2Wobmn9umKIewSOfOWHY00HM0m8m8//7df4263YT69SPUrx/h/v1jf+7bF1r5kHkFMKURyDyXQOa5mIM/4ax4EafnBTrsuJqIOQd/1oX43OMJp/VKddImO6/knd1/TtW0lGr8lXalcvp0sm6+GVOdTWC1c/6BkSOxfv89lrVrsa5fj3XdOpwvvIDJ5wNAN5kI9+xJuF8/QrVvCP37E+3cGbTUHwTeHkRsXanMm0pl7hTSqlfg9CzEVfYk6WVzqHEMxOceTyDzt+gm9SZ9IKrxV9qV2pu
6Gffdh3nnTiKdO1M5ffrujwcHDCA4YMCeB0SjmLdt2/1mYFm3DuvXX+N47bU9X6JGCa1PM1OTfjo16adjChfhrFiE07OA7F9uIFp4O/7M8/G5LybkODrVSQ1LrfZpgBFXB9Rn9IxGzwcHl1GrrNxnlGD5/vuEjhLa+jVMOF3H5l+J07MAh/cNND1AMO1ITIddRbF5JLo5K9UJG5Sqqp6q568oLaBnZOx/lLBuHdb16xsfJfTvv+eNoU8fNUpIBE0j6BxE0DmIioK7cVS8gsuzAMvmP9NRs+PPPAef+2KCjpPUNB2q8VeUxDGZiPToQaRHDwLnnLP7ww2NEpwLFx5wlEBOTqq+k0Oebs7C1+FSfB0uJc/+M8Etj+HwvoKz4mXCth743Bfjy7qQqCUv1VFTRjX+ipJkLR0l6NnZ5PzqV2qUcJD09OOo6HQv3oLbsXtfw+lZSGbRPWQU3U8gYyQ+93hqXKe2aAPZoUw1/oqSCk0YJWRs2YK2enWTRglqxdGB6SYHfrfA7xZYajbFistVvISj8i0ilk743OPwZV1ExHZ4qqO2iqQ0/kKIp4HRQJGU8shkvIaitEV1RwnO2huB6l5CwoXTeuEtuA1v/jTslctwehaSXvIo6SWPUuMaGlsymj4KTGmpjpo0yer5Pwv8C5ibpOdXlPZjf6MErxfrhg2xewnxN4ZGRwnxkYIaJdSh2QhknkMg8xzMoR04PLUbyCYTMWfjzxqLz30x4bTmnxFtdElp/KWUHwghuifjuRVFidEzM9WKowSKWLtQlTeFqtw/k1b9IU7PAlxlz5Je9gRBxwlUuycQyByNbnKlOmpCJG2df7zxf72xaR8hxNXA1QBSyhOCQWPU6bBYLIT3UwPGKIye0ej5wPgZE5qvogJt7Vq0b76J/fruu9iv6mogNkqgd2+iRx2FfvTR6EcdhX7UUXDYYY2OEox+DeEgMwaLMBU9j3nXM2j+DejmDKJ5gmjHy9HTT0jICOpg8tlsNmjhOv+UNv71qE1ezWD0jEbPB8bPmPR8DY0S1q/H8tNPe77kAKMEo19DSFBGXcfm/xKnZwF271JMeoBQWr/4ktHfoZuzU5JPbfJSFKX5EnAvwTRgAGndurX9ewmaRtA5gKBzAFrBnTi8S3B6FpBVeBuZRX/Dn3E2Pvd4gs6TQTs0iiWrxl9RlL00+V7CV19hee01areitZd7Cbo5E1/2JHzZk7AEvsPlWYijYjFO7yuErd3xuS/ClyWIWgtSHbVRSZn2EUIsBE4FcoFC4K9SyqcO8DA17dMMRs9o9Hxg/IxGzweQa7Xi/fjj2JtB7RtDQzWO6uxJaO1RQqtcx6gfR+WbOD0LSfN9io6ZQPrpsQ1k6cNB238/u01N+0gpxyfjeRVFMZisLIIDBxIcOHDPxxoaJaxZg2Pp0j1f0tZGCSYH/qwL8GddgLlmc/zMAYmj6h0ilgJ8WQKf+yIitu6pTrqbmvZRFCWxGruX8P33e40SGryXkMJRQiJE0o6gMv8WKvOmYq96F6dnAemls8ko/Sc1zlOodk8gvfSf2GrW735M5/jvwbRfU9LznYafOMFU468oSqvQMzMPepSwe6RwKIwSNCuBjDMJZJyJKbQTZ4WMbSDb+X/o2NAxoRHd/eU6NoKOE1stnmr8FUVJnXYySohaO1OVez1VOddh832Eq+wZ7FX1eviaiaq861stk2r8FUUxnP2OErZu3XN4zvr1TRolMHhwCr6D/dBMBF3DCLqGkbXjBpzel9GIoGOjOmscUUt+q0VRjb+iKIcGk4lIz55EevZs9ighz4CjhMr86Tgrl4AeafVeP6jGX1GUQ9yBRglZW7cSXrXKcPcSotYCqrPG4fLMb/VeP6jGX1GUtqjOKCE9N5fy+Dr6Ft1L6N+faKdOSRklVOVejzP6Y6v3+kE1/oqitCOJvJeQiFFC1FpA+Jh3iaZgM59q/BVFad+aei9h3TqcCxZg8vuBeqOEuqeqJWmUkGiq8VcURWlAskcJjsWLybjvPsw7d5LfuTOV06fjHzOmtb491fgriqI0WYJGCVp5Oa7nnsMUCABg2bGDrJtvBmi1NwDV+CuKohyklo4S6jL5/WTcd59q/BVFUQ5pjYwSOvbvj9ZARWVzK1Y2PjROHVAURWkj9MxMIp07N/i5/X08GVTjryiK0soqp08nWu8GcNThoHL69FbLoKZ9FEVRWlntvH7tap+IWu2jKIrSPvjHjME/ZkzKTmxT0z6KoijtkGr8FUVR2iHV+CuKorRDqvFXFEVph1TjryiK0g6pxl9RFKUdUo2/oihKO6Qaf0VRlHZINf6KoijtUNJ2+AohzgQeBczAk1LK+5L1WoqiKErzJKXnL4QwA7OBs4D+wHghRP9kvJaiKIrSfMma9hkIbJJS/iilDAIvAOcl6bUURVGUZkrWtE8X4Oc6f98OnFT/i4QQVwNXA0gp6dyKtawPxEhZ9sfoGY2eD4yf0ej5QGVMhFTkS1bPv6Gj6/c5tkZK+R8p5YlSyhPjjzHELyHEqlRnONQzGj3foZDR6PlURsPka5FkNf7bgcPr/P0woPXOJ1MURVEalaxpny+A3kKIHsAO4CLg4iS9lqIoitJMSen5SynDwDXAf4H1sQ/Jtcl4rST5T6oDNIHRMxo9Hxg/o9HzgcqYCCnJp+kNnCCvKIqitG1qh6+iKEo7pBp/RVGUdkgd4F6HEOJwYC7QEYgC/5FSPpraVPuK76D+EtghpRyd6jz1CSHcwJPAkcSW+F4upfw0tan2EELcAFxJLNu3wGVSykCKMz0NjAaKpJRHxj/WAXgR6A5sBYSUstxgGR8EfgsEgc3ErqXHKPnqfO4m4EEgT0rZ+qel78nRYEYhxLXE7pOGgTeklDcnO4vq+e8tDNwopewHDAL+ZNCyFH8mdiPdqB4F3pZS/go4BgNlFUJ0Aa4DToz/5zMTW42Was8CZ9b72HTgXSllb+Dd+N9T6Vn2zbgMOFJKeTTwAzCjtUPV8Sz75qvt1I0EfmrtQA14lnoZhRCnEauAcLSU8tfAQ60RRDX+dUgpf5FSro7/uZJYo9Ultan2JoQ4DDiHWM/acIQQmcAw4CkAKWUwVT3BRlgAhxDCAjgxwB4UKeUHQFm9D58HPBf/83PA+a0aqp6GMkop34mv7gP4jNienpTYzzUE+DtwMw1sNG1t+8k4GbhPSlkT/5qi1siiGv/9EEJ0B44DVqY4Sn2PEPuHHE11kP3oCRQDzwgh1gghnhRCuFIdqpaUcgexntVPwC9AhZTyndSm2q8CKeUvEOuYAPkpznMglwNvpTpEXUKIc4lNj36d6iyN6AMMFUKsFEL8TwgxoDVeVDX+DRBCpAMvA9dLKb2pzlNLCFE7V7gq1VkaYQGOBx6XUh4HVJP66YrdhBDZxHrUPYDOgEsIMTG1qQ59Qoi/EJs2fT7VWWoJIZzAX4DbU53lACxANrGp5qmAFEK0uGxDU6nGvx4hhJVYw/+8lHJxqvPUcwpwrhBiK7FKqcOFEPNTG2kf24HtUsraEdMiYm8GRjEC2CKlLJZShoDFwOAUZ9qfQi
FEJ4D4760yHdBcQohLiN3EnCClTPnUSh1HEHuT/zr+f+YwYLUQomNKU+1rO7BYSqlLKT8nNqrPTfaLqtU+dcTfbZ8C1kspZ6U6T31SyhnEb6gJIU4FbpJSGqrXKqXcJYT4WQjRV0q5ATgdWJfqXHX8BAyK9wr9xPJ9mdpI+7UUuAS4L/77ktTG2Vf80KZpwG+klL5U56lLSvktdabK4m8AJ6Zytc9+vAoMB1YIIfoANiDpGdUO3zqEEEOAD4kt/6udU79FSvlm6lI1rE7jb8SlnscSuyFtA34ktvwvZUsU6xNC3AmMIzZNsQa4svZmWwozLQROJdbjKwT+SqxRkEBXYm9aF0opG7qhmcqMM4A0oDT+ZZ9JKf9olHxSyqfqfH4rKW7893MN5wFPA8cSWzJ7k5TyvWRnUY2/oihKO6Tm/BVFUdoh1fgriqK0Q6rxVxRFaYdU468oitIOqcZfURSlHVLr/JU2RQjxG2LL50xABLhNSvmJEKICWA1YiZUh6AyMkFLeGn/cHcAKKeWKOs/lJFZOo0/8cf+RUj5HC8WrnQ434OZBpR1SPX+lzRBC5AJ3AudLKU8lVgjNH//0t1LK04AbidVGaoq/Av+LP9cQYMtBRnQDYw7yORQlIVTPX2lLzgbm19ZjildmXVPva76i6ZUnB0spp8WfSwc+ABBC/IPYhhwvMIFYAcARUspbhRCXxh+7gtjGnTJiJQbOA64GRgohVhDbsFXc/G9RURJDNf5KW9KZ2O5shBAXA/9HbMfpTXW+ZhiwoaUvEK+46JJSDosXhPsj+6/8mk2sltB44AJiB3V3NVpJDqV9UtM+SlvyC7E3AKSUC4CJ7CmQdZQQ4n1ibwj3AQFiZQlq2dkzRdSYI4jdO4BYTaBe7F0nvm41xnVSyiiwg9iUj6IYhur5K23Jm8AiIYSUUlaw97/v2jl/AIQQQeA4IURtB+h44IF6z/eJEGKClPL5eNG/U4jVKjoj/vkTiR1dWAF0in/sKOCb+J/rvymEiJ0cpigpp3r+SpsRn0O/E1gihHgPeIzYmcwNfW0psdLdHxAr5reogaJpdwK/ic/RfwwcES+56xdCfAhcDMwh1th3FkK8CeQ1EnEX0EEIsSh+Pq+ipIwq7KYoitIOqZ6/oihKO6Qaf0VRlHZINf6KoijtkGr8FUVR2iHV+CuKorRDqvFXFEVph1TjryiK0g79P/5pHXSXZbd8AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEkCAYAAAAhJPoXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOy9eXxURbr//67Ta1aSkD0RkB1UUBHXcVd0xm1k9KijMy7jNqNevTpfBncHFR1/6tXrqDMu44LrUfG6jgLjLuqIjuAGAgICISGBkLWTXk79/jinO92drRO6kybU+/XKK93n1Kl6+iyfqnqq6jlCSolCoVAohi7aYBugUCgUitSihF6hUCiGOEroFQqFYoijhF6hUCiGOEroFQqFYoijhF6hUCiGOGkn9EKIm4QQqwbbjsFECLFWCHHdYNvRHfH2CSHeE0I8MkBlD1hZOxNCiMeFEIuSkM9hQggphKhMhl2K3rHP91k9pXEOlDGDiRAiD7gJOBoYBTQBHwPXSimXD55lQ4aZQDBdy7IrhrFSysNSYlEKsBs7T0kpbxqgIi8nDRt+iuSws1zYMmBX4AZgb+B4IBt4RwiRP5iGDQWklFullI1DrayuEEK4B6vsVCKlbJBS1g+2HYoUIaUctD/AAzwINAD19ufbgFVRaTRgLlALNAPPAVcAQXu/AN4APgdcUccswmq1O7spezgggRN6sO8j4Jao73+2jzkqatv7wB1R34+2y/UBG4HHgOFR+/cG/glstn/P58CxceWuBa6L+n6UfY6uSuCcuoC7gQ1AO7AJeC4uzWnAF0AbsMW2Jz/K/veArXaZ7wP79mLfe8Aj8d+B64FqO6/HgaxEr2sPv69PZWH15GTc3zn2vmzgXvs6tQL/AWZG5T3KTn8m8CbQAtyZ4Dk+HfjKPsdr7fRZcWkuAb6z89gMvBj1m+JtHtXN+Xgc616/EFgHNAKvAEVx6c6OKmsDcAtRz0Y4n37ke5mdXyvwNvBb297KqDTTgAX2da4F5gMjt+f57eKeeNT+TZuBbcCtdj43ADV2ubfGHfdr4DOs+7zOtmN8F9dfB16zf+OPwG/i8rncvtbNWPfgc0BZXJqjgK/t+2EZcKid91lRaUrs815Lh9fhkLh8DrePD+dzeHw+XZ6j3k5iKv+A/7EvzEnARKyHqJFYob/SPoG/AcbZ37cSJQhAEdbDeqf9/Vo7zYgeyh5tn6ADekgzB/gk6vuHtr232d8zsB6cY+3vR9g3w2W2rdOBd4EPAGGnOQzroZsMjLdvTn/cDbYWW0ixRKYZ+HWC5/RKrAfvMGCEbcMVUfvPBQJYwjgZmGLfqIX2/pOBU23bdsMS0a3EVlYR+6IetHjx3WZf34nAsfb3P/fluvbwUCdcFpaYPw0sBkrtvwwsgXnXPv5n9v1woX0tjox70DcAZ9lpdk3gHJ+D1XD5jX3MIVgP5byoNH+2f/+l9rneO+qaFwBrsJ6HsM2Obs7H41hC9SywO3AgljA/EZXmOCAEXG2XdZpt381x+cQLfW/5noTlRrvSzvd3WKIaEXqse6zZ/r0TgT2AF4AfAG9/n98u7okG4C+2HefZNrwJ3GFvO9ve9vO4Z+F4YAywF/AqsBJwx13/H7HEfixwu/2bx0XlczmWkO8KHIB1r70ftb8CSxcesc/HkVgNrYhAY92T3wEvAfvYZV2LpS+T7DTlWI2Nx+x8jsa6r9JX6IEsrFrpgrjtS4gV+o3RN6S97TniBAGrZgsCN2IJ2cweynYAbwH/BrQe0h1m55kLZNon/Srg3/b+o7GEIdx6fA+4PS6PEfaF2LOHcpZijRfECCnwR/sGProP5/Ve4B3siqWL/T8Bf+1DfhqWKJwZb1/cgxYvvsvi8vkbsZVmQte1m4e6r2U9ArzXxbVtA4bFbf8H8H/251H2tbu+j+d4LXBx3LZD7Lzy7XvfB/yxh9+5CrgpgevzOFYL0BO1bTawKer7h4ARd9zltg3uqHzihb63fD8Cno7L905ihf5xOvd2PFjC98uobQk/v93cE1/FbfsW+LqL5+zOHvIpsG0/KO76XxmVxolVcV3UQz572cdV2N9vte8JR1SaY4kV+nOwGg/OuLzeAe6xP9+CVdlG98SOJwGhH0wf/RisC744bvtH4Q9CiFysWuzTuDSfxGcmpXwXuAurq/6IlHJ+V4UKIRzAk1i1/EwppdmDjZ9gCfkhwMFYJ/lJYC8hxDCsFvy/pZQtdvrpwBVCiObwH1YtDVarFSFEkRDiASHEciHENjvNbsDIuLIvxLqwR0gpF/ZgYzyPYbWaVgkh/iaE+FXYryyEKAZ2wepGd4kQYlchxDwhxCohRCNWD2tYF/b1xldx3zdidU37dF23t6wemA64gY1x1+ss7GsVxb/jvvd0jouwztXdcfn+0z52LNb19tLDdegj30sp26O+x//+3bB6ldG8b9swZjvynUwPz6/NdODkuHOxxS47cp4TfX57YGnc92qs1m78tuLwFyHEnkKIl4UQa4QQTViNIOh8r0fuLyllEKvXEjkP9kyjt4UQ6+18wucgnM9k4HMpZSgqz/h7fTpWz21b3Lk6mI7zNBlLb6InI8Sf7y4ZzFk3wv4vtzONldAS8IOwuqhjhRBC2lVeVBo3Vld0KnCYlHJDT3lKKduFEIuxulp+4B0pZa0QYjlWi/AILL9kGA2r+zivi+yq7f+PY7XyZ2F1z31YLdn4Qb5PsFo5vxNCfBn/W3qw+SshxK5YvY3DsVqfNwsh9o9O1kMWr2P5Ky8B1mP97o+6sK83/PGm0TH4n/B1TUJZ3aFh9ZamJ5BfS/SXXs5xuNzLsVxD8WzAcpeF7UwGXf1+0cW2aBK5Bv3JNx4N63m4vYt9WyLGJPD89kKgC7u62qbZ5WViVbQfYbl6ws/nt3S+17u9v4QQI7BcRPOwXL11QCXWGIM77pj4PKLRgO+xXKfxtNr/RQL5dMlgtuhXYZ3Ag+K2Hxj+IKVsAKqw/F7R7E9nbsJqpR+ENfjzp+id9oV9FatWPERK+VN8Bt3wDpagHwH8K2rbyXY570SlXQLsJqVc1cVfs53mEOABKeWrUsqvsQbyRndR7tdYIjITeEgIEf+AdYuUsllK+bKU8r+w/H2TgEOllJuxhOaYro4TQgzHOj+3SynfllJ+h+XeKO4qfX/p43VNBn4sd100S4A8LD9x/LXq9d7o4RzXYFWQE7q5D9qwenltdHMderC5v3yLNfgXzSFYjYwftyPf7+j8/MZ/X4JVsa3u4lxEz/K5iR6e3xQwCWts4Fop5btSyu+x3GoJP2c207H861dIKT+WUq6gc2/yO2C6XZmFib/3l2DpQGMX56nKTvMtsF9cPj9LxMhBE3rb3fE34BYhxIlCiAlCiDuwBmyiuQvLHXKmEGKcEOIKYAZR
NZkQ4lCsgaazpZSfARcAc8KtWCFEDlbLewLWQJQphCi1/zJ6MfUdrG76nnS00N7BGiQNENsFuwE4SQjxP3a3cIwQ4lghxKNR5awAzhRC7CGE2BOrh9HlAy2lDD+gvwAeE0L0er2EEP/PPle72a3O87BaST/YSf4MXCSEuF4IMclOd6kQohDLF18LXCCEGC+EOMC2z9dbuf2g1+uaRNYAE+3fWiiE8GBdw0XAfCHEyUKI0UKIaUKIy4QQF/SUWQLn+Frgv4QQ1wkhdrfv7V8KIf4OViVh//6bhBCX2Od6qhDi6jibDxJCjLBt3p5n9TbgV0KI2XZZOpaw3iWljG+t9oW7gNOEEJfb1/BcrAHoaOZiiepTQoh9bdfg4UKIe4UQo6H35zdFrMMac7vMfk6PxOqZ9fX+W2kfc5X9236JpQPRPIAl/g/az9zhWH57osp7GuuavyGEmCGEGCWE2E8IcbWdJ1izEouwGn6TbJtvJRESHfBIxR9WTfh3rC50A/AQXU+vvA2rSxSehncN0CQ7BlDWY9200Xn/3T5xw7DcLLKbv3N6sdFh27Y0alse1sDRoi7SH4wlIE1YXf7vgXuwB1CwKo3FWOK5FviDnf7xqDzWEjvYORbLf/g03cy+iEp7EdaIfiMd0zdPiktzJpZPsx2r+/wGkGfvO9Te14ZVKf2KuIHBLux7jy6mPMaVeR2wNtHr2sPv609ZBVjd64boa451/91u3yd+rO77W1jjItAxGPezfpzjX2I1AlrtdF8BN0TtF1junRV22TXAC1H797HL8JHA9Mq4bWcBMm7b2Vj3oh/L134rCUyvTCDfy+38fFj38dl0nl65B9bUzHo73SqsZ72ABJ7fBHSkq3sg5pmyt72FtQgt/P0ULKFuw5paeyjWcx2+P7q7/vHPQ9jN6cNyBYUHWg+LSnMU8A3WM7csKs2votIMxxLzjVHX6WVgr6g0R2L19tvt/I4ggcHY8JS/HQohxD+AqVLKaYNtiyJ5qOuq2FkQQhyCNSA+RVou3JSS9iEQhBDlWP7wd7G6xydgLcq4dDDtUmwf6roqdiaEEL/H6ilXYY2D/Q/w2UCIPJD+LXohRAnwPNaAjher23SflPLhQTVskBBCfEv3Ux2fklJePJD29Bd1XRWJIIQ4E8uN0x2TZeITKwYNIcTtWCtxS7BchAuBP0kpt/R4YLLKT3ehV8QihBiJtQS/KxqlNbNGoRgS2BMpeloTsVbGzitXdIESeoVCoRji7CzRKxUKhWKnRQm9QqFQDHGU0CsUCsUQRwm9QqFQDHHSah69ruv/wAq7udkwjN17STsCeAJrlaoDmG0Yxpupt1KhUCh2LNKtRf841tLgRLgOMAzD2AvrbT4PpMoohUKh2JFJqxa9YRgf6Lo+KnqbrutjgPuxgvm0AhcYhrEcK75Drp1sGNaKM4VCoVDEkW4t+q54CLjMMIxpWG9cCrfcbwLO0nV9A1bAqssGxzyFQqFIb9Ja6HVdz8aKT/+CrutfYS2FLrN3nwE8bhhGJVYY33m6rqf171EoFIrBIK1cN12gAdsMw9izi32/w/bnG4bxia7rXqAQ6+XdCoVCobBJ6xawYRiNwBpd108F0HVd6Lo+1d79E1ZsZnRdn4QVGKt2UAxVKBSKNCatYt3ouv4s1ktCCrFexHAj1puAHsRy2biA5wzDmKPr+mTgYSAba2B2lmEYyXrZskKhUAwZ0kroFQqFQpF80tp1o1AoFIrtJ50GY1XXQqFQKPqO6C1BOgk9VVXpseapsLCQurq6wTajR9LdxnS3D9LfxnS3D5SNyWB77CsvL08oXcqEXtf1POARYHes1vp5hmF8kqryFAqFQtE1qfTR3wu8ZRjGRGAq8H2yC8iYP5/iffelrLKS4n33JWP+/GQXoVAoFDs8KWnR67qeCxwCnANgGIYf8CezjIz58xk2axaazweAc+NGhs2aBYBv5sxkFqVQKBQ7NCmZXqnr+p5YMWq+w2rNfwFcbhhGS1y6C4ELAQzDmOb3J14XuMaNQ/zU+eXvcsQIAitX9t94wOl0Egym9/uG093GdLcP0t/GdLcPlI3JYHvsc7vdkMBgbKqEfh/gU+AgwzA+03X9XqDRMIzrezhMxg/GSilpaWmhKxvdn3Tv7vcfcEC/7A6TrjeGlBKv14vb7R7SA0wDRbrbmO72gbIxGSRhMHbQZt1sADYYhvGZ/f1FYHZfM2lpacHj8eByuTrvnDGj2+M8fS1oB0FKSVNTE01NTRQWFg62OQqFYgchJYOxhmFUA+t1XZ9gbzoSy43TJ6SUXYv8TooQgtzcXOrr61m1atVgm6NQKHYQUjnr5jLgaV3XlwF7AnNTWNZOhcPhYMmSJYNthkKh2EFI2Tx6wzC+AvZJVf47O+k4hqBQKNKTIRfrJpVz67du3cpvfvMbDj74YI466ijOP/98tmzZEpNm7ty5jBw5krvvvjtmu2maXHDBBZFjTz/9dNauXRvZf95553HUUUcxY8YMTj75ZL755puk2a1QKHZuhpTQh+fWOzduREgZmVufLLEXQvD73/+eDz/8kEWLFjFy5Ejmzu3wSN15550sXbqUxYsX89FHH3H//ffHHH/qqafy/vvvs2jRIo455hhm2fP+Ae655x4WLVrEggULuPjii7nqqquSYrNCoVCkVaybnsi94QZc3/U8nuv+4gtE3Fx8zecj76qryHzmmW6PC0yeTOOcOb3akJ+fz4EHHhj5vvfee/Pkk08CcP/997N69WrmzZuH2+3mmWee4bLLLuPhhx/mggsuQNM0ZkTNFJo2bRqPPPJIx+/LzY18bmxsRNOGVB2sUCgGkR1G6BOiuwVXfViIlSimafLkk09GxPuSSy6J2e/1enn44Ye7Pf6xxx7j6KOPjtn2xz/+kffffx8pJU8//XTSbVYoFDsnO4zQJ9LiLt53X5wbN3baHqqoYMuLLybVnuuuu46srCzOPffcPh/74IMPsnLlSl544YWY7XfeeScAL774Irfccgvz5s1Liq0KhWLnZkj5B5pmz8bMyIjZZmZk0DS7z2u1emTOnDmsWbOGBx98sM8ulscee4yXX36ZefPmkRFna5hTTjmFxYsXs3Xr1mSYq1AodnKGlND7Zs6k4Y47CFZUIIUgWFFBwx13JDXI2e23386yZcv4xz/+gcfTtzW4Tz31FE899RTPPvss+fn5ke0tLS1sjOqJLFiwgLy8vJg0CoVC0V92GNdNovhmzkxZ9MoVK1Zw3333MXr0aE488UQARowYwaOPPtrrsc3NzcyePZvKykpOP/10ADweD6+//jqtra1cdNFF+Hw+NE0jLy+Pxx9/HCF6DWGhUCgUvTLkhD6VTJgwIabl3Reys7PZsGFDl/uKiop4/fXXt8c0hUKh6JYh5bpRKBQKRWeU0CsUCsUQRwm9QqFQDHGU0CsUCsUQRwm9QqFQDHGU0CsUCsUQRwm9QqFQDHGGlNC/UjiDx8srOv29Utj9+2X7Qirj0Ye5++67qaioYPny5UmxWaFQKIaU0Bf7p6FJd8w2Tbop9ifnRVepjEcP8PXXX/P
ll19SUVGRFHsVCoUCdqCVsZ/l3sBWV8/x6EP4MQnEbDMJssX1Df8cfkq3xxUEJrNf4+DGo29vb+eaa67h/vvv59RTT+3VFoVCoUiUHUboE8GBm4xQMT7HZhASpCAjVIQDd+8H95Fkx6O/8847+dWvfsWIESOSbqtCodi52WGEPpEWN0CrVsNLJQcQoh0HHk6oe4tMszjp9iQzHv2SJUv46quvuOaaa5JtpkKhUAwtHz1AplnC2JbTQArGtpyWEpFPdjz6Tz/9lNWrV7P//vuz3377sWnTJs4880zef//9pNuuUCh2PnaYFn1fmNp8BdtcPzC1+Yqk5x2ORz9v3rx+x6M3DCMm1vyll17KpZdeGvm+33778cQTTzBx4sSk2a1QKHZehqTQZ5ol/HzLS0nPN1Xx6BUKhSKVDEmhTxWpikcfz2effdavMhQKhaIrhpyPXqFQKBSxKKFXKBSKIU7KXDe6rq8FmoAQEDQMIznLUxUKhULRJ1Ltoz/cMIy6FJehUCgUih5QrhuFQqEY4qSyRS+BBbquS+DvhmE8FJ9A1/ULgQsBDMOgsLAwZr/P50uheTsumqahaVqn85VOOJ3OtLYP0t/GdLcPlI3JYCDsS6XQH2QYRpWu68XAQl3XlxuG8UF0Alv8wxWArKuL9fIEg8F+FVyjCf6Qn8GD9T6KTdmvPLpi69atXH755axduxaPx8OoUaP4y1/+wvDhwyNp5s6dy9///ncuv/xyrrzyysh20zS56KKLWL58OR6Ph8LCQm6//XZGjRoFWIukPB5PZBHWtddey2GHHdalHaZpYpom8ecrnSgsLExr+yD9bUx3+0DZmAy2x77y8vKE0qXMdWMYRpX9fzPwMrBvqsqK555sN5+5HdyTndxgZqkOU/zQQw+xcOFCFi5c2K3IKxSKHZ/CGWeQe/VtaDW1A1JeSlr0uq5nAZphGE325xlAYlHJuuGGXA/fuRy9pvMDX7odSCGYl+XmG1fvsSsnB0LMaWzvNe9UhilWKBQ7D+5vf8C1cg1Zz7+GefapaBefhVlSlLLyUuW6KQFe1nU9XMYzhmG8laKyYtjg6OikSPv76JCZ9HKSHaYYiMS7mT59OrNnz2bYsGFJtloRpkYTnOYMcJ8mkureUyi6Q7T60DZtxrGpxvrut96doT3yDCVPvEDLaSfQfMX5KRH8lAi9YRg/AlOTmWciLe4aTXBASTZSCACkEDQ44IG65PrqIblhigHmz59PRUUF7e3t3HjjjVx33XXcd999yTRZEcU92W4WC8k92W7mJnBvKRTdIiWioQnHphoc1bXW/02b0ao347CF3bFpM1pDU5eHC9OE9nay5r2E64cf2fJS9w3E/jKkYt3ck+0mXs5Ne3syH+ZwmOLHH3+832GKn3/++UiYYiDy+kCPx8PZZ5/drwpE0TvtwGKPg2ey3JgCnslyMyEYosSEDCnJkF3/90g1F3mnxDTRttRHBNtqkUcJeHUt2qYaNF9bzGFSCMzi4YRKiwmO2oX2A/bBLCsmVFpMqKyYwlMv6kjrdoMmaDntRJqvOD8lP2NICf0Xbid+uzUfxi8ES9xOrEd8+0lFmOLW1laCwSC5ublIKXnllVfYbbfdkmLvzkwAWOHUWOZ2sNTlYJnLwfcujUDUPRIQgmvyMhPKzysl3u4qA7Or7bHbuj02Kq0bEL1aokgKgQCOzXVomzpa4dHi7di0GUdNLSIQO/tPOh0RwQ7sNp7QUT8jVFZCyBZys7yYUHEhuFw9Fi/dLtA0zLN1ai8+C7M4dVMsh5TQL6hrSWn+qQpTXFtbywUXXIBpmoRCIcaNGxczm0fRO0FgpVNjmcvBUrcl6t+5NNptUc81JVMCIc5s8fN0ljtG7D1S8viWVjIk+AS0CYFPgC/8XxMdn7v43ypgi1OLfA8f3y76LtlalPhnaX7cRVndVgo9VxxdVzbh/0Pqwe8KX1vEjaI1t5K98seIfzyyffMWhIz1AZheD6Yt2v799rbEu6zYao3b283h+dDHnnw8/t3G499nKs1XnE/B5ImYKZ7+OeSvdzJJVZjikSNHsmDBgu0xbaciBKx2anYr3Wqxf+N00KZZwpptSvYIhDinxc9Uf4gpgRCjQhIBXJ3r6dRilsBbXmfSffUhoK2byqHz9s5ppNdLfTAQ+d6gCarj0rYKCPWjQnF1UQF4I5VH4hVIiTAJuB1dpvGmwt0lJaKp2WptV2/uEO+YVvlmtG0NMYflAuawnEirOzBpbETQw63zUFkJclgO9ON89pW6Bc/GfG+u0fj4nmyqvnBz7oLki74SekVaYwI/OjS+dmsR98vXLgettqhnmpLdAyHOavUzNRBiit9kdMjsVmAGwr0XxgFkSciKtBr7NiGg0JVNXf22XtMFoIveRmzF0amX0uV263OdpnWZVnYpgEEozOrWtp7cXZ22myaFdfWUbKimaEM1BZs2k7+xhmFVNWRX1ZC1aTMZVTU4W1o7lRMqLCBUVkxwl3L8++4ZJd7F5E6awBavC5mVmIsuVUgJZgBCAUHIb/1v2uRg4TwHy+aXICWE/KmpZJTQK9IGCaxzCEvQbb/61y4Hzbaoe6Vkt0CI01v9TAmYTA2EGBM06X11RQfR7r10XzGZKC7AJSG3nxVKIkisajC+UvDk57GpoSGmwuiqt9IeCuLdvIWsqhqGbdpM3sZqCqpqGF5VQ/GGGko3VlO2sQaPPeUwTNDhoKq8mDUVJWyYOoENxx3KhspSNlaUsKG8lE0lpWzOL8KFh0y/JLMdMtshw2/9edshp96JozmAxy9w+yVuv8Dll7jbBS4/uALg9IOzXeDwgzMAmt/+Cwg0P4iAQAYsIY6IdcD+7BeEAmAGOwQ8ki4oMO1tZrBrEZdIRIpHZpTQKwYFa42DiLTSl7otUW+wRd0tJbsFTH7lCzDVH2KPQIjxQTPpN2yqu8w7ClKCDNEhXoFo0bJFLPw5IHAEICMgyPI6kNUmzrpa3HU1uLfU4K6vwVNfg7ehBm9DNRnNNXhbatFk7HqWoOamxVtGk6eUJvc0lhWV0uAoY5uzjG2UUy9KaDCLCQadhNaCudISTRkQlASgJNQXccyI+RYCfPZfV4TckpAbTJf12XSD6QbplkgXVu3qlggPiCyJcEs0l8ThlmgucDglTjc4XBKXS+JxSVxOcLskbpek9vEM2tY5QIqUizwooVcMABKo0kRMK32pS6PeXtzmkpJJAZMTfAGmBEJM9VuintwAFhavFM6g3v0toU2ltFxxPb7HzgVTgD+5pUkJZtAWzm4EM0ZUo9MFLDHN8Gg0bM2MbSkGwPQLQsEO0Q2FW5rBjtZkOI/I/7htkVZnsMM2ZGfB8dBMHlUMo5o8NsV9tr5ns7XTcT5yaKCUWsrYxiQaHWU0OUtpdJXR7LbE3e/JQ3ODwwUOt8ThkmjO8GfIdEly3CEczhCavc3hsoXUFlWHC0tgw/vC25wShy20+YW5tPgaYraZLknADQG3den9Hmh3S9rcgjaXoE3remwl2t3V1s3YSvT4SdfuLkHmRe1Mv9nF5MdciBA4U+SyCaOEXpF0qj
XBJ8Lk4xwPS13WTJg6W9QdUjIxaHJsW9AS9UCIiQGTvk1U7T+5PxzDmhvn0P7a8WBqEOp4BBZdnxsrpoFuxDEQK8jxgmkJeLIe3LxOWzSnjBEtLSyUTmyh6xBApwccOWaMQFotTpNMWU92ezXZ7ZvIaqshq7WarJZqMprtVnhjDa625k7lB3Ly8ReWECgsxqycyqa8PGtaYWkxoYpizIpitPwsNKck2w25XfrWAsDAxHkpLJTU1fkTSyyx4qgkgXBWXY6bOOHvd7l56joXe9/iZrfHXDhCIJSPXpGO1GqCZfbsl/C0xhqHBgTRnG7GB02OaA/as19MJgVCcZ3o1BDER4O5gfU/NFD1NdR9lcu2r8rxffX/0d1ckK9elmiukC2UAqdL4HRpOF1OHE4Nh1vizjK7bUXGtjbDrcrodFaXvrv0Vrkd2wpL8mlo3mofQ0TARW9TWUIhtNqtOKqjVmXas1Si54qLttjBZ6lp1iKfsmJCE0fSXjYdX9SMlFBZMaGSIvB2VMuFhYU019Wh2We1Q1BUWAkBeACPhDwpiT4nNZpgscdJexm8f7+fz68PcMAcFwd/oHFT9UYAACAASURBVIReMchs1YQl6FF+9U12S11Iydigyc/ag0wNmBySmU1F3VYyU/i8+0UTTY51NDnXso2f2LwyQN3SXLZ9VYHvi0kEl+4H7V7Lvvx6MvdeTsnvl9O8uoCWD6bZLhtvJL+xNfvQ4qgmoDV2KsthDiMzVEpmqIysUCmZZpn9vZSsUBmZZhkeMz+p/tb8QgjVxcVp8gdw1NT2uErTUVOHCIViDpNulzUTpbQI/9TdMI8tjppaaM8PLx4OTiUJA0H8Kv7WUsmH97dT2eKHzrffdjOkr2qyB9pSGY++ra2Nm266iQ8//BCv18u0adO44447ttvm/rJNYLXUo/zq650dTcnRwRD7t4eYEvAzJRBi90CI7Kg7tzBDo247RV4iadfqaXKspdG5libHWpqca2kQP1G/2kHzf8YSWLIPwSX7EPjPWeCzps85clrJ3bOKogu+p3yKycjdcygekYUmRgIjadVqMOREGm+Zhe+xc9FCXky/xi9r3wUgIFpo1TbR6qimxWH9b3VU02Jvq3d9h0+rBRH7AzXpsSoBu0LINO1KIKqCyDBL0Lp57ESrD62qJtLy1hqbGbZ6bdRc8c046jr7w83MDHtRTwn+g6bHzAsPlVsuFbMgb7sX+SiSx0BO84UhKvRhgf/m+aykzk0Nx6MPhyq++eabmTt3LnfddRcQG4/+sssuw+PxxES1PPXUUznqqKPQNI3HHnuMWbNmYRgGALfeeisej4ePPvoIIQS1tQPjvwRoFPB1eJDUdr+sjRL1UUGTvQIhzm6xRH2PQIjcJLXUJRKfVhMj5OHPjc51+EUjodVjLDH/fDrmkpn4v9wTs9kW9cwAhXs0U3FWO+VT2imd4qdgdAihZQJdz5vONEuYkHsYK/76X0y98gfab76VjUs6BmNdMothobEMC43t1m6TAD5tc6QiiFQIdmVQ5/4PLY5NmMIPErzbBDnVDnLWOxi+IY/89dnkrfeQs1EjsyqId2Mbroa2TuVoecMi88EDUyZZgh7nTpE52QOyyEeRPAZ6mu8OI/SLbshl83c9x44I+aFhg4OWzfboT9QsgmdOGd7NUVA8OcBRc3rvL6UqHn1LSwsvvvgiS5YsQdgPbFFRamJTtwj4xuWIcr9o/OjsGC3bJWiyRyDEGa1+ptjTGvO3U9RNgrQ4qiwRD4u5Yy1NznU0OdYS0iyBkxLMtaNx/vsYzM9Px//FVFr+M5pAg+XVd3hMiicHKTslQOmUekqnBhg+LojWl4n0NlObr6Al80emZ51L5ty+95U1XGSZFWQFy9DqtnYsq49bqSmqN+HcVIvmix3hk6KF1pJWmipDVI3303R4iKZKk8ZK67+vLAOtohyHs9h2DUX1FEKlZJnlSXcVKYYuO4zQJ8KWH1y0NwkGIixUMuPRr127lvz8fO6++24WL15MVlYWs2bNYt99t++lXD4B3zjD7hdr9ssqpxaZ8lUWMpnqD3FKa8BaVRowKehnOOcQ7TQ510da5QFHDZsLltPkXEOzYwOm6FgI45BesgMj8K6bhnvJZbR/MYXm/4xm63+KaNtqVeaaS1I8KcCoEwKUTt1G6RQ/hROCOHqu63ulcMYZ+KdNQbvifE4v+Bd1Zg8tqXDQq6rYGCkxIWi7C3pVUmQtsd9tMv6jYpfZxwe98opWTG0TwrEJp6Maj6OaVscmghlbqQ+uo961HJ+2uRdXUce4QdhdlBUqI8MsRmM7T5pih2eHEfpEWtzNmzUW35PN189nIc1Yl82vX9ySVHuSGY8+FAqxbt06dt99d66//nq+/PJLzjnnHD7++GNycnI6HR8C5mkme0e9NKMN+C5q9svXLgcrnBqmLerFIWsl6Um+AFMCJlMCIYr6KOoB0RoR8ibHOhqda+zW+TpaHBtjhMgtc8jWRlIQ2I2RvuNwbZpM+5I9aP7Prmz5qoCNy9yRnpdwSArHBxk3w0/ZlBZKpwYomhjA6e3Okv7T8WafVzF/+XM8Rx6E1tZutcarY6MYarVbew56te9ehMpt8Y6KmWIWFvTJH+6SmQwLjWFYaEzM9kJXR5e+w1VkVQIRd5FWbbuKvqLF8RamiPPvSkGGWRTVE4gbRLYrCJfsPoyBYsdnhxH6RMguNpkxt5EDr2juVvCTQbLj0VdWVuJ0OvnlL38JWC6hgoICfvzxR6ZO7fz+lkZNsB7J7/MzGB00Weq2RD1oi/pwW9SPbQsyxQ7qVZqgqLeLbRHxbnKusf3l1swWn2NzTFpPqIDc0ChK/PuSExpFbnAkOcFRuGrHYq4Zyw8ftlG9zMV3S100Vdm3mpAMHxdk10PaKZ0aoHSKn+LdgrgyBmA6nj2dMPxmH8fzrzD8+Vciu83c7IjfOzB5XCdfeKi0GJmXOyj+8IiryKywpqB3gUTSLupjKwJ73KDFUU2Tcx01jk/xaw2djnWZuXbvIH4Q2R5INsvwmAXKVbSDMqSEPky84EcPtG0vqYhHX1BQwIEHHsgHH3zAoYceyurVq6mrq4vMyImnWQikgE89Tr53muwVMDmiLRzUK0S5Kbt9HK3Bz1rbPx4r5E3OtbRrsUG0MkOl5ARHUdl2BDkhS8hzQ7uSExyJW+bS3iioXuaiepmbr5e6qF7mouGn8G3lJn/XIJX7+imd0kLZ1ADFuwfwZA/gHGt/AM+Hn5Hx6gK8b7/XZRIpBP59prDl//4xcHalAIHAKwvwBgsoCHb/PoOAaI0MGsf2DKxt21wraNVqunQVZYZKonoCpRRrY5HeHLuCKCMzpFxF6YiQMm0WNsiqqqqYDU1NTV26LgaLFStWcMQRRzB69Gi8Xsuv0Jd49BMnTqSysjLym8Lx6AHWrVvHVVddRX19PU6nkz/96U8cccQRXeb1wU9ruXbFt6w7eybTeIJz/P/gpLqOMMcSkxbHpk5C3uhcQ5NjHUGtI/qfkBpZoUpyg6M6CXlOaCRO2bG8yd8iqPnGRbUt6NVL3Wz9s
aOtMGyXIKVTA5RNDTDh4Ay8I+rwDhuE+ysYxL34CzJefZuMf76Ltq0Rc1gOvp8fQdZzHS34+Df7pPLFD/1hMIOumQRjZhW1RnoH1TEzjbp3FYV7AnGDyHaFMJCuonQPXrc99pWXl0MCg5JK6HdAPvhpHdeu+IYfzz4Dp/Rxu/83jA5URmaxNDl/sqb12WjSTU5wRIeQB3clJzSS3OAoskKVOLqIKhPwweZvrZZ69VIXm5a52LLSGZnJlFMWonSqn9IpAdsFEyCzoGNxz4A/XKaJ+7P/WC33N/6FY0s9ZnYWbTMOxXfi0bQfegC4XZRXTBvQN/tsD+kuUBJJdqHG+m3fdjluEK4Q/FrnUMsuMydmjCBmzCDJrqJ0P48DIfRD0nWzMyGF4AXPDGa6ricnNJK84Hh2aZsREfKc0K5khkrRegjmG2yH2uWxLfXaFU6kHR0wqyhE6dQAE0/wUTYlQMmUANnFZrf5DRhS4vriazJeXUDGG4twVNdiej20H30IvhNn0Hb4gZARO6I70G/2GcoIBBkMpyA4mYLg5G7TBYUvMk4QqRC0jp7CNtcP+LTNSBF7T2nSHTWjKHY2kbWtXLmKEkQJ/Q5OCC+t3x/EOT+9BC430uUCjwfpckV9DoFLA2EF4qpb4Yy01KuXudj8vSsShCsj3xL1MUe1RQZLc8rM9FmPIyWur5dbLfdXF+DcWI30uGk7/EB8Jx5D+9EHIzO7j6ZTt+BZMubPp/D4X+CoqqK4vJym2bPxzZw5gD9i58IpM8gNjSY3NLrbNGFXUewgslURtDiq2eJaxnrvAkIiblGZFHjNwo5KwIwdRM40y8glBdO3djCU0O+A5Mt1HMHtXNR6IX8YDdk1Xacz0ahhIhvYh5/Yh5/EdKrkVAJ2WDGvaKDSs5RJOd9Smb2cypwV5OVstYJwf+tCrvIgX3WBy2X5s91WRZLIZzF8OJ72dquycbuR9vaePuPoptchJc7lq8h4dSEZry7AuXY90umg/ZD9aZr1B9qOOdRaHZoAGfPnM2zWLDSfFYncuXEjw2bNAlBiP4hoOMkyy8kyyynqYVaRX2yLGTdoiaoQmp3rqXH8u2tXUWlO1BhB3JiBvc1rFiCS//LDtEAJ/Q6IuxkwYfw9pbxZcz1rHAdz/t+XsHVjBlU/FlC1ppCqn4qorirBb8dZd7vaKS/6iX2Hf0hl/o9UDFvJcPd6tKAfEQgg2tshEEAEQDQ325+jt8d99vcey7X7tchdIx2OjgrA5QLNBX4HWquJCEgkIHM9BMYWESrNRzqb8C58Fc/7b0UqI+l29/g557bbIiIfRvP5yLn9diX0aY5A4JH5eIL5CbiKOnoG5DRS17Y60lvo2VVUEjXFtPMgckaouMsxrXRHDcbugKz5+GMefehT8hbdhomGxIk7x8TfZLVGnF5J8W4ByvbsGCwtGN2/UAHdImVE9Glvt8Q/EAC/H+H3k5+VxbbNmxF+f8x27Eqiu89aXT3OH37CuXojjvpmJGAWZBMqGYZZkInAtPIKVzbRn+O3xUVw7PHnAK3nnENg7FiC9p9ZWjqoMWTSfRARdlwbLVdRbcdsoqhxA6uCsAaVw+E5ovGGimLWHMSPG2SFSnHJnnuY4RfgxJPv3y1mBl1vqMHYIUxDaBfafmwlFNWy2O1kX2QWTOH4IFqqr6wQEdcLWVmdoo/LwkICCQqAtrGajNcW4l24APfS7wDwT5tC84lH4zv+aMzSfsb9CYU6VQpFxx+Po6YLX5fLRcZLL5HV1BTZZGZnW6I/ZgzBceOsz+PGERw5MhK+QLFjYrmKrNk9PS1A84ttncYNwp+bnevZ7Ph3p7UnYM0qih9Ejg5xXRCYTINrZafZccX+fVLye1MqB7quO4AlwEbDMI5PZVnQEcek+YrzMUuSHxQsVWGK169fz3nnnRdJ29jYSHNzM99+27nG744Zt3Ve7ZjOaDW1ZLy+iIxXF+JeshQA/5RJNFx/OW3HH02osmz7C3E4ICMDmZERqYgar7suxkcPYGZk0HDHHfhOPhlt82acK1fiXLUK5+rVuFauxLN4MZkvvRRJL51OgqNGRVr+kQpgzBik6oEOGaJdRfnBSd2mC+KLXXwWFcW0xbGJKs8HXbqK4ltHAo2pzVek4JekvkV/OfA9kJvicoDoOCav0XLaCUkX/FSFKd5ll11YuHBhJN0NN9xAqAe3gxBWfJgdDW1LPd43/kXGqwtwf/olQkoCk8bS+KdL8J1wNKFdd0m5DWE/fM7tt+OoqiIUN+vGLCnBX1KC/2c/izlONDfjXL26oxKw/7yLFiGCHUHNQqWlBMeOJRDuAdi9AbOkRIUSHqI4ySA3tCu5oV27TdPhKqqODCSvzHiera7vQJho0s3YltPINItTZGOK0HW9EjgOuBW4spfkvZJ7w524vvuh13ThOCZZ814ia95LhIqHE6osB3f3Xe3A5PE0zvljr3mnKkxxNH6/n5dffplnnnmmWzuyy0KU7ykx18uUxPJJJmJbI9633iXj1QV4PvocEQoRGDOS5v++AN+JMwiO6/7hSBW+mTPxzZzZJ/+yzM4mMHUqgfjYQ4EAjnXrcIXFf+VKnKtXk/nCC2jNHe9bNXNyOrmBAmPHElJuoJ2CrlxFI33H8VLJAYRoT2lrHlLbor8HmAV025fVdf1C4EIAwzAoLIxdoeiLmx3RF8JRBx01dWitbQR2n9DvvLoimWGKo1mwYAGlpaXsscce3R7rdGns9gvB8XcGWDTXwbpPRadzN6g0NuF87hVKn38FsfADRCCA3HUE5lUXY556PHKPSXiEGLAXgneH0+lMznkrK4P994/ZFJQSNm1CrFiBWL4csWIFzuXLcX3yCeLFFyPppMsFY8YgJ0xATpzY8X/8+OTZl0KUjdtDIbubZ7NUe4TdzbMZUdD9TKLtJSVCr+v68cBmwzC+0HX9sO7SGYbxEPCQ/VXGt66CUV3iRFrc5RXTIp/Dy9xTFcckmWGKo3n++ec5/fTTezzeNE1M08TvrOOQG6xtgz3xQbT68Cz60FrI9M7HiHY/wfIS2s47Dd+JMwhMndzhutiS3JDR/SXlM0bcbthjD+svCtHU1NkN9O23ON94I8YNJCsrkbvuGjMTKDhuHGZxcdq4gXbUWTfpwgTtYrYUf8eE2ot7fjdCN9izbnolVS36g4ATdV3/BeAFcnVdf8owjLNSVF6EVAs8JD9McZjq6mo++eQT7r333mSamzra2vG++zEZry7Es/ADNF8boeLhtJx5Mp7f6tSOGaHeU9oFMieHwJ57Ethzz9gdgQDOdesiLqCsDRsQ33zT2Q2Um2u5gMKDwNFuIPVy7x2KTLOE04O9vAAnCaTkrjAM42rgagC7Rf/HgRD56DgmqQpUlYowxWEMw+DII4+koKAgWeYmH38AzwefkvHqQrxvv4fW3EKoIA/fr47Dd9IM/PvtBQ6H1VVO01ZU2uJyRVruHHss3nBLVEq06upI699lVwSeDz8kM6pXKF0ugrvu2nlK6NixyCz1YpGdmSFV/dcteDal+a9YsYL77ruP0aNHc+KJJwJ9C1M8
e/ZsKisrI66Z6DDFYAn9zTffnBrjt4dgEM/iJXhfXRAb9vf4o2g78WjaD5quWpKpRAjMsjL8ZWX4Dz44dldjY4cbaPVqqzJYsQLv22/HLBgLlZV1zASKdgMVFaWNG0iROlL+dBqG8R7wXqrLGQgmTJjAxo0b+3VsdnY2GzZs6DHNRx991K+8U0IohPvfX5Hxytt433yn27C/isFF5uYS2GsvAnvtFbvD749xA4V7A5nPP4/W0hJJZubmxgh/wO4NKDfQ0EJdSUUHkbC/b5Px+iIcNXW9hv1VpClut+W6GTcOfv7zju1Som3a1OECCruB3n+fTMPoSBbtBooaCwiOGaPcQDsgSuh3dqTEtex7a7bMawv7HPZXsYMhBGZ5Of7ycvyHHBK7q7ExZjGYc9UqXMuXd3IDBcvLI8Kv7bkn7tJSyw1UWKjcQGmKEvodkO0ORCclzu9XWS/seG0BzrUb+h32VzF0kLm5BPbem8Dee8fu8Ptxrl3b2Q307LNojz5KeNqDOWxYpwVhwbFjCY0YodxAg0xan30pJVJKhGolRJBS4k8gRHBXOFetwfvqQjJeeRvXqrVITaP9Z9NpvvRcfMcejswflmRrFUMCt5vg+PEEx4+P3S4lhW1tNH3+eUwl4Hn3XTKff74jmdvdvRsoM3OAf8zOSVoLvdfrpbW1lSzlEwQskd+4cSOrV6/GmWALybF2feSFHa7vVyKFwL//3mw773TajjsSszCNp3Iq0hshYJddaM/IoD3eDdTQ0NkN9N13eP/5T4TZEdwrWFERCQgXXQkoN1BySWuhd7vd+P1+Vq9e3eeFSdKE5s0OpAnZxaGEwvbWa4IWIZBYAZ6zpCTf3D43SXuToK1BQ3NA5nATh7t/+YVb8qtXr6a1tZURI0Z0m9axcRPe1xZakSGjwv42/Pmq7Qv7q1AkiBw2jMC0aQSmTYvd0d4e6wayp4ZmfvZZbETRvDyCY8Z0mhIaGjGi+zeRKbolrYUerGmJwWCQjz/+uM8uC1+9YOmzWbizTKae7sPh6V5kW4TgkWw3wahtLuB3zX6y+uETD/gEK//ppX6Nk4LxQcYd3YazZvsqjcx5L6GVlVB49CHMmDGDhoaO0MQDEvZXodhePB6CEyYQnBAXe8o00TZtig0Ot2oV3n/9C8dzz0WSSbeb4OjRnaaEhsaMQWaoSQPd0Weh13VdGIYxoDFy8/LyOO644/p17EET3Bi/Hk4os52ZT27ttmV/da6HdVlu/FHdRbeUfNfiZ25je5/K/OkTN69dmo9nq8avb2xgr7Nbk9ILLb/0FqR7FfzzE8ylP9Jy2gnWXPdBDPurUCQFTcOsqKC9ooL2Qw+N2SW2bYt5P4Bz1Spc33yD9803Y91AlZWd3xEwdiwM7+tLLYce/WnR3w38d7INSRUjD/JzzF+28c+r8ll0/TCOntvQpeh+4XbGiDyAXwiWuJ1AYkJvhuCT/83m47tzyBsZ4pTXainZPdj7gX0gHIZZe/hpih56CgEERlUOathfhSKVyLw8AvvsQ2CffYiJZ9vejnPNmk5jAfFuIFlQQOHo0Z3dQLvsstO4gfos9IZh7DAiH2bK6T62rnby2QM5FIwJss/5LZ3SLKjr2NafaHfNNRqvXZrPT4s9TJ7ZyozbGvBkp67jEw7DLIXALCmi6aqLUlaWQpGWeDwEJ04kOHFi7HbTxLFpU8T9k71+PfLbb/EuXIjj2Y4wKdLjiXEDBexB4aHoBupV6HVdf9QwjN/ZnwXwsGEY56fcsiRz6NVN1K9x8s6fc8kfFWTMUX1zx/TEj+95eOO/8vC3CH5+dz176L7kThgIBsl+6OmYTfFROhUKhY2mEaqoIFRRQfthh5FRWMgWu+Em6us7u4G+/hrvG29E3EBSCELduIHMHdQNlEiLfnT4g2EYUtf1MSm0J2UIDY773208M3M4r/4hnzP/r47iydvnVgkF4KM7c/j0rzkUTgxwxov1FI5PrqvGuXwVeVfNwf2V9f5Y6XKCw4F5tk7txWelLEqnQjEUkfn5BKZPJzB9eqwbqK3Nmg0U96rIzE8+QWtriyQL5efHvCM4XAGEKivT2g2UiNDX6bp+PrAYOABIj7dG9AN3puRXj2/lyeOKePHsAn77eh3ZJWbvB3ZB40YHr/w+n6ov3Ew9s4Uj/9yIKyOJrppAgOz7nyDnnocxc7LZ+uBtZP/1sUgY5oLJEzFVGGCFIjl4vd27gaqqOioAe0qo9+23cUS9QEd6vR2LwsLB4caOJTh6NHTjBsqYPz/y7uLiuHcXJ5tEhP5srNf9XQKsAH6bEksGiJxSk1Oe2MLTvyxk/nkFnPHilj4L9Mq3vbx5ZR5mEE54YCuTT2rr/aA+4PxmOflXzsH17QpaTzqGxpv/H+bwfNpOnNH7wQqFInloGqHKSkKVlbQffnjMLrF1K65waGi7EujSDbTLLp3cQM4VK8j9858jg8bOjRsZNmsWQErEPhGhbweqgRDwILAX8HnSLRlASnYPcsID9cw/r4A3Ls/jpL/VIxJYjxVsh/duzeWLR7Mp2cPPSQ/Wk79rqPcDE6XdT869j5B9/+OY+XlsffRO2o49vPfjFArFgCMLCvAXFOCfPj12R1ubNRsoakGYa9Uq3IsXx7iB4tF8PnJuv33QhP4p4H3gDMMw7tN1/TbgqKRbMsCMm9HO4dc18u7Nw/jwjiCHzG7qMX39GstVU/O1m2m/a+awaxtxJvHt1q6vviXvyj/jWrGa1lOOo+Gmq1TsGYViR8TrJThpEsFJk2K3myaOjRtxrlxJwW9+Q1fzNRxVVSkxKZG4AkWGYfwNSK5/Ig2YflELU89s4ZP7cvja6H461XeveHn82CIa1js5+dGtHDUniSLvayPn1v+l8IRz0Bqa2PLkvWy7d44SeYViqKFphHbZhfYjjiBUUdFlklCCL/vuc9EJpNms6/ppQIau6ycDm1JiySAgBBx9awMjf9bOW7PyWP+pO2Z/wCd4a9YwXvtDAUUTg5y7oJbxxyavvnN9vpSiY35NzgNP0HrGSWx+16D9yJ8lLX+FQpGeNM2ejRk3SGtmZNA0e3ZKyktE6M8DioAvgUrggpRYMkg4XPDLv28lb0SQ+b8roH6NNUXqp0/c3D+tmKVPZ7H/pU2c8WIduRXJ8ceLVh+5N9xJ4cm/Q7T7qXv2ARruuA6Zm5OU/BUKRXrjmzmThjvuIFhRgRSCYEUFDXfcMaizbiYahvFXXdeLgXOAUcDylFgzSHjzJKc8uZV5xxdi/Ho4ebtorP24Y2HEoVf37L/vC+7FS8j74xyc6zbSco5O4zWXIbNUTG6FYmfDN3Mmvpkz+7USv68k0qK/y/4/B1gPPJY6cxKncMYZ5F59G1pNbVLyc2VIKvb1s+0nB2s/1rACFSdveatobmHY1bdReOpFIAR1Lz1Mw61/UiKvUChSTiJCn6nrugfwGIbxLNC/1xslGfe3P5D13CuUHHBSUgT/1T/ks2qBl2SKexjP+59QdIRO5ryXaL7wTGoXPYd//717P1ChUCiSQCJC/xTwCvA3Xde9wJrUmpQ4wh9AtLe
... [remainder of base64-encoded PNG plot payload omitted] ...",
+      "text/plain": [
+       "<Figure: throughput plot>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "print(\"Unmanaged Memory Plots\")\n",
+    "system = \"dgx2\"\n",
+    "for exp_type in EXP_TYPES:\n",
+    "    for exp in EXPERIMENTS:\n",
+    "        if exp == \"duplicate_keys\":\n",
+    "            continue\n",
+    "        plotThroughput(system, exp, exp_type, \"noindex_nomanaged\")\n",
+    "        # plotSpeedup(system, exp, exp_type, \"index_nomanaged\")\n",
+    "\n",
+    "print(\"Managed Memory Plots\")\n",
+    "for exp_type in EXP_TYPES:\n",
+    "    for exp in EXPERIMENTS:\n",
+    "        if exp == \"duplicate_keys\":\n",
+    "            continue\n",
+    "        plotThroughput(system, exp, exp_type, \"noindex_managed\")\n",
+    "\n",
+    "        # plotSpeedup(system, exp, exp_type, \"index_managed\")"
+   ]
+  },
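The cell above relies on EXP_TYPES, EXPERIMENTS, plotThroughput, and plotSpeedup, which are defined earlier in the notebook and are not visible in this hunk. As a rough, hypothetical sketch only (the RESULTS layout, key names, and synthetic numbers are assumptions, not the notebook's actual helper), a plotThroughput-style function could look like this:

# Hypothetical sketch only; the real plotThroughput / EXP_TYPES / EXPERIMENTS
# live elsewhere in this notebook and are not part of this diff.
import matplotlib.pyplot as plt

# Assumed result layout: RESULTS[(system, exp, exp_type, config)] maps a GPU
# count to a list of (num_keys, throughput_in_keys_per_sec) points.
RESULTS = {
    ("dgx2", "build", "weak", "noindex_nomanaged"): {
        1:  [(1e6, 0.8e9), (1e7, 1.1e9), (1e8, 1.3e9)],
        16: [(1e6, 5.5e9), (1e7, 9.0e9), (1e8, 12.0e9)],
    },
}

def plotThroughput(system, exp, exp_type, config):
    """Plot throughput vs. key count, one curve per GPU count, for one run config."""
    series = RESULTS.get((system, exp, exp_type, config), {})
    if not series:
        print("no data for", system, exp, exp_type, config)
        return
    for gpus, points in sorted(series.items()):
        sizes, rates = zip(*points)
        plt.plot(sizes, rates, marker="o", label="%d GPU(s)" % gpus)
    plt.xscale("log")
    plt.xlabel("keys inserted")
    plt.ylabel("throughput (keys/s)")
    plt.title("%s %s %s [%s]" % (system, exp, exp_type, config))
    plt.legend()
    plt.show()

plotThroughput("dgx2", "build", "weak", "noindex_nomanaged")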
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "print(\"Unmanaged Memory Plots\")\n", + "system = \"dgx2\"\n", + "for exp_type in EXP_TYPES:\n", + " for exp in EXPERIMENTS:\n", + " if exp == \"duplicate_keys\":\n", + " continue\n", + " plotThroughput(system, exp, exp_type, \"noindex_nomanaged\")\n", + " # plotSpeedup(system, exp, exp_type, \"index_nomanaged\")\n", + " \n", + "print(\"Managed Memory Plots\")\n", + "for exp_type in EXP_TYPES:\n", + " for exp in EXPERIMENTS:\n", + " if exp == \"duplicate_keys\":\n", + " continue\n", + " plotThroughput(system, exp, exp_type, \"noindex_managed\")\n", + " \n", + " # plotSpeedup(system, exp, exp_type, \"index_managed\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unmanaged Memory Plots\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEkCAYAAADNfV1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzsnXl4FEX6xz/Vc+ckkHCrKHhwuCogl4Cg3N4gI4ioqIiI8RZYfq7rwsp64goiKqyAIJFBwBu55L4PFeRQQEDkSCAkJJPMPfX7ozthCJOQkBkIpD/Pw0O6u6q6qqe6vl1vVb0lpJTo6Ojo6FRulPOdAR0dHR2d848uBjo6Ojo6uhjo6Ojo6OhioKOjo6ODLgY6Ojo6OuhioKOjo6ODLganIISoJ4SQQoi25zsvlYFwz1s7fuB83Lscae0TQrwccrxUCDHpDHFeFULsLu+9S0OknqkQYooQYlEk8qRz7ihtXTOei8xcQBwAagGZAEKIutq5jlLKpWebqNbgrAAul1LuK382L2pqAdnnOxNl5EYg/3xnogQuxGeqc47RxSAEKWUAOHK+7i+EMAF+WYlXAkopz9vzP1uklEfPdx5K4kJ8pjrnnqiaiYQQbYUQq4QQudq/X4QQXbVrYbvpQojdQohXQ46lECJVCDFTCJEnhPhTCHGvECJRCPGZlu4fQoheIXEK0r5fCDFfCJEvhNgphLhZCFFHCPG9ltZ2IUS7MPEK8nRA+3+Jdn5fCWW9Swjxk3avbCHEeiHEDUKIeqi9AoC9WjpLtThThBCLtPLtAzxArBDCJIR4XQhxUAjh1fJ5f5H7SSHEk0KIadozOCCEGFokTDUhxCytrOlCiFFCiKml7eoLIeoKIWYLIY4JIVzac34p5LpRCPGKEGKPEMKj5XdcyPVnhBA/CyGcQogjQojPhRC1znDPU0wa56CclwshFmvl2yuE6BeSbmnr6ClmojBlsgghJgghTgghsoQQEwDLmTJWyrLHCyE+EkIcFUK4hRAbhRBdwqRT1meaJE6+c+lCiH8DIkweU7V3yy2E2CWE+D8hhFG71kAIkSOEeC4kfEMtzcGlKH/B87cLIb7R3q0/hBD9i4SrpdWtbO13XCqEaB5yvYOWTg8hxBotzCYhRGPt30ot7fVCiEZFnsF0obY5LiHEb0KIF4QQIiRMwTv8uBBiv1ber4QQKSFhLhdCzBFCHNLuszVMGWxCiI9D6sgHQoj/iCLmHSFEH6G+U26t3o0RQsSGXD+rugaAlDIq/wADcBwYA1yp/bsHaKddrwdIoG2ReLuBV0OOJerX+kNAA+AD1C75POBh7dw4IA+oViTtPcDdwFXAXOAQsEjLx1XAbNQG3xQuT8AN2nFPoCaQUkxZawJeYChwOdAQuB+4VnsOd2rp3KiFrarFmwLkaHm7XgtvBN5CNVX11vI5AggCtxZ5LunAQKA+8LR2rmNImK+B34GOQGNgMnACWFTK3/Br7Xldrz2bjkDfkOtTgQygv5aHVsBzIdefATppz6Q1sBpYFnL9tDqgHT8Q7XKG3PsQ0A+4Gvi39pybl7GO7gNeDjleCkwKOX5Xe053AdcAb2u/++4zPP/SlH2Wdv+uqPXuPdS6eE05n+lcrZy3aM90upbnRSFhXgX2o75PlwM9gD+BUSFh+qF+5DQFrMAWYHYp61/B8/8DsKO+668DfuBKLYwA1gE/A21R36GZQBaQrIXpoKXzk1aeRsAaLS/LgVu1Z7cSWFfkvR6m5f1y4AHACQwICTMFta6lAU2ANtozmRoS5lpgCPA37XmnamUIfd5jtd/kTtS6+B8t3d0hYR7WytUfuAJor5VhWnnrmpQyqmKQpP0AHc7wQ5dGDP4bcpyinRsX5l63F0n72ZAwN2rnXgg5V9DYNwmXJ6BuSWUIk069Yq63DXddq0jZQFzIuRjUl+fJImHnAj8WeS5ji4TZCfxH+/tKLUyogJhQxa+0YvBL6G9R5FoDLf17y1AnCp5TneLqAOEbroiXM+Teo4qcXw1ML2Md3UcxYgDEAm5gYJE0NlI6MSip7AW/QY8iYTYDn5TjmRak2znkuhk4WPBMtXqaD3Qrks6DQHaRc5NRxXqy9qyqlLK+FDz/50POGVEb5EHa8a1amEYhYSzAYeAV7biDFubukDC9tXO9Qs7do52LKyFP7wELi7zDRwFLyLnhwOEzlO0rYGJIHfEAjxYJs5ZTxWAf8ESRMO21PCeVp65JKaNnJpJSZgGTgPlCiHlCiOFCiKvPMrlfQtI9CgRQFTH0Xl6genHxODkWsCXMuaLxysoWYD7wqxBirlDNI5eUMu4OKaUz5LgB6ou3vEi4ZahfaKH8XOT4IFBD+7ugu7u24KKU0odaMUrLf4ERQoh1Qog3hBDtQ6411f5fUFxkrXs+XzND5KJ+eQFcVoY8QHTLuabI8aqQNCNBfdTGaXWR8yvDhA1HacpetK4s5/S6cjbpFuZZSukFNoSEbwzYgNlCNQM6hRBO4CMgMdRMAjyF2og/iNqzLOtgdmFepZR+1C/ogrw2BjKllNtDwnhQewtFn0GZ2gMhhKK1Wz8L
1VTqBJ7g9Pq7Q7tnAaHPEiFEjFDNvtuEEMe1dHqEpFPwzq/lVNaEpJGihR9T5HnPC0mjXHUtqmMGUsqBQDNgIXAzamM5SLsc1P4vaoc0hUnKV4pzktPL4ytyvbhz5XoOUh147o7aBd0A9AJ+F0LcXoroecUlW+RYhDnnDROnaFmKxik1UsrJqBXwQ9QZKfOEENNLE1cIcSnwPerXTB+gOWoXGNSKXxaiWs4ihNbHstTRM6V3tvkrTdnD3fNM9ysp3dPGBsJQELY3qhmx4N+1qL214yFhGwC1tXs0KEXaZclrwXFRwj2DsrYHLwB/RzVDd0Yt3yROr7/h8hf6DN9CNTGNRDVlXo/6bhRNp6TfrCBPz3Dq874O9XlvpZx1LerrDKSUv0opx0gpuwP/Ax7XLhXMwKhdEFYIUR2oE+08lYGCH9lwpoBSZb2UcrSUsj3ql/yAsqaDaoLwoIpnKO2BbaWIX0DBl1LrghPawF6zMqSBlPKwlHKylPJB4FGgnxAiAdUUAdClmKg3on45PiulXCWl/I2Qr6UIUt5ytipy3BrYof0diTq6G/X3v6nI+TZlSKM4CupD+yLn21G2ulJcuoV5FEKYUX/T0DBu4Aop5e4w/wJavBjgc+AL4DngAyHEleXIW7i8JhcZ+LUALSjfMwD1uf4gpfyflPInKeVu1Ib3bNL5TEo5U0r5C+oYyFUh1wvqSOsi8QrrppQyHdX0eXUxz9tNOeta1KaWCiEaoA5QfYNaiNqolXQzgJTSJYRYBQwVQuzU8vIaakNYUTiGap/sIoTYBng0k9QpCCHaoNouF6DaKq9EHSz6nxZkP+pXZg8hxEwtnRPhbiilzBdCjAVGCSGOonaRe6MOCHUubcallLuEEN8A47Xe2FHUL50ESvnlIIR4H/UL5jfUwb+eqL9lrpQyRwjxGerLbUXt0lYF2kgp3wN2afd5QQt3HfBKafNfWiJQzke1+rcR9eutNfCslna566iUMk8I8SHwbyFEOuqzfBR1cC+jtOkUk/YeIcQs1N9gEGo9G4w6kHl/iZFLTne3EOJrTj7TdFQ7eHxIGKcQYjQwWptcsxD1+VwL3CClHKYFHaedHyylzBVCdAY+F0K01kxP5eVHYD0wQwgxBHXQ9R+o9XVCOdP+DegvhOiIavp5EGiJOohb1nTuEkLMRm1PnkdtD9OhsI58xMk68jvqhJmGnPwgAfg/4H9CiGzgS9ReTUOgu5RyUHnrWjR7BnmojeLnqIWbjWrLeiokzCOoD2e1Fu5j1Ma0QiClDKLOArCjNoI/FRP0BGoj8hVqI/gJ8BkwSksnHbW7ORy1fF+d4db/B0xEtdlvQ22kHpBSLi5jEQYAv6LaFZeiVuiFqF90pUFoefgV1Q4di1rxChrZAag24n+jfk3PRZ11gZRyC+qsiUGoX+8vojWyUaA85RyO2lvdgvqyPySlDLWNR6KODkd9eaehNlxVgPFlTKM4HkMdr5qOahO/CXUixc5ypvsI6ofIt6i93IOov28hUspRqF/7j2n3Xqkd7wMQQthR624fKWWuFm0A6iyd18uZv4I8SNQZgzuB71DNtDVRB7+PlTP5Uahl/wr1YycJddZPWXkOVaiXAItRn+UXRcIMQ/1wnoFaR5JQB6cL67CUchpqW3SbFmYD6oyugyHpnHVdEyffa52LHSGEAfWl+VpK+cL5zk+0qCzl1Lm4EUL8CGRJKXudMXAE0FcgX8Ros3+qo/Zo4lG/UOqhfnFcNFSWcupcvAghrkWdobcGdWC5P+pgc49zlQddDC5uDMDLqDM4fKimlI5Syq3abJ/tJcQdJKX87BzkMRIUW87zmiudEhFCzEMdRwzHCm3SSWVBoo73jEU13+8E7pFSzisxVgTRzUSVFG3GTb0SgqSH2Hl1dCKOEKIO6oyzcLiklAeLuaYTBXQx0NHR0dHR9zPQ0dHR0dHFQEdHR0cHXQx0dHR0dNDFQEdHR0eHCja11G63fwLcDmQ4HI4mZwh7Kao//SqoUwuHOxyO76OfSx0dHZ2Lj4rWM5gCdCtl2JcBh8PhuAHVK+YH0cqUjo6OzsVOheoZOByO5Xa7vV7oObvdXh/Vt0YK6mYaAx0Ox07URRoJWrBE1B2rdHR0dHTOgorWMwjHx0Cqw+FohursrKAH8CrwgN1u/wvVs2bq+cmejo6OzoVPhRYDu90eh+qLe5bdbv8Z1UNmwYbqfYEpDoejLqr/jml2u71Cl0dHR0enolKhzERhUIBsh8NxfZhrj6KNLzgcjjV2u90KJFNOH/E6Ojo6lZEK/SXtcDhygL12u703gN1uF3a7/Trt8p+oG8pgt9sbom5mcTRsQjo6Ojo6JVKhfBPZ7fY0oAPqF3468E/UnYwmoJqHTMDnDodjpN1ub4S6AUwc6mDyUIfDUezm7Do6Ojo6xVOhxEBHR0dH5/xQoc1EOjo6Ojrnhoo0gKx3UXR0dHTKjohEIlETA7vdXrBRtgS2AgMcDkeJG5QfOnR268aSk5M5dqy8e19fWOhlvvipbOUFvcxlpXbt2hHLR1TMRHa7vQ7wNNBc8zFkQHUZoaOjo6NTAYnmmIERsNntdiMQg+4uotx06ZXM30cmkH5UH+rR0dGJLFFpVRwOx0HgbdS1AIeBE/q0z/KzbaeZz+fE0rprDVKHGXRR0NHRiRhRmVpqt9uTgNnAfUA2MAv4wuFwTC8S7nHgcQCHw9HM6/We1f2MRiN+v79ceb4QsNQwF/5tMkkMBniwT5ARzweoVeM8ZuwcUVl+5wIqW3lBL3NZMZvNEKEB5GiJQW+gm8PheFQ7fhBo5XA4niwhmiw6gCylJC8vjzPlsbJUoFXrzPy228xb79dGSvX3F0LSspmX2VMzz3Puok9lG1ysbOUFvcxlRRtArtCzif4EWtnt9hjAheo2YmNZE8nLy8NisWAymSKdvwuSbp2gXWsvBsNu3hjbgGAQhID6l/s5lqmQXC14vrOoo6NzgRKtMYN1wBfAZtRppQqqK+oyIaXUhaAIsbFm/tYwg349N/Ld58fofZeLtNkxtO5anTfeiyf7REQ+EnR0dCoZFckdxWlmotzcXOLj489Tdiouu3fvZefObdx2220IIdi918A74xP4ep6NxIQggx528tgDecTGVpjfNiJUNhNCZSsv6GUuK5E0E+nTUS5ADAa111QwTtLg8gAT3s5iwewMWjT18ubYBFp3q87HU2NxlbjMT0dHR0flohMD25w5VG/Rglp161K9RQtsc+ZELO3jx4/Tv39/2rVrR6dOnXjsscfIzDx14Hb06NFcdtlljBkz5pTzwWCQgQMHFsbt06cP+/btO+0eY8aMoU6dOuzcubPEvEgpyVAEf0+w0CU5FoDG1/iZMv4436QdpdHVfv71ZiJte9Tg05kxnOVELR0dnUrCRSUGtjlzSBw6FOPBgwgpMR48SOLQoRETBCEEgwcPZsWKFSxatIjLLruM0aNHF15/++23+eWXX1i9ejUrV65k/Pjxp8Tv3bs3y5YtY9GiRXTt2pWhQ4eecn3r1q1s3ryZOnXqlJgPj4CFSpD21eP4PNb
MNrPhlOtN/+bj80mZzJp8jLq1Avx9ZBVuvqM6s76yEQiU8yHo6OhclFQkR3UlkvDKK5i2by8xjHnTJkSRT2DF5aLKCy8QM2NGsfF8jRqRM3LkGfOQlJREmzZtCo+bNm3Kp59+CsD48ePZs2cP06ZNw2w2M2PGDFJTU5k4cSIDBw5EURS6dOlSGLdZs2ZMmjSp8Njj8TBixAjGjx9P7969S8xHhqKwRUg8QiBF8ebCNi28fDn9GEtWWnhzbDzPjkji/UlxvPhULrd1dqNcVJ8COjo65eGCEYNSUZwtJAo2kmAwyKefflrYwA8ZMuSU61arlYkTJxYbf/LkyXTu3Lnw+O2336ZXr15ceumlZ5WfXAHxYcaLhYBb2nno2NbD9wutvP1+PE88X5XG1/gY+nQOt7b3UIKe6OjoVBIuGDEozZd79RYtMB48eNr5QJ06ZH7xRUTz8/LLLxMbG8uAAQPKHHfChAns2rWLWbNmAbBx40Z+/vlnRowYUar4AtXzXygta8QzIM/Lo3leqgZPVwUh4LYubrrd6ubL7228Mz6eh56sRrPrvQxNzaFtK31QQUenMnNRGQpyhw8naLOdci5os5E7fHhE7zNy5Ej27t3LhAkTUMpoa5k8eTJz585l2rRp2LS8rl27lj179tCqVStatmzJ4cOH6devH8uWLQubRq1AkGulglVKzNrU4DYeP/+Nt9Cyehz/SrBwRAn/uW8wQK87XCz7JoM3/pnNoSMG7ns0Gfsj1dj0i76mQ0ensnJRiYGrZ09OvPkm/jp1kELgr1OHE2++iatnz4jd4/XXX2fLli188sknWCyWMsWdPn0606dPJy0tjaSkpMLzTz31FJs3b2bdunWsW7eOWrVq8dlnn3HzzTcXk1KALBzMPbafvnleGnsDTMpy8WOGk25uH5NizbSuEcfwRCt/GsKLgskED9jzWfl9Ov8afoLfdhu58/4UHnyyKr/uuGA6jDo6OhFCX3RWBn777TduueUWrrjiCqxWKwCXXnop//vf/84Y1+l0cs0111C3bt3CMlksFr799tvTwrZs2ZKpU6dyzTXXhE1r8/5NDP31RVrf24pRrv+cdn2fQfBBnIVZMSYCwD0uH6lOLw38xburyMsTfDIjlgmfxHEiR+H2ri5eHJLLlfUrjs+nyrYgqbKVF/Qyl5VILjrTxeACZMWfy3l16z/468F9zMiZRTNfi7DhDiuCD+PMTI8x4xHQ3e3nmVwPTUoQhRM5go+nxjHx01hcbkHP2108/2Qul11y/uekVraGorKVF/QylxV9BXIlR2i/fYAA9pR7uDf5TtJippMjTpwSrlZQ8q8cD+sznDzl9LLCYqRr9Tj6V7WxwVx0CFolMUHyUmoua+ZnMPDBPL6db6P97dUZPjKRw+l6ddHRuVjR3+4LEInWmxNglEayRRYvVxlGq5pNeabKkyy3LCXAyS/5akHJ8FwP69NzGZbj5meTgbuTY+lVLYZlFgPh+obVqgZ55aUcVv2QTr978/l8dgw3davBq28kcCxTrzY6Ohcb+lt9gSNQaONpy9yj32PP78MK6zIGVOtHuxoteDN+NLuNuwrDJkh42ullXYaTV0+42WdUuL9aLLcnx/KD1Ug441HN6kFG/+MEK77P4K4eLv43PbbQQ+qJHH2Bgo7OxYIuBhc4PuFls3kTf/Ndx6snXmPNkc2MP/4xjX1NmBT3IV2rd6BX8u18FjOVEyIbgBgJA/O8rE538ma2iyxF8GjVGDqlxDLXZiTckPEldQK8+1o2S746SqcOHsZ+HE/rrjV476M48vJ0UdDRudDRB5AvQPbu3cvWrVvp3r17ifs9HFUy+No2ly9iHPxu2olZWujk7kKvfDttPe0xamsO/cDXNiPj4iz8bjJQzx/kSaeHe/N9FDd5dttOI2+NS2DhUivVqgZ46jEn/e/Lw2aNfHkLqGyDi5WtvKCXuazoA8g6pSIlWJ1H8wbx/dFFfHX0B/rk9WONeSWPVutPuxoteD3h3/xu/A0j0NPlZ/HRPCYdzychKBlaxUabGnFMijXjClPVdA+pOjoXF7oYVAIEgia+a/lnzihWp2/mg+OT+Jv3OibHTqR79Vu4J/k2psVMIUdk0d3t5/tjeczIzKOeP8g/E620qB7H2Dgz4YYIdA+pOjoXBxeVGCQldKF61Tqn/UtK6HLmyKUgmvsZtGzZkvbt29O5c2c6d+7M0qVLI5Lnopgx09XdnY+yJrMqfRMvn3gVn/DyapX/o3XNpjyV9DhLLAu5yeNhdmY+c4/lcZ0vyBsJVlrWiOeNeAvHw7i6KPCQOu3DTBITgjw7Iolb7k7hm/lWgvrWzDo6FZ6o+B2w2+1XAzNDTl0BvOJwOP4bjfsV4PM3w2jYhRAn7RRSmvH5m0ck/YL9DArcWI8aNYrRo0fzzjvvAKfuZ5CamorFYjnFm2nv3r3p1KkTiqIwefJkhg4disPhKLz+8ccfF7vqOBokB5MZkDeQAXkD2W78lTkxs/jKNod5tu9IDqRwl6snvfJ7M/14Q7aYFMbFWRgXZ2ZirJkH8r084fRSM8Qpnu4hVUfnwiXqA8h2u90AHARaOhyO/SUELXEAOS7mFYyGkvczAC8m42aEOFkmKRV8/hsAc7Gx/IFGOPPP7BW1KN999x2ffvopM2fOZPz48fz666+89957mM1m3G43qamptGjRgoEDB54Wd8uWLQwePJhVq1YBZ3ZBEcqf+5fzy8//4MEH9mAySXz+xmTlLChz/sPhxcsy6xLm2Gbxo3UhfuGnsfdaerns3OG6m2NKMu/HmfnSZsIA2PN9POn0cFng9HoUCFDoIXX/AWO5PaRWtsHFylZe0MtcViI5gHwuPJLdCuw5gxBECDPBYHUUJQMhJFIKgsEUShKCsyXS+xmA6rAO4MYbb2T48OEkJiaeMR+R7PmAakbq7O5KZ3dXMpVMvrF9yRybg5GJ/+A/CSPp6O5Er3w7z+TeysdxsThiTKTFmLhL8390VYiriwIPqXd2czFzbgz//Sie+x5N5qaWHoY9k0Oz63wRy7eOjk75OBc9g0+AzQ6H4/0w1x4HHgdwOBzNvEWmoRw4cKDQzXNpUUQ61aq0RggPUlrJzF5DUFY/+wIUw4gRIzhy5AiTJk0qsxvrCRMm8N133zFr1qzC8h08eJA6derg8Xj45z//SV5eHuPGjQsbP7RnYDSZkeJ3EDXLXaaS+FVsJU2ZxkwljaMig2SZgj3Yh07BR5ivXMVEJUi+gLuDgmEBA03l6c/E7YaJnyq8+Z6BjGOC7p2C/OvvAa5rUro6aDQa8fsrjuO8aFPZygt6mcuK2WyGC8FRnd1uNwOHgMYOhyP9DMEjts4gLubv2CzTcHkexJk/+swRysjIkSPZsWMHU6ZMKbMb68mTJ5OWlsbMmTNPcWMdyo4dOxgwYABr164Ne/3P/Sv45eeX6d9vD2azxOtridvTB7f3DqBs4llWfPhYYVnK7BgHi60L8QkfjXyN6ep6kEylL5/HVCFHEXRw+3na6aGl9/QpReE8pL70VC4Nrij5hahsJoTKVl7Qy1xWLq
R1Bt1RewVnEoKIku96Fp+/JfmuZyOedjT2M8jPzycnJwcAKSVfffUVjRs3LkWKFvLyn0ZRMkiIe47kKjcQFzMco2ELhPU4VH5MmLjF05nxWRNZc2Qzr2a/hkEaeTdhGDNir6C15yF65W9hq0mhZ3IsPavFsLSI/6PYWEnqQCdr5qfz7BO5LFlhoeNdKTw7ogr7D4R3oKejoxNdot0z+ByY73A4JpcieIVfgRyt/Qz279/PwIEDCQaDBAIBrrzySkaNGkWNGjXCprVv32a2/vIC997bCo/vP4DEZFyH1TIDq/k7hHDj8zfC7bkft/cepKwSsWdQHL8bf2NOzCy+tM3mqCGDKoHaXOH/D78bb+OYwcLfvAGednro6vaf9gWSeVxh/P/imJoWiz8AfXvl88ygXGrVOHVOamX7aqxs5QW9zGXlgtjPwG63xwAHgCscDseJM4XnAhCDisLevbv5dcs/6XHbWxiMtU+5JsQJrOYvsVrSMBm3IqUFj/c2XJ4++PytiXZn0I+fFZZlzIlxsMi6AI+AFP/z5IsnOW5I4ipfgKecXu5y+U6bvXAkQ+G9D+NJmx2DosCDffJIHeikWlVVFCpbQ1HZygt6mcvKBSEGZ4EuBqWktL6JjIZfsVrSsJrnoCg5+AP11LEFT2+CMroDzgDZIovvbF8zO2YWP5u3gLwXk3wFp3Ipl/oDDHF66R3G/9Gffxl4d0I8X3xtw2qRPNY/jycGOKl/RbVK1VDoDWPloKKIwUW1AlnnVPyBJjjzX+NY9mZOOMcSDNYiLuZ1qlVpQWLcw5hNCyCsj9LIUEUm0S//IeYc+5YFGYt5wplMVdkJE304bNjKsCo2bqxh4eNYE/kh1fnSuuE9pL7+rqJ7SNXRiRJ6z+ACpLQ9g3AYlD+wWmZitTgwKBkEgjVwe3rj9vQhELw8Sjk+SYAAqywr+MLmYJ7NhUs8i6QdMcF8BuTlM8RpJrFIlQznIfXBPnlYyzZ+f8GhfyVXDvSegc55IRC8gjzX38nMXk927if4/X8jxvoB1aq0pUr8vVjMcwBX1O5vwEB7TwfGZn/ApiP/5c3sLTT2PYVbWcH4+GT+VtPAwKQ/OaR4CuMUeEhd/p2v0EPqTd1rMM2he0jV0YkUuhhcgBgOH8YwZgzV27bFNmfOWaZiwuvrygnnFDKz1+PMH4ZBOURiXCrJVZoRF/N/GA2/RjTfRUmQidyf358FR0ezLD2FfnljMbOc722NuLFGHF1SNrLYsq1wm8+WzeUpHlKH/0v3kKqjEyl0MbgQ8ftBSgwHD5I4dGg5BEElKGuR736azBMrycpx4PXdgs2SRtXEriQldMNmmYoQpZkQdvZcEWjAmyceZufhVryTvZx6gY1sM7bnwWrNuLbmYv4TP4PDqGbEAg+pn07IJCFe95CqoxMJLkoxyFB3+4N1AAAgAElEQVTS6VutF0eVjIimG00X1m63m+HDh3PTTTdx6623MnTo0FLlSXG5iH/99XKXTUsNn/8mcvLe51j2JnLz/g34iY8dQXKVpsTHPo3JuJZoLWgD1YzUJ78pqzKuY9HRo7Tx7CJb3M778QOpb1pM72p/51vrV3iFm1vbe5jnOMbH7x5HAE88X5Xu9mQWLbNQcYbCdHQuDC5KMXg/7r9sMK9jXFxkPWYXuLBesWIFixYt4rLLLmP06JPuLkJdWK9cuZLx48efEr93794sW7aMRYsW0bVr11Ma/Ndeew2LxcLKlStZvHgxL730UqnzZSgy8B4JpEzC5RlAVs5Cjp/4HrfnXizm+SQl9KJqYntirB+giMiKbVEa+mOZlXkJm9I99MnLROEeVlvGM7hqFZrWfIiXE4fxi2UTPbq4WPzlUd77Txa5ToWHnqzGXQ8ks3Jt5B0U6uhcrFwws4lGJbzCDtOZXFirLph/Nm9GComQCtd7b8B8Bq+lDX2N+EfO+XNhnZeXR/Pmzdm4cSOxsbFnvO+fy5ez9R//4KE9ezBLiRQC55Ah5D32GMGUlDKXo/TkYzV/i9WShtm0HikNeH2dcXn64vV1INpOcEVyNd7w5DIp1kCeYsIol6CIN6jvP0yvfDt35/ci2VObmXNjePfDeI6kGy5oD6n6zJrKgT6bKEocNPwVciSLHEeOcC6sJ0yYUOBFsNCFdTghgFNdWO/bt4+kpCTGjBlD9+7duffee1m/fn2p8iEtFnw33EDc+PHUaNWKhJdfxvBXdMoMMbi9drJz55KZvYx89+OYjBupEv8Q1aq0JNb2BooSPU/l1RAMzfWyKd3FiBw3icH2ePmBg8p03kjYRNsaLRhY434SHvqMxT/s59VhJ9i5y8id96fw4JNV+XXHufDYrqNzYXLB9AxKQ4aSTocarfGIk9MSrdLK0vQ1pAQj68Y6ki6st2zZQvfu3Xn//fe555572Lx5Mw8//DCrVq0KW/4/V69m64gRPOBy4R42DFfPnhj27CFuwgRivvgCpMTVsyfOIUPwN2gQqSIXgw+zaRE2ywzMpqUIEcTra4vL0xePtxtgjdidin5BuQSkxZiYEGvhkFGhRuAQ8DrZhinEB+O4zXUn3Y/14ecp7ZjwSXyZPKRWBPSv5MqB3jOIAu/H/ZdgkcHNAMGIjx2MHDmSvXv3MmHChDILweTJk5k7dy7Tpk0r3Mugbt26GI1G7r77bgCaNm1K1apV+eOPP8KmEahVi8Dzz5OxciWunj3Vc/Xrc+Ltt0lftYq8hx7C+vXXpHToQNLjj2PaurUcpT0TJry+7pxwTiPzxFqc+S9iUPaTGDdEm6L6CoYz7lB3dtgkPJLnY1WGk3eyXNhkDdINY0nxH+JK37/42vYND192B1/+3w08uvJfPDL4kO4hVUenGC4qMdhs3oRPnLoKySe8bDZvjNg9ouHCumrVqrRp04bly5cDsGfPHo4dO0a9evXKnL9gnTrkjBxJxvr1OFNTsSxfTkq3blR94AHM69aVOb0y3TtYh3z3c2SeWE1WThpeX3tslmlUS+xMUsJtWC3TEeRG/L5moI/Lx/KMPD44nk+8tLHGMpDY4H7uzJtHSuASPrhkJGmv1eWG9XfR8ZGf+PoHK+1vr87wkYkcTr+oXgMdnbPiojITRZtoubAG2L9/Py+88AJZWVkYjUaGDRvGLbfcEjatsrijEDk5xE6dSuzEiRgyM/G0aIEzNRVPx46ci93phTiO1TwHm2UGRuNvSGnD7b0Tt6evtl1n6fNQ2u50EFhkMTI23sJPZgM1AkHuy0/HID/h25gZHDD+ie2vK0l5azx/pN2KQRE81CePpx476SG1IqCbTCoHFcVMpIvBBcjZ+CYSLhcxaWnETpiA8dAhvE2a4ExNxd29u7pZcdSRGA0/YbOkYbF8hSLy8Aca4PL0xe25FymTz5hCWV8aCaw0Gxgbb2G1xUhSIMgjeR5u8K3hB9sM5lm/JfdACtbX3yTX0QurVTLwgXyeGOAkMeH8vxd6w1g5qChioPePKwnSZiPvkUfIWLWKrDFjUPLzqTpoECkdO2KbORN80Z56KfAHmpKb/xaZWT+R43wHGUwkPmYUyVWakxA3ELNpC
RA5vxICaOcNMCszn6+O5tHUF+CdBBtPJHWkSmA83x3dwpiE52j23/cwrWmEp+sXjP04nmZdk3j7Y7PuIVWnUqGLQWXDbMZ1331kLF3K8Q8/BKuVpOefp/pNNxEzeTK4ouekrgBJLG5vH7JyvyYzewku9wDMxrVUiX+AaomtiLW9jaIciOg9m/sCfHrcxfwMJx08fsbHmemQksJPpn68kTWLlYmfMuy/m7hsSQ88bRbw7nvJXNs9hpc/O0q+p+KYjnR0ooVuJroAKY8L69OQEsuSJcSNHYtlwwYCycnkDRxI3oMPIhMSIpPhUuHBYlqA1fI5ZtMyALz+9rg9ffB4uwKWiJoQdhsUxsebmWMzIYB7830McXqoFwiyybyBD7dvZtGYDviW3YKh1mFuTV3Jy7dfQn3DpRG5f2nQTSaVg4piJtLF4AIkomIQgnndOuLGjsW6dCnBhATyHn5YXdVcrVrE7lEaFOUvbJaZWM2fYzAcIhhMwu29F6vtSY5lRna9yF8GwQdxFj6PMeED7nT5SXV6uMYfxCVcvP/zFia/24ATG65FXPYHV70wnSe6J9DD14M4GRfRvBRFbxgrBxe9GNjt9irAJKAJ6ljeIw6HY00JUXQxKCXREoMCTFu2EDduHNZ585BWK/n9+uEcNIhg7dpnjhxRAphNK7BaZmAxLUAIHz5/U1zu+/F470RyZtcdpSVdEUyMM/NpjJk8RdDV5SPV6eEGXxAp4YuVTv4zLpb0bXUQV28nZvi/uaOTn17ue2nlbYMSBYur3jBWDiqKGERzzOA94AeHw3ENcB2wI4r30okgvr/9jayJEzm6ZAnu228ndvJkarRpQ+JLL2HYu/cc5sSA19eBHOfHHMveSFC8jhC5JMS9SLWkG4iPeQmjYROR8KJaIyh5OcfDuvRcns/1sM5i5PaUOPpUi2GNxcC97eLY+Lngo3czqeu/hLwBM5h12wju3zSV9imteDf+LfYZzuWz0dGJLFHpGdjt9gTgF+AKh8NR2hvoPYNSEu2eQVEMBw4Q9+GHxKSlgc+H6447cD71FP5GjaJ+71DUL6ijGI0bsVnSsJq/RggXfv/V6hRVby+krBqRezkFfBpr5uNYM0cNCs29fp7O9XKLx08wAHO/s/HOB3H8ecBEYvNfcb38DMrNP9Lc04JeLjvdXbcTL8tXd/Wv5MpBRekZREsMrgc+Braj9go2Ac84HI68IuEeBx4HcDgczbxF9jA8cOBAocuGsyFdEfw3zswms5EFx/LOHOEMHD9+nGeeeYZ9+/ZhsVioV68eb7zxBtVCbOqjR4/mo48+4plnnuH5558vPB8MBhk0aBA7d+7EYlEHQ19//XXq1avHgQMHeOSRRwrD5uTk4HQ62bZtW9h87N+/n61bt9KvX79zIgaFHDmCYdw4lI8+QuTmEuzRg8CwYchWrc7J7Y1GI35/iE8hmQPMQsjJCDYgMQN3IsUA4BYQ5e/4upBMUYKMMQT4U8B1QcGwgIG7pSDoE0xNUxg9xsDBw4L67fbCy8M52MKBTdq4I3g39wf70152wEDZ13KcVt5KgF7msqE5xqzQYtAcWAvc5HA41tnt9veAHIfD8Y8SokWsZ1AgAjNjzUjAKwQHD+WUOZ2iZGVlsWPHDtq0aQPAqFGjyM7O5p133gHU/Qw2bNjAmDFjSE1N5dZbb2XIkCGAKgaLFi2iU6dOKIrC5MmTmTdvHg6H47T7vPLKKwQCAV577bWw+TjXPYOiiOxsYqdMIXbSJAxZWXhatyb36afxtmsX1VXNJX1BGQw7tN7CbBQlm0CgLi5PH9xeO8FgnXLf2wvMtZkYF29mr9FAfV+Ap5xe7nH5CHhg2sxYxk2MI/O4gRYd06k1fBxrmr9PjnKCWv7a3O3qRa/83lweqB+R8l6s6GUuGxdCz6AmsNbhcNTTjtsBwx0Ox20lRCtRDF5JsLDdVPLXlRf4y6CQYVCfjQxpmFp7ilfeRr4AI3M8xV4vjkjtZ3BKGbxemjVrxowZM7j22mvD3vd8i0EBIj+fmOnTifvoIwxHjuC9/np1VXOXLlBGB36loXQvjRuLeT42Sxpm0wqkFHh9HXF7+uLxdYIz7G1xJgLAt1Yj4+It7DAZuMQfZLDTw335PgJ5gk9mxDLhkzhO5Cj06Oqk5QvfsvraiaywLCUogjT1Nqdnfm9ud91JvCx56q7eMFYOKooYRGUA2eFwHAEO2O32q7VTt6KajKLK7yaFdINACnGKEESDSO5nEMqCBQuoWbNmsUJQkZAxMeQ9/jjpq1eT/eabKFlZVH30UVJuvRXb7NnqXs3nHCse711k537OsezV5LufxmjcTmL8QJKrNCfWNgqDsvusUzcAd7n9LDyax5TMfFKCQUZUsdG6RhzTqpt45HEna+an88ygXJatiOFf3e4j4YlvmLnpF4adeJlckcPLVYbRsuYNPFtlCCssywhEcNW1js7ZEs2ppdejTi01A38AAxwOR1YJUSKwn8FJ81AQ1TxUQCTMRKFEcj+DUPr370+HDh149NFHi41fUXoGp+H3Y/v2W+LGjcO0cyf+Sy/FOXgw+XY7WMu/r8HZf0EFMJuWYrWkYTEtRAg/Xt+NuD19cXvvAGLOOk8SWKX5P1plMVIlGOQxp5cBeV4CmQrj/xfH1LRY/AHo2yufpwflcKzuT8yJmcXXti85oWRTM1CTu/PvpaerN/X96v4TGUo6L1R/mjEZ4yK+F0dFRu8ZlI0KbyY6SyI2ZhBOFCIpBiNHjmTHjh1MmTKlzG6sJ0+eTFpaGjNnzjzFjTXAkSNHaNu2LevXr6dq1eJnxVRYMSggGMSyaBHxY8di/uknAtWr4xw0iPz+/ZGl2NazOCLRUCgiA6vlC6yWNIyGPwjKODyeu3B57scfuI7yvFebTAbGxZtZaDURG5Q8lOfl8Twv/sOCsR/FM2N2DEYDPKh5SI2r6uJH60LmxMximWUJARHgBm9Tnpz/Arfa78XkNuOzeuHTHORN3jNn4CJAF4OyoYtBKSkQhY0Rmk0E6n4GGzduPGVzmtIyffp0Jk+ejMPhOGUGUgFjx45l27ZtfPTRRyWmU+HFoAApMa9eTfy4cVhWrCBYpQrORx8lb8AAZBEhLA2RbSgkJuN6rJYZWM3fIoQbn78hbs/9uL33IGXZ81fAdqPCuHgL31iNWIC+eT4G53kI7Fd4d0I8X3xtw2qRPNY/r9BD6lElg69sc9izfj+j7v6AmPyTohm0BQhMzaoUgqCLQdnQxeA8Ec39DADatm3LqFGj6NixY4lpXTBiEIJp82bi3n8f2/z5BGNiyH/wQZyPP06wRo1SpxGthkKIHCzmL7FZ0jAZtyClBY+3Oy5PX3z+Npzt0Noezf/RbJv6G6n+j7wEdym8PT6eb36wkZgQ5IkBTh7tl0dsrMR0RS2E5/R322v18MWhD7jV3ZmkCK2lqIjoYlA2dDGo5FyIYlCAcedO4saPx/bll2A0kn/ffTgHDyZw2WVnjHsuGgqj4Vesls+xmuegKCcIBC7D5bkPt8dOUNY6qzQPGgQTYi2kxZrwAre7/aTmeghu
VXhrXAKLllmpVjXAU485eWpEYrHpvPrus6y/eQVx19jo4ulOZ3dXagXPtYuQ6KKLQdnQxaCScyGLQQGGffuImzCBGIcDAgFcd92lrmq++upi45zbhsKFxTwPm2UGZtMapFTw+m7B5bkfr+8WoOzPPUMRTIw1MzVW9X/U2e3j6VwvcqPCm2MTWLnWwp/ZEM74KIVESPWdz0nKZk37pazpsJTMm45w5ZX16erpVqY1DBUVXQzKhi4GlZyLQQwKUI4cIe7jj4mZNg0lPx9Xt244U1PxXX/9aWHPV0NhUPaqvQWLA4OSQSCYgttjx+3pQyB4RZnTyxIwOdbM/+IsZCuCth61p8AKA2P7JzMj79T5TflGHyOmvcSb9YejrLUg1pgJrFEw71NlIzvpOOvaL2d3u+3YWhu5vv51NA40QUSmjTin6GJQNnQxqORcTGJQgDh+nLhPPiH2k09QTpzA3b49ztRUvK1bF65qPv8NhR+z6UdtQdtihAjg9bXSpqjeRvhv+uJxCpgeY+ajODMZBoWmXj+ba1ThJh+k5ampuYC+sbDKIrnqcj91agWoUztA3doBrrIEuSrDQNIfXiy/KCTuUwe9s5OO83P79eS3yaZWixSuaXAVBuVcbG1afs7/b3zu0cXgdHQxKCUXoxgUIJxOYqZNU1c1Hz2Kt1kzclNT8XTqRHJKSoVpKBSRjtUyS5uiuo9gMAG39x7cnr74A2VbMOgGPo8xMSHOwl8pJ1clK0ZJUAA3BKBhkO4HA/x1yMBfhwxkZZ/auBsMkhuqBrjFHOBv3nwaHzNSN0t9d04knmD/Tbsx3xTkkhtrY2ooKuweh7oYlA1dDCo5F7MYFOJ2EzNzJnETJmA8cABfw4aIESPIuPlmMFSkr9wgJuNabJY0LObvtSmqTbTewj1IWfyAcFF8QL3GtcEs1ca6nw9e8kEN9R395UguyUH17/x8wcHDqjAcPGzgr8MGDhb8fcjAkQwDNX2CNn5o64c2fqin7d6Zbfbx+6X55DcyIG8MkNDCS526fpISZTRdS5UKXQzKhi4GlZxKIQYF+HzYvvqKuPffx7RrF/569XAOGUJ+r15QxgV/0UaIbKzmL7FaPsNk3I6UVjzeHrg89+Pzt6I07+yV9urktwqeIgKhVA8EaegL0sgfUP/3BWjgD542nO33w5GMk+Kw/5Dkzz3ZJG030uRAIq2z4qkXULsHxwWsNsIGW5A/agfw1g9QSzNF1a0doE6tAHVrBahRPYDRGIEHVQK6GJQNXQyKoUuvZJpd5+XZwU5qpER+E/NoubAGWLhwIW+99RZSSqSUPP/88/To0SNsPiqVGBQQDJKyZg3y3//GvGULgZo1cT7xBPn9+iFjzt6dRHSQGA1bsVrSsJrnoii5+AOX4/b0we3pTVAWv7bi1NXzAm/Ia/7PE252mBS2Gw38blIK3a2YpORKvyoMBQLR2B+kWjD8ux0gwEbTBpbv2oprYSzXrmhK65+v49JMtR5nGyXrzLBUClYZYacCUqimqFo1AifHLWppQhEiGjZb+doTXQzKhi4GxVCncW3MJolQ4L678yIuCtFyYS2lpHHjxsyZM4drrrmG7du3c/fdd7Nz586wfo8qpRigvTRHj2JZsYK4sWOxrFlDICmJvMceU1c1J5beJHPucGE1f4vVkobZtA4pDXh9nXB5+mhTVMN/amcogg9TEpmqBMK6VPEBfxgVtpsUdhgNbDcZ2GFSOGI4WV+qB4I08gVp6A/QyBegkS9I/SK9CInkV9NWFljn8cuRLdRafSmtl3ag/ZIu1Nyvuv72xgc5dLmfnSlB1tkkG/MVDhxWTVGBwKntUNWkk8JQVCjq1g6QVCVYoilKF4OyUSnF4JX/JLD9t5IbvjUbTpoNhFDLVT1ZrYTmEjwXN7rax8i/l913UaRcWEspadKkCVOmTOHGG29k7dq1vPjii6xcuTLsfSu1GIS8NOYNG9S9mhcvJhgXR97DD5P32GMEU1LOYy6Lx6DsxmqZic3iQFGOEQjWCJmiWu+08MnJyWw/nlkmlyqZimC7UWGHyVAoFKG9CLPWi2ioiUMjX4BGIb2IPcbdLLDOY4H1B44fyqLVspvp+uOdtF7akcQ/1ZXPMimAbO3F38JLxtU+9trgYLqhcHD7kGaa+uuwAZfr1I8Zmy1YaHaqU/t0wWjSKInsbF0MSosuBsUQKgYhyRIfJ2nS0FdsvLMRg2AwSN++fenSpUuJHkaL47nnniMxMZFXX30VgBUrVjB48GBiYmLIy8tj6tSpNG/ePGxcXQxOxfjrr8SPH4/1m2/AYiGvb1/ynniCQN265yGXpcGH2bRYW9C2BCGCeH1tcHnuJ8Y6HpPx9O3Cff7GZOUsOMu7wR6jUmhiKuhFpIf0ImoUjEX4AoXjETHBAyyxzWeBdR7rzWupvf8Seizuye0/3kvD5ddj/Us1z8mqAWQrL8E2HmRrL/IqPyggJWRlK4XjFgWD3Qc1oTh42EDm8dNnRdWsflIkipqj6tQKEBNTYdqsiKCLwelExExUgNkkURS47548nn3CSfUIjyFE0oW13++nX79+vPjii9x4441s2LCBJ598kqVLlxIbxsunLgbhMezZo65q/uILkBJXz544hwzB36DBOcxl2VDEIawWBzbLTAyGP5HSDPgR4mR9ldKMy9MXZ/7oiN67oBexvaAXYTKwy3h6L6KRL8Blfif5Yg2/m2ay3vI9XuGhyZ7reWjRE7Rb2olaKy5DOaiavIoTh3C4XOqsqALBOJ4dz+97PIW9i8Ppp5uikqqc7E3Urnm6KapqUsmmqIqGLganE7Exg2iKAETehfUvv/zCM888w9KlSwvD3Xzzzbz33ntcH2Ylri4GJaMcPEjcRx8R89lnCI8Hd48e6qrmCr1hUBCTcRU2y2Qs5vmnNGZSmsnMXlviwHOkKOhFbDepIqGORyhkhPQiUgIBqgeO4BebOWiYh1fZQHzwCPf+1puei+6n8YqmmFfFIA6FiEPrIuJQTPNV9Df2+yH9qMLBQ8bC3sRfh7TexRlMUXVqnuxdhJqjap6DWVFlQReD04nIbKLm13ujJgIQHRfWGRkZtGvXju+++44GDRqwa9cu7rrrLlatWnXangegi0FpUTIziZ04kdgpU1Byc3F37Kiuam7ZMoq5LD/xMc9htcxGiJM7oAUCdfD4OuP1dcbraw2c22m1x0J6ETs0odhlVPBpqqVIHwZ+Iyh+xih3cr1Xod/OG+jxY0fiVldFWW0+KQ7VwvQctOasrL+xlJB1QnDwkLFYc1RRU5SiqKaowoHuIlNo69Y+t6YoXQxOp8KvM4imC+s5c+Ywfvx4hPZyvfjii3Tr1i1sWroYlA2Rk0Ps1KnETpyIITMTT4sWOJ9+Gk+HDlREe4Ii0qlWpTVCeJDSgtM1FLNxHWbTcoRwE5QxeH034/V2xuO7FSmTz0s+fcDuEIHYZlTYag6SpYRM5JCHSJIHudYj6Pn7pbRfWo2aK00YiohDsLUX2dpD4m0xZCUfi1DzpuJyo4mDMewivcPpBvz+001RoeMURQe6q1WNnCnqohc
Du92+D8hF3UPc73A4wo+GnqTCi0FFQReDs0O4XMSkpRE7YQLGQ4fwNmmCMzUVd/fuFWxVM8TF/B2bdToud/+QsQIXZtMqLKaFmM2LMChHkFLgD9yAx9sZj68zgcA1RLQlPQuOKYJtJoUllgxWWrLZa4zBLS5H3QEXDNJPfZ+XW3eZ6bDEQMMVRqqvMGM4rP4GoeIQbOOFK4s3K0WCQACOZCgcOmwsND2dMivqkIH8IqYoqzV4UiTCzIyqWT1ASa9m6Jqoxg2rVgoxaO5wOEpbSl0MSokuBuXE68U2dy7x77+P8Y8/8NWvj3PIEFw9e1LiG3wOUUQ61ZKeJjNrHEEZbg9kidGwDbNpIRbzQkzGXwAIBOri8XU6b+ak4vjNsIfPYzeyyHqE/cYEJI1R5PX4hdarkdB8t+SeZWZaLoYrlxuJOaSJQ3KAYCtNHG7yQoPoikNRpITsEwXuP041Rx3SehnHMk83RdWoHqRuLX9Yc9Qtd1UvXBP1UN8gTzx89KzWROliUMnRxSBCBAJYv/+e+HHjMG3bhr9OHZyDB5Pfpw+UcTwoGpSlvIo4gtm8WO01mFZo5qRYzZzU6byak4pySDnIQm3K6jrzHvyiEQmBtlQLdsRvbMQBYvAhqPeH4OYlBrouUWi+1EjSIfXrPJAcgNCewzkWh3AUmKIKexcF4xYhs6KKmqIKUITEZD67hbIXihjsBbIACXzkcDg+DhPmceBxAIfD0czrPXWP1wMHDpR5kLYysH//frZu3Uq/fv0qlRgYjUb8fn/kE5YSMX8+hjfeQFm9Glm9OoGnnyY4aBAkJJw5fpQ46/JKF/AjQn4PfI/gEBIBtESKHsBtQOMKMV6SyTHmKd/xjfIlS8RiPMJDNVmbVsFHuEzegV80ZJtQ2IokZq/gpqUG2iw1cPMSAzUOquLgqS4JtA+S2D6Iub1EuaZCFO0UAgE4nA5//iX484DgoSdPn86kKJI2LSSLvyr9b25WV9NWeDGo7XA4Dtnt9urAQiDV4XAsLyGK3jMoJXrPIHqY160jbuxYrEuXEkxIIG/AAHVVc9Vzv+9wZMorMRp+DTEnbQEgELhENSd5O+P1t6IimJOcwsnmlI3M9jpYal2MU3ESF4yjo7sTXdzdaOzrxD5jHDuMBrYZFU4cMFFnhYlWywzctNRAbU0ccqoHSW/rQ7b2ULWFm4T657/nUJRT1kSZJYo4u+nwF0TPIBS73f4q4HQ4HG+XEEwXg1Kii0H0MW3Zorq6mDcPabWS368fzkGDCNY+d3sOR6O8qjlpERbTojDmpILZSdXOnFCUKCizBw+rLStZYP2BRdb5HDdkYpYW2nna08XVnVs8nakarIoX2GVU3W4cOWDEutpC3ZVGblxqpJZmVjpWI8jv7fwcv8mLsZWHSy73cXkgWIxXqHND6JqogjGDs5kOX+HFwG63xwKKw+HI1f5eCIx0OBw/lBBNF4NSoovBucO4axdx48djmzMHFIX83r1xPvkkgcsvj/q9o19eF2bTSm120uKTs5P8TfH4CmYnXc25/KwOV+YAATaZNzDf+j0LrD9wyHgQgzRwo7clXV096OzuSq3gqSKdIQT7/jLhXmshfpWZK1aYSNHEIb1mkLUdAvzR1o+zjYfky/009Ado6AuQdI5m2oeuiWp0Mc8mstvtVwBztUMjMMPhcLx2hmi6GJQSXQzOPYYDB2RnY48AAB8JSURBVIj78ENi0tLA58N1xx04n3oKf6NGUbvnuS2v6nbbYl6I2bQQk3ErUGBO6ozH2xmfvyXRNiedqcyhXlYXWn9gl+l3AK7z3kAXdze6uLpxRSCM+xEJ3r0Gjq+zElhrodpKMwlHTorDqg4BVnUIsKudn5jLfTTSnPk19ge53B8k0hOPa1zfBcPRzNPOB1Kqkf5z6X1QnVcxsNvtwuFwREM/IyIGYpUZY/9qCI9AWiT+aZnIm7xnjlgKormfwaJFi3jrrbfw+/1UqVKFd999l0svvTRsPnQxOH8oGRnETppE7NSpKE4n7k6dyE1NxVeMU8HycD7Lq4jDIbOTVmrmpDi8vvZRNSeVtcx/GHazwPYD863z2GL+GYArfVfR2d2Nru4eNPY1QYRrKyWw14BYY8G3xoKy2ow1XW3yM2sGWd4hwMoOAVZ2DHCoQZCrC/aLKNw3IkCVcrSCtes0K/baoYObSp/OeRaDdx0Ox3ORuHkRyi0GYpUZ40NVESELRKQtiH/q8YgIQrT2M8jOzqZt27Z89dVX1K9fn9mzZzNnzhw+++yzsPnQxeD8I7KziZ0yhdhJkzBkZeFp04bc1FS87dpFbCpLxSmvC7NphTrOYP7/9u48Sqry3Pf4d1d3DT0w2oI0DgRREYEABiRoiBicPTHL43mJMcYYz+VmnYhxwmiWORqiVy/JzYpRYyJRE3BIHjmaRGNCxIiIqDEqdqOiBmMWk6IyQw817PvHrm66m27ooXZVD7/PWrXo3rtq7/elu+up93mnpRRFPsT3PZKp46lvTCcdTS7ek7pS56ZDVv8We5GMl2F46tCgxVB7JsfXT6aorc/4PvBeEZEX4ngvxPBeiBPJBocdh2RYNT3FUzPS/OWUDO8d5YMHlanmO86N6UArolcEgxDtNxgU/Xd/vDf3/8bnvRjD8/f9f/E9H39q28HAH5MkPa9w+xmsWrWKK6+8kmeeeQYIgs7YsWOprq5mcCujWBQMug9v925KH3yQ8l/8gqIPPqB+woRgVvNpp0EHV7NtqTvWFzIUF61uJZ10OHXJmdl00lQaZht3VK7qvCWyhb/Gn2JJyZOsiD9HvVfH4PRBzKw9ndNqz2Ba3UnE95fyahEcIivjeJuDt/naoWne/1ySlz+f5k8zMvz1WEhHgvedhO9zTIulwFtrRXTHYHDADnXn3L1mdmn2aw9YYGb/mYub51prgaDheK5DXiaTYeHChZx22mkAjS2ABolEggULFrT5+vvvv59TTz0VgJEjR7J582ZWrVrFhAkTeOyxoLtlw4YNrQYD6T78sjJ2z57N7osvpnTxYsrvuovBl15K8uij2XXZZdScey7daonMLouQSo8nVTOe3TVXZ9NJS4lHn6Ik/hClifuy6aSmo5Py/zs8ODOY82tmcX7NLHZ5u1gef4YliT/xZMkfsLKHKM+Uc3LtFzi99kw+X3cKZX6LpeI94Mg0mSP3wFf3kG4IDivjRF+IcczyOKMXJ7gIyBySZtu0et75XIrnT06xYrTHkkQxDxftDYjDUw07zgXB4Zv5/M9op/b8lo5s+MLMfOfckSGWp03t+eQeHTkMr66VlkHcJ7V4386arrjhhhsoKyvjkksu6fBr7777bt59910eeeQRAPr378/dd9/NTTfdRF1dHTNmzGDAgAEU96o3kV4uHmfPhReyZ9YsSp54gvI77mDQ5ZfT70c/CmY1OwfZxQ17k4w/jNq6i6itu4iW6aRE7I+hpZM6otwv56zaf+Os2n/bZ8jqE6W/b3XI6j6aBoeLssFh7d6Ww6AVcaY+WsJU4KpD0mQ+W8e2afW8OT3F34+hcee5Z+LFpD2vWwaDA6aJnH
OPAEuAlcBngTPN7PwQytLt+wwa5Ho/g5Y++ugjTjjhBFavXk1pK5u9K03UA2QyxJcupd9Pf0rstddIDx3Krtmz2XPRRfitbFjUmh5V331kWoxOWg00pJNOpa5+ZqvppHzWeX9DVk+rPZNTa06nMjO8fRdrERwiK+N4H2XXVjok3bhcd81n63h3lM/ET5/KwR/uW88PhlaQeXVJu+uQ1z4D51wpwZIRRwFvA780sz25uHkL3X40EYSznwEEexoMGTKETCbD3LlzKSsrY968ea1eR8GgB/F9Ys8/T7877iC+YgWZgQPZdeml7L7kEvw2Pgw06JH1bUPE29hkdNLzweikTL8gnZSc2ZhOKlSd2xqyOr5+AqfXntn2kNW2Lwhri4N9HF6IE3kh1iw41B6VIvpijOLk3vfxPaU+X3miFjtma7tvk+9gUAT8B3AwcDcw0cxezsXNW+j28wzC3M/gmmuu4eWXXyaZTDJ9+nRuuummxnu0pGDQM0VffZXyO++kZMkSMqWl7Pna19g1ezaZoa3vYNbT69u2PU0muy2lKLIZ34+QTB1PcexLbN06jXTmKAq5hkSnh6y2pWVw+EOi1dfXJHyK1m5q92XzHQweBp4FvmZm05xzS81sZi5u3kK3DwbdhYJBz1a8Zk0wq/l3v4PiYvbMmhXMam4xr6S31Hf/MhQXVRGPLW2RTjoiOzqp9XRSPnVpyGobYsPbXtakfsPGNs+1lMtg0J6xbweb2c+B2lzcUKSvS40ezbY77mDzc8+xxzlKf/tbhpx0EgPnzKH47bcpefRRhkyZQjSRYMiUKcFSGL1WhFR6ArtrrmHrjiVkvLXs2H0rqfSRlMQfYFD/C6gYOJ7+Zf+bRGwxnrcl7yWszAzn4t3f4MFPHuGlD1/n/279McekRvNg2SIuqPh3pg6dyPUD5vJM/GnqqGvXNf146x/C2zqeD+1pGTwE/B64ApgPnGdmF4VQFrUM2kktg94l8sEHlP/iF5QuWkSkpgY/EsHL7F20LFNSwvb584PNd3q55j/jPS0muzWkkz4TjE6qP5V0ZhSFSic1HbLadJXV/Q5ZzcrVYJd8p4kSwH8CxwJrCOYZhNFKUDBoJwWD3snbsoWh06YR2blzn3OpYcPY/Pe/F6BU+dX2z7ghndQwOukNoGk6qWGyW2H+Htq7ympTuRjsktdJZ8BoM7szuy/B14ERBEFBRHLIHzwYb9euVs8Vb9rE0OOPJzl2LMnx46kfN47k+PFkDjkkz6UslCCdlKqZwO6auUQiG7LLcC+lJP4ApYl7s6OTTqYueSr1yRl5newWJ86Mui8wo+4L3Lz9tmZDVp8e9FSrQ1b9E+vZ8P4qrh5yOT/efAcHZ1rb3jR/2hMM/h/wBWAeQUfy/QTzDUQkx9KVlRRv2LDP8cyAAdRNm0a0upr400/jZVv06SFDGgNEQ5DIDBvW/bb6yrFMZjg1dRdTU3cxe9NJweikRPzxbDppcjBsNc/ppCKKmFI/lSn1U7lhx/ebDVmdN+B7zBvwvcYhq2uK3+RFbyV3lP+EeTv+T17K15b2BINS51wciJvZw8657jh5TqRX2HnddQy49loiNTWNxzIlJWy/+ebGPgNv926ib75JtKoqeKxeTXzZssZ+hvRBBwXBIdt6SI4bR3r48F4cIEqpT55OffJ02JOhuOj1xnRSeektlJfeQio9ojEwBEtx5yed5OExLjmeccnxXL3zO82GrP6w/62Nz1tc9lvm7LqioK2D9gSDBwg6kG/M9h/8M9widV6u1ghvS1eWsAa4/vrrefnll/E8j2g0yvXXX8/nPvc5IJh1fPnll7Nu3ToSiQTz589n0qRJXS6z9CwNb/j9bruNoo0bSVdWsvO665p1HvtlZdRPnkz95MmNx7yaGorfeINodTWxqqqgBbF8OV46DUB68OAgODQ8xo8nfdhhvTBAREilJ5KqmcjummtbpJMWUZr4JZlM/2w6aWbe00kj06P45q7L+Oauy7h6wOU8Xvo70l6aDJmCtw56zKql7ZGrlQDb0pUlrAF27NhB/+wG62+88QazZs2iuroaz/O46qqrOPzww7niiiv429/+xpVXXsmKFSvwWvljVQdy39Dl+tbUEH3rrcbWQ6yqiuK338ZLBRuuZwYOJDluXND/0BAgjjiioAEizJ+xx26i0eeyrYanKYp81CSd1DA66UjykU7aHPmQk4d+ljpv71DUhJ9g2YcvdKh1kO8O5G6h/3//iOib73T69QedP7vNc8kxR7Nj3jUHvMagQYMaAwHApEmTWLhwIQB33XUXa9euZdGiRcRiMR566CHmzJnDggULGpewbggEEAS6pm/0jz/+OC+99BIAU6ZMIR6P8/rrrzNhwoSOVVSkQUkJyUmTSDZtYdbWEl2zhmh1dfCoqqJ8wQK8ZBKATP/+ezupG1JMI0Z0eTnu7sCnjPrkGdQnzyAYnbSqcbJbeenNlJfenE0nNezsNoWw0kl3lv+ETIu1lNMFbh20GQycc4PMrP2LZPQxnV3C+oc//CGPPfYY27dvZ8GCBXiex5YtW/B9v9ly1cOHD2fjxo0KBpJbiQTJCRNINv29qqsj+s47e/sgqqspu+8+yuuDYY6Zfv2CANGkDyI1cmQPDxARUulJpGomNUknPZVNJ/2a0sSCJumkhtFJ+19LqiNejb1C0ms+jDTp1fNqrHDDh/fXMrjVOTcIeBf4C7DSzFIduXh2XaO/AxvM7JzOF5N2fXLfX5rok8X3dOX2++jsEtZz585l7ty5rFixgltuuaVx7wKRgonHG/sSuPDC4FgySfHbbxPLth6i1dWULVyIVxtMMcqUle0NENkgkTrySCjK9W7B+RGMTvo6NXVfb5ZOikeXkoj/Ad8vIpmaHASG+pnZ0Umd98THe/swu0v6s81gYGbfBHDOHQWcCszObm7zEvComa1vx/W/DbwF9D/QE3uSefPm8c9//pNf/epXRDr56eikk05i586drFmzhvHjxwNBB3VD62DDhg0N+UCR/ItGSY0dS2rsWLjgguBYMknxu+82pphiVVWUPvAAkYYAUVpK8rjjmgeIUaN63OY+TdNJOxvTScHopH6lP4DSH5BKf6rJ6KTw0kn5dMCfkpm9S9A6+Fn2k/4JwDBgv8HAOXcocDZwC3DV/p6bK+mDD2pzNFGu3HbbbVRVVbFo0aIO7WXg+z5r165l1KjgE8Xrr7/OJ5980rjp/TnnnMPChQsbO5Bra2sbg4RItxCNkhozhtSYMdTMmhUcS6Uo/sc/mvVBlD78MJH77gMgk0iQGjOmWR9E6qijoMcMfGiaTvoOkcj6JqOTGtJJA1qkkwYWutCdEtpoIufcYuBWoB9wTWtpIufcbIK9EjCz4+vrm+fQ1q1b1+E9A8LUlSWsM5kM559/Ptu2baOoqIhEIsHcuXOZPn06EOxnMGfOHNavX09JSQm33nork5sMHWzqX//6F9XV1Vx44YV9ajRRcXExqVSHMpU9Wo+tbzoN77xD5LXX8F59Fe+11/BWrWqcXe0nEvjjxuFPnIg/aVLwOPZYiMV6Vp39XcDTe
P4fgT/hsRmfIuBEfO9M4GzwjjngZbpS51gsBvlam6gznHPnAGeZ2X85506mjWDQgtYmaicNLe0belV9MxmK3nuvWR9EtLqaSEOAiMVIHnssRZMns/Poo4M00zHHQAd3EiycDMVFr2XTSUuJFr8FEKST6oMtP5OpybSWTurKzznfC9WdYWZ/ds6NAq4Efmtmyw/wmluBi4AUkCDoM3jUzL66n5cpGLSTgkHf0Ovrm8lQ9P77zSbKxVavxtu+HQA/GiU5enQwgqlhyY3Ro3vEXtJ700lPEYuuxPPqm6WTShN3Ei3ed4m3ZOo4tu5o/wTZfM8zuAb4M/BdYAHwU6D1/EWWmV0PXA/QpGWwv0AgIn1NJEJ65EjSI0dSe+65AFQcdBBbX3mlsfUQq66m5I9/pOzBBwHwi4tJHX10Y/9Dcvx4ksceC90onQyQyRzaZHTSLmLR5cRiTxGPPk0i/nt8H3zfw/P2fhj3/RjJ1GcKVub2BIN+zrnDgbSZveCc2x12oWT/utGscZHc8jzSI0aQHjGC2i9+MTjm+xStW9csvZRYsoSy3/wmOF1URCqbWmrspD7uOPxuEiB8yqlLnkVd8ix2kqa4aBWJ2O8oSdzf4pkR9tRcUZAyQvuCwa3AzcDN2bWJXuzIDcxsGbCswyUjeNPzfb/VJRn6Kt/3SWfXmxHpEzyP9OGHkz78cGrPyXY9+j5FGzY0jmBqWM211Cw4HYmQOuqoZhPlkscdh1/W+mYz+VNEKn08u2qOBy9FSfxhPC+J78eoqZtFxu/eC9V9CrjOzBoS+teFWJ5mEokEe/bsoazgP8DuY9u2baxbtw5AQVL6Ls8jfeihpA89lNozzwyO+T6RTZv2dlJXVRF/9llKFy8OTnseqVGjms2DSI4di19eXpAq7Km5gpL4b7PfFbZVAO0LBu8B851zA4DHgcVmlpeNSGOxGPX19axdu3a/k7sikQiZJtsE9la+77Nlyxa2bt3KoEGDKO5hk3lEQuV5ZCorqa2spPb00xsPRz74gGhVVRAkqquJr1xJaXZfad/zSI0cuW+A6B/+PNmMP5SaulmUJB4oeKsAOjC01Dk3GPg5cAawBLjdzFbksCz7jCZqsG3bNp5//nlazkNoUFpayp49e3JYlO6tpKSE8847r0/VGfrA6JoW+lp9IX91jmze3KwPIlZVRdGmTY3nUyNGNJsolxw7Fn9g7ieTRbwPOWjQ5Xyy9Y5OBYO8jiZyzp0JfBkYRNAymJ29+e+B6bkoxIEMHDiQs88+u83zffGPpq8FQJFcygwZQt3MmdTNnNl4LPLxx836IKKvvELJH/7QeD51xBHN+iDqx43DH9S1xesy/lD8yNNk/MK/f7UnzzAO+K6ZNduLzzn3v8IpkohI/mUqKqibMYO6GTMaj0W2bNkbIKqqiL7+OiVPPNF4PnXYYc07qcePJzM4f5vl5FJ7Jp0dQTBnoBy4GLjEzH4ZQlnaTBMdSF9sGajOvV9fqy/0jDp7W7c2zoFo2Dio+P33G8+nhg9vPlFu/HgyFRX7XKfk0Uf3u6Nde+R70tm9wGXAz8ws7Zy7AAgjGIiIdHv+oEHUT59O/fS9WXJv2zaiq1c364Mo+dOfGs+nhw0LdpTLtiCK1q+n/803N+51XbxhAwOuvRagwwEhV9oTDIrMbI1zruH7nryjhYhIzvkDB1J/0knUn3RS4zFvx45mASJaVUXiqafw2sjGRGpq6Hfbbd06GPzVOfdzoNI5dzvwVMhlEhHp8fz+/amfNo36Jlvlert2EX3jDQ4677xWcztFnUyV58IBP+Wb2Q+AO4HvEaSMfhZ2oUREeiO/vJz6E04gPXx4q+fTBdzQqs1g4Jx7xDkXAzCz1Wb2CFBHsAWmiIh00s7rriPTYu2kTEkJO6/L2wIP+9hfy+A3wJPOuYEAzrnTgUXA1/JRMBGR3qrmvPPYPn8+qeHDg1nQw4ezff78gvUXwAGGljrnpgE/JJhxPA34spltC6ksGlraAapz79fX6guqc0flcmjp/tJEPyBYemIDwR7GrwFXOefm5eLGIiLSfexvNNHS7L9PA3floSwiIlIgbQYDM3s2nwUREZHC0QQyERFRMBARkfbNQO6w7PaYy4F49h6LzezGMO4lIiJdF1bLoA44xcw+DUwAznDOTQ3pXiIi0kWhtAzMzAd2Zb+NZh/t21JNRETyrt3bXnaUc64IeAUYBdxlZt9p5TmzCXZOw8yOb2tbywMpLi4mlUp1obQ9j+rc+/W1+oLq3FGxWAxyNOkstGDQILucxWPAHDNbvZ+nagZyB6jOvV9fqy+ozh2VlxnIuZJdvmIZwWxmERHphkIJBs65g5sscFcCzATWhHEvERHpulA6kIFhwK+z/QYRwMzsiQO8RkRECiSs0URVwMQwri0iIrmnGcgiIqJgICIiCgYiIoKCgYiIoGAgIiIoGIiICAoGIiKCgoGIiKBgICIiKBiIiAgKBiIigoKBiIigYCAiIigYiIgICgYiIoKCgYiIoGAgIiKEtNOZc+4wYCFwCJAB7jGz28O4l4iIdF1YLYMUcLWZHQtMBb7lnBsT0r1ERKSLQgkGZrbJzF7Nfr0TeAsYHsa9RESk60JJEzXlnBsBTAReauXcbGA2gJlRUVHRqXsUFxd3+rU9lerc+/W1+oLqXEie7/uhXdw5Vw48C9xiZo8e4On+xo0bO3WfiooKPv744069tqdSnXu/vlZfUJ07qrKyEsDLRTlCG03knIsC/wM82I5AICIiBRRKMHDOecC9wFtm9uMw7iEiIrkTVp/BicBFQLVzblX22HfN7MmQ7iciIl0QSjAwsxXkKI8lIiLh0wxkERFRMBAREQUDERFBwUBERFAwEBERFAxERAQFAxERQcFARERQMBARERQMREQEBQMREUHBQEREUDAQEREUDEREBAUDERFBwUBERFAwEBERFAxERISQtr10zt0HnANsNrOxYdxDRERyJ6yWwa+AM0K6toiI5FgowcDMlgNbwri2iIjkXihpovZyzs0GZgOYGRUVFZ26TnFxcadf21Opzr1fX6svqM4FLUchb25m9wD3ZL/1P/74405dp6Kigs6+tqdSnXu/vlZfUJ07qrKyMmfl0GgiERFRMBARkZCCgXPuYeAF4Bjn3Hrn3KVh3EdERHIjlD4DM7sgjOuKiEg4lCYSEREFAxERUTAQEREUDEREBAUDERFBwUBERFAwEBERFAxERAQFAxERQcFARERQMBARERQMREQEBQMREUHBQEREUDAQEREUDEREBAUDERFBwUBERAhp20sA59wZwO1AEfBLM7strHuJiEjXhNIycM4VAXcBZwJjgAucc2PCuJeIiHRdWGmiKcA/zOw9M6sHfgOcG9K9RESki8JKEw0H1jX5fj1wQssnOedmA7MBzIzKyspO37Arr+2pVOfer6/VF1TnQgmrZeC1csxvecDM7jGzz5jZZ7Kv6dTDOfdKV17fEx+qc+9/9LX6qs6dfuREWMFgPXBYk+8PBTaGdC8REemisNJELwNHOec+BWwAvgx8JaR7iYhIF4XSMjCzFHAZ
sAR4Kzhkb4Rxr6x7Qrx2d6U69359rb6gOheM5/v7pPJFRKSP0QxkERFRMBARkRCXo8gH59x9wDnAZjMbW+jyhM05dxiwEDgEyAD3mNnthS1VuJxzCWA5ECf4fV1sZjcWtlT5kZ3J/3dgg5mdU+jyhM059z6wE0gDqeyQ817NOTcQ+CUwlmD4/TfM7IVClKWntwx+BZxR6ELkUQq42syOBaYC3+oDy3zUAaeY2aeBCcAZzrmpBS5TvnybYABGXzLDzCb0hUCQdTvwZzMbDXyaAv68e3TLwMyWO+dGFLoc+WJmm4BN2a93OufeIpjt/WZBCxYiM/OBXdlvo9lHrx/14Jw7FDgbuAW4qsDFkRA45/oD04GvA2SX7qkvVHl6dDDoy7JBcCLwUoGLErpsuuQVYBRwl5n1+joDPwGuBfoVuiB55AN/cc75wC/MrFsMuQzRSOAj4H7n3KcJfse/bWa7C1GYnp4m6pOcc+XA/wBXmNmOQpcnbGaWNrMJBDPZpzjnenX/kHOuoR/slUKXJc9ONLNJBKsdf8s5N73QBQpZMTAJuNvMJgK7gesKVRgFgx7GORclCAQPmtmjhS5PPpnZNmAZvb+f6ETgi9kO1d8ApzjnHihskcJnZhuz/24GHiNY/bg3Ww+sb9LSXUwQHApCwaAHcc55wL3AW2b240KXJx+ccwdnR1zgnCsBZgJrCluqcJnZ9WZ2qJmNIFjK5a9m9tUCFytUzrky51y/hq+B04DVhS1VuMzsA2Cdc+6Y7KEvUMD+vx7dZ+Ccexg4Gahwzq0HbjSzewtbqlCdCFwEVDvnVmWPfdfMnixgmcI2DPh1tt8gQrC0yRMFLpPk3lDgMeccBO9LD5nZnwtbpLyYAzzonIsB7wGXFKogWo5CRESUJhIREQUDERFBwUBERFAwEBERFAxERIQePrRUpCXn3OeBGwk+6KSB75nZSufcduBVgrWNvgFUAjPN7Ibs624ClpnZsibXKiVYFuLo7OvuMbNfd6FsAwkW3etTkwWlZ1DLQHoN51wF8H3gS2Z2MvAloCZ7utrMZgBXE6z50x43As9mr3US8M8uFnEgcF4XryESCrUMpDc5C3igYb0mM9sJvNbiOasI1jhqj2lm9p3stXyCfRVwzv2UYDntHcCFBAsGzjSzG5xzX8++dhlwH7AF+BRwLjAbONU5twz4DzP7qONVFAmHgoH0JpVANYBz7ivAfwEvmtk1TZ4zHXi7szdwzk0GysxsunPuq8A3aXvl2EEEy2dcAPw7wcbnh/f2pSWkZ1KaSHqTTQQBATN7CPgqUJE9N8459wxBgLgNqCXYPa1Bgr0ppf05kqDvAYJdyEbRfH8Fr8nXb5pZBthAkCIS6bbUMpDe5ElgsXPOzGw7zX+/G/oMAHDO1QMTnXMNH4gmAfNbXG+lc+5CM3swu0jgiQTrx5yWPf8ZYC2wnWANJYBxQFX265ZBIgkUdaWCImFRy0B6jWwO/vvA751zfwV+RrBndGvP/YRgKfDlwHMEeytvafG07wOfz+b4nweONLO/ATXOueeArwA/J3jzr3TOPQkcvJ8ifgAMds4tds4N7mQ1RUKhhepEREQtAxERUTAQEREUDEREBAUDERFBwUBERFAwEBERFAxERAT4/3pI+jvKKYlBAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAEkCAYAAADHDTFTAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzsnXl4FEX6xz81M5nJJIFwhIRLQBAIp3IIciPnolyCDLeCioiIrPrTRV1dF1xE13VFOQTRIKBAc8jpKpfciAICyhG55QyEcCSZe6Z/f/QkmYSQZEIukvo8Tx6Y7uqqerurv1X11tFCVVUkEolEUrLQFXYGJBKJRFLwSPGXSCSSEogUf4lEIimBSPGXSCSSEogUf4lEIimBSPGXSCSSEogU/ywQQtQQQqhCiLaFnZecIIR4RwhxvLDzcTsy5k8IMUII4S6MtO8gno6+MlHV9ztHZUQIcVoI8fc7TT8H+cuze+qza1hexCUpOHJa1qT4Z81ZoBKwG0AIUdX3QnQs1FwVHxYDVQo7EwGyE61MXCjsjNyGu/GeSgoBQ2FnoCijqqoHuFTY+SiuqKpqA2yFnY9AUFXVSREuE3fjPZUUDgXa8hdCtBVC7BBCJPr+DgghuvvOZdp9FkIcF0K84/dbFUKME0IsFkIkCyH+FEI8LoQIF0J87Yv3pBCiv981KXEPEUL8IISwCiGOCiE6CCGqCCG+88V1WAjRLpPrUvJ01vfvj77jp29j5zNCiHOZxLPA79hIIUScEEL4fkcJIeYKIa74bNghhGjvF14IIT4XQpwQQth8Nk4WQpiyuN/lfPFsEUKUyerZ+ML3EUL86rs/14UQPwshmvidryWEWCKESPCFOSiE6Ok7V1YIscD3PGxCiFghxCsp9t0mvXQuipTfQog2Qoh9vjR+EUI0y3BdFyHEb0IIuy8PHXLqovCVgZO+azcIIe71O3eLa8hXZlUhRA3f73Run9ukcb8QYqcvjT+EEJYc5Cuntj8khNjqu8fXhBDfCCEiM8aTi3gf9t3LlHv6cCZ5zK6M/s1Xbmr4HfuHEOJqVvfLL+w7Qnvf+wjt/UwWQvwohKiVIdwjQoi9QgiHEOKyEGKGECLU7/xc37MdJ4Q4J4RIEkLMEUIECSGeE0Kc8d272UIIo991XYUQm33l+4bvvWmRIW1VCPG8EGK+7x6cFUK8liHMECHEbl8c8UKItUKIOhnCNBFC/ORXRh4XGdw1QogwIcRUIcR533P7VQjRL0M8AZe1FApM/IUQemAVmgulqe/vHcCai+jeBL4D7gfWAPOARcB6oAmwFpgnhCif4bpJwEzgAeAIsBD4Cvjcd90R4BshRNBt0m3q+7c/Wtf/wduE2whUEULU9f3uDFwBOvmF6QT8qKqqKoQwAz8CpYAevrx8B6wXQtTzhRdAHDAEqAf8FRgJvJFZBoQQ1YDtwEWgm6qq12+T15TwFYElaPekAdAK+Bhw+53fCZQFegONgLcAry8KE/Ab0Beoj3av/wmMyCrdTNAB7wHj0e73NUARQhh8+ahC+nL0EvBRDuOuBDwPDATaod3vFULcvoIKFN+z/A64DrQEngReBSKzus5HdrZXBNYB54AWQC+gIbDsDuOtjPYe7fWdfwWYmold2ZXRD9Cey0IhhEFoDam/AyNVVT1HzqgEjAGGAq2BMsCXfvlojPb8t6K9x08CPYHPMsTzINAc6Ir2zgwDVvri7AEM9/097XdNGDAdeMgX7hjwfSY68g+/9P8NvJ+hsjShlf+mvvQ9wNqUikYIEYJ2766gPccngJfxKyO+MrkaTeMGoj3nmcAiIURnX5g7KWugqmqB/KGJhgp0vM35Gr7zbTMcPw684/dbBT72+13Bd+zTTNLqmSHuv/qFedB37BW/Y018xxpmliegalY2ZMj3KeB53/+/RhPCm0B937FzwLO+/4/w/TZkiGOTv62ZpPEScMzv9zu++9UYOI9WkHU5fD4ptte4zflJaO6O0ACe+VRgfcb8+f0eAbgz/FaBpn7HHvIdq+v7/S/gNKD3C/MXX5hhWeTlHV+Y+/yO1fEd65JZ/nzH2vrfF6Cj73fV25SRZ4AkoKxfHA19Yf6eRf5yYvskXzkx+oW53xem/R3c03eBM/7lD01QU+8pOSyjaMJzEZiB1lOeGkB5eQetsVHB79ggtAZGsO/3fODnDNf18YWp7vs9F7ic4T6tBeIBk9+xlcDSLPKjQ6soh/odU4FPMoQ7CryXRTzlfNe18f0e5Ssj4X5hov3LiK+c2f3D+I5/Cay4k7KW8ldgLX9VVa8Bc4AfhBD/E0JM8GsZB8oBv3ivoNWsBzOk5eTWGvCA3/9T/LYHMzmWs5oza34kraX/MPADsA3o5LO7CtqLA1pFVBG47uuiJgkhktBap7VTIhRCjPJ1J+N8598DqmdItwJaq2ShqqpjVVX1kjMO+vL4uxDiWyHEeCHEPX7nmwE7VVVNzuxiIYTO90z3+7q6ScBzmeQvO1TSP6fzvn+jfP/WB35RtfGYFHblMO4rqqqmunVUVf0DTRDqB5jHrKgPHPGVwZR0fgdu5ODa7GxvAPykauMOKXEf8MXd4A7irY8mqP6zhLZniCNHZVRV1cvAU2it96vAawTGBd877Z9XQdo72QCtfPuzxRfG/zke8b9PaO92rKqqjgzH/Fvb9/rcOceFEDfRGmvh3FqG92f4fZ60e4kQ4gHfO3RKCJEI/Ok7lRJPShlJLROqqh5Fa8Gn8CBgBM5nuN/DSLvfd1LWCnbAV1XVUUKIqUA3tO7QJCHEC6qqziLNfZCxC56ZC8aVg2Mqt7q1XBnO3+5YXlSKm4CpQogGaF3ln33HOqNVVmf9hEiH5nJ6LJN4rABCiAFoLfkJaIX9JjAArSXsz3U0Ie8jhPhYzWF3W1VVjxCiB1qh64Lm2poihBigquqalGBZRPEK8Dpa93UfkIjWM3k0J+n74c0g7Jk9k4z5uJOtaf3Lm5eclb/s4sttfnJje3bHcxJvZnnO+DvbMupHB7QyHoUmnpezyFtGnBl+5/YeZKYH2WnEGrTGwFi0XosTrRI0ZrguszzqINWls8533VOkNSgPZYgnuzKiQxPxzFzLKenfSVkr+Kmeqqr+rqrqR6qq9gC+AJ71nUqp7SunhBXaQFZRmraWctP1OQi7Ea279xKw1deq2oT2YnQhrdUPsAeoCdxUVfV4hr+UKYXtgV99926vqqrH0FwOGXEB/dD871uEEDlueasaP6uqOllV1fZolcxI3+m9QBv/gbUMtAe+V1X1C1VVf/VVbLVvE/ZOOAw86BtDSqFVDq+tIPwGD32DcOXRRA00kYrMEHdTAuMQUF/4DbD7GgDhAcZzu7hbZRikvN8X96E7jLdlBrszrlvISRlFCNEF+D+0caEzwFd5Oabiy2uH
DMc6oIng4dxG6vPr1wemqKr6g6qqh9HcLoF6Aeqh9b7fVFX1R1VVj6C5of3vwWGgnhAitUz4vAH+kzL2+H4HZ3K/U3oSd1TWCnLA9z4hxPtCmz1RXQjRCq3LeBhSp6jtAF7zjWA3QxvIddw+1gInHs3H1k0IUVEIUfZ2AVVVvQjEog3CpAj9frTWZW/Si//XaGMEa4UQ3YQ2O6ilEOJ1IURfX5hYoJHQZkLUEkKMRxP5zNJ2ARa0ArRFCFEzO8OEEK2FEG/50q3mG1RqTNoLNQOtvKwU2syRe4UQPX29hZT8dRTarJE6Qoh30Qah8poZaC3KmUKIer6BtpTeT3atICsQI4RoJoRojjbY/xuwwXf+RyAErUday9fbGhtg/r5B6/Us8JXjh9D8tHkx/XIaUBqYK4RoKLRZaPOB7aqqbruDeGeiCdZs3z3tzK09ymzLqBCigi8/H6qq+h0wGG3g9OU7yFtG/g00FUJ8JISIFkL8BfgU+NpPFHPDNbQG6Chf+W2FNvkh0Od2Bk2zxvnKUGe0sS//svk1mo7ME0I0FkK0RGsI2/zCbUIrl8uFEI8JIWr6yu04IcQoX5g7KmsF2fJPRmsJLgL+QJuhsBN4wS/MU2g3Zacv3Gy0waMigc9/PhZNWM8Cv2ZzyUY019om3/UqWms69ZjvuB2t9bIHiEG7P8vRZgKc8QWbhfZixfjSbYk2QHa7vLrRZjlsR6sAsmuF30BrQa9Em+XwJVohneSL7yJaazARbYbBITSBSGnRTPLZthLNB18W+CSbNANGVdXzaJVna7TKdCrajBLQWmpZcRGtTC1Da2jYgMfUlJE8VY1FG4wbBPyOVh4znU2VRf6swCNoPYqf0e7hfwnM9XG7uOPQXKZVgV/Q3BS/o7no7iTe82gzh1qQdk9fzhAmyzLqa93PRSuvb/muOYU27jPZV9neMaqqHkR7/h3QxjHmow3mPneH8XrR3Ki10Nymc9FmuwWkP6qqxqP55buivSMfovWEvH5hUspIFNpzXOBLKwlfGfaVyd5o9/gjtEHltWhu1BMZ4slVWRO+ci+R3LUIba75FqCxqqq/FXZ+JJJA8blnTwO9VVVdXSBpSvGX3G0IIcagtfouoPlp/wtcU1X1oULNmESSQ4S2IPE8miutOtoaiSi06bcF4uqWe/uUEIQQb/hPGcv4V9j5C5DqaG7BWDR/9TYCn1UkKUB840i3LX9CiKGFnccCpjza1PejaGMLf6Kt1SiwMU7Z8i8hCCHKoc0+yhT/+e8SSV4jtNXENbIIEqeqamIBZUeCFH+JRCIpkUi3j0QikZRApPhLJBJJCUSKv0QikZRApPhLJBJJCaRIfcnLYrF8ibaV7GVFURpmE7Ya2vL8Mmh77UxQFOW7/M+lRCKR3P0UtZb/XLS92XPC3wFFUZQmaMvxZ+RXpiQSiaS4UaRa/oqibLVYLDX8j1ksllpoWxlXQNuYa5SiKEfRNkAq7QsWTtH9oLZEIpEUOYpayz8zZgPjFEVphrZBUkoL/x1gmMViOYe20di4wsmeRCKR3H0UafG3WCxhaLs3LrFYLPvRdras5Ds9GJirKEpVtJ3t5lssliJtj0QikRQVipTbJxN0wHVFUR7I5NzT+MYHFEXZZbFYgoEI8mDrXIlEIinuFOmWsqIoN4FTFotlAIDFYhEWi+V+3+k/0T6JiMViqQcEk/Y1MIlEIpFkQZHa28disSxE+2p9BBAH/APtoycz0dw9QcAiRVEmWiyW+sDnQBja4O9riqKsK4x8SyQSyd1GkRJ/iUQikRQMRdrtI5FIJJL8oSgN+MouiEQikQSOyD7IrRQl8efChdyt04qIiCA+Pj6Pc1O0kTYXf0qavSBtDpTKlSvnOl3p9rmL0cVdofTr7xHRbXBhZ0UikdxlFKmWvyRn6OKuoP/nf4n6agmoXoTTVdhZkkgkdxlS/O8idHFXKP3uVIJXrcfjEehVKfoSiSR35Jv4WyyWl4Bn0AZyfwNGKopiz6/0ijP6M+cwr91I2Mdz0CVbAemvk0gkd0a+iL/FYqkCvAjUVxTFZrFYFLRtl+cGEo+qqiQnJ5PdWgSbzYbb7c5tdosmNju6hOvorl5Dl2zFWqsaZz55G+s3W7h/40YMuDHgTQ1u3P4zzjYPgsjVwL9EIilh5KfbxwCYLRaLCwghF1suJycnYzKZCAoKyvPMFXlKlYLICukOGVUVW+NWvDn/JWp+9jVPORcRhBsDHiIGjsH5QAOSXnwKe9f2oJN9A4lEcnvybYWvxWIZD/wLsAHrFEUZmkmYZ4FnARRFaeZ0OtOdP3fuHMHBwfmSv7uZLdtOMfqlSEKSw3nL8R/aeX5hRtCTvOGZQTXnnzjuq43+9TF4B/aGYlRxGgyG4tfDy4KSZi9ImwPFaDRCLuf554v4WyyWssAyYCBwHVgCLFUUZUEWl6kZ5/knJiZSqlSpPM/f3c7W7X/yxjsnOHVxKMLoZEhvJy2bu1i23EiVHd/zN9t0GnuPcLNcZRzjhuMa3hvMd38lWtLmgJc0e0HaHCi+ef65Ev/88g10AU4pinJFURQXsBxtX35JHiGE1lJQnUaWHz1L8IDFzI9JYPzmtix441uerh7DbzeqUOGf7xPasDfXJsyDG4mFnGuJRFJUyC/x/xN4yGKxhFgsFoG29fKRfEorHebly4ls0YJKVasS2aIF5uXL8yzuhIQEhg8fTrt27ejSpQvPPPMMV69eTRdm8uTJVK9enY8++ijdca/Xy6hRo1KvHTRoEKdPn74ljY8++ogqVapw9OjR2+aj1D3xBL06iZFnh/PJyY+5d1MvXiw3hm6RHdhV62ueH3ONiTsac3n5F/zzkW/4WW1Eg/lTCWvYi1jLLOJ+u54n90Mikdy95Iv4K4qyG1gK7EOb5qlD+xxjvmJevpzw117DcP48QlUxnD9P+Guv5VkFIIRgzJgxbNu2jQ0bNlC9enUmT56cev7DDz/kwIED7Ny5k+3btzN9+vR01w8YMIAtW7awYcMGunfvzmuvvZbu/G+//ca+ffuoUqVKlvkoc8nGPX+qTGq3gWFfG/j+yo9MT5hNqDeMCWVf4eGo1swNm0OjB28w6vO61PjtI76ZsJRfKrSnw47Pif7Lo+xu9TEb5iZgs8nZQRJJSaQobemcpc+/9NtvE3T4cJYRGPfuRWQYNAZQjUaczZrd9jpX/frcnDgx4AyvXbuWefPmsXjxYqZPn87vv//O1KlTMRqN2O12xo0bR4sWLRg1atQt1x48eJAxY8awY8cOABwOB48//jjTp09nwIABfPXVV0RHR2ea7p9btxL75ps8efIkXrOZGx98gK1fP1RUtpu2MjPsU3abdlHOU44Ryc8wPHkEpdVwAC7vPIfj3fk0ObgSneplaUgffu3xDK1HVqX5A64iPVO0pPmDS5q9IG0OlDvx+RevFb6ZCH+Wx+8Ar9fLvHnz6NatGwBjx45Ndz44OJjPP//8ttfHxMTQtWvX1N8ffvgh/fv3p1q1agHlQ2ezUWrKFGz9+iEQtHN0oJ2jA3uMv/BZ2Kd8VPoDZofNYFjyCEY
mP0Nk66rw3etcPf8UiZMW0vd/yxi0bDkrV3bnxXufp87QaB7vbaVSlDf7xCUSyV3LXSP+OWmZR7ZogeH8+VuOe6pU4erSpXman7///e+EhoYycuTIgK+dOXMmx44dY8mSJQDs2bOH/fv388Ybb+QqL/rz5xHXrqGWLZt6rLnzQeYkzOOw4Xc+KzWd2WEziAmbg8U6iFFJY6hSpSphn/2VawkjMM5aTPcvF9Hn2A/8+G5r3vzPCzjaPYTlMRvdO9sJNuUqWxKJpAhTrFYCJU6YgNdsTnfMazaTOGFCnqYzceJETp06xcyZM9EFuJgqJiaGb7/9lvnz52P25fWnn37ixIkTPPTQQ7Rs2ZKLFy8ydOhQtmzZkqM4BVCxWTPKjBuH8eefwc+VV9/dkE+uzWTd5S30sfZjUcjXdIpsw6tl/spxwzHUcmVwvD6aa/vXcuPtl2hd5hjfJw7h40192Dh+J807VGDCxHD2HQyi6HgIJRLJnXLX+Pxzinn5ckpNmYL+wgU8lSuTOGECtn798iyTU6ZMYc+ePenEO6csWLCAmJgYFEWhfPnytw3XsmXLgHz+iePHY7h4EfPy5egSE3HVrYt12DCs/fujhoenu/ai7gJfhM1iUcjX2IWd7vYePJc0jkauxloAh5OQZWsJnf4VQafPcr70vUzkBWLE49SoJbD0tdG/t5WoCgXvFipp/uCSZi9ImwPlTnz+xU7885PY2Fg6depEzZo1U1ceV6tWjS+++CLba5OSkoiOjqZq1aqpNplMJtasWXNL2GzFf+dOjk6YwDC7PV3lJqxWzCtXEjJ/PsYDB/AGB2Pv04fkYcNwNWmSbt+fBF0CX4V+wbzQGG7qbtDO3oExSeNo4XwIgQCPh+C1Gyk1bS5Bh2K5WaYic8o/y1vxT2LXm+nY1oGlr5VuD9sxGQO+lbmipAlDSbMXpM2BIsW/hHHq1CliY2P5y1/+ctswQb/9Rsj8+Zi//Rad1Yqrfn2Shw3TZgX53dNEkcg3ofP5MnQ28forNHU25/nEcXR0dNYqAVXFtHkXYdNiMP20D1d4GdY3fIL/uzyKI1ciKFPaS99HbVj6WmncIH9nC5U0YShp9oK0OVCk+JcwciL+KYjERMzffkvoggUEHTqENyQE22OPYR02DFfjxqnh7NhYGrKY2WEzOW84R7SrHs8ljuMRe0/06AEI+uUApabFELxhG97QEGIfHsC/DaNZuKsGdocguraLAX2t9HvURmQ+uIVKmjCUNHtB2hwoUvxLGIGIfyqqStD+/YTOn0/wypXo7Hac99+PddgwbH36oIaGAuDCxWrzCj4Lm8aJoONUd9dgdNJY+lr7Y0Kb9mM4coyw6XMxr1wHBj3X+/REqT2a2Tvqse+AEb1epVM7zS3UpYMdYx65hUqaMJQ0e0HaHChS/EsYuRJ/P8SNG5iXL9d6A0eP4g0Lw9a/P8nDhuGuXx8AL17WB//AjLBP+N14kIqeijyT9BwDrUMJUUMA7SMzYTPnEaKsBpcbW6+uHO7zNHMPPcDSVSHEXdFTtoyHfj01t1DDene2W2NJE4aSZi9ImwNFin8J407FPxVVxbhnjzY2sGYNwuHA2bQpycOGYe/dG9VsRkVlh2kbM8I+YbdpF2U9ZXky+WmeSB5JuFoG0D4vGTpnIaHzlqJLSsbeqQ03nh/JevdDKCtC+GFjME6XoH5dF5a+Vvr1tFG+XOBuoZImDCXNXpA2B4oU/xJGnom/H+LaNUKWLiVk/nyCTpzAGx6O9fHHsQ4bhrtOHQD2Bv3CZ6WmsSl4A6HeUIYmP8lTyaOo4I3U4rh+k9CvlhA65xv0CddxtHiApBdGcqlpW1Z+H8KSFSHs/92IwaDSpYMdS18bndrZc/zJgZImDCXNXpA2B4oU/xJGfoh/KqqK8aefCFmwAPPatQiXC0fLltrYwCOPQHAwRw2H+SxsOmvNqzAQxADrQEYljeEej7Y1hbDZCFm4ktCZ8zBciMNVvw6JL4zA/mhnjp4KZsnKEJatMnPlqp7y5dLcQvXrZu0WKmnCUNLsBWlzoEjxL2Hkq/j7obt6FbOiELpgAYbTp/GULYttwACShw3DU6sWp/WnmB02k+UhCl689LL15bmkF6jt1noKuFyYv/2esOlzCTp+GneNqiSNeRLrgJ649UZ+3G5CWRHC+h+DcbkFjeo7sfS10fcRG+XK3uoWKmnCUNLsBWlzoEjx99EzohtHjIduOV7P2YA18evuOIMJCQmMHz+e06dPYzKZqFGjBu+//3661bqTJ09m1qxZjB8/npdffjn1uNfrZfTo0Rw9ehSTyURERARTpkyhRo0agLawy2QyYTJpM2refPNNOnbsmGk+Ckr8/TKPcft2QhcsIPiHHxBuN47WrbWxgR49uBR8lS/DZvNNyHxsOhvdbD0Yk/QCjV0PpF4f/MNmwqbNxbj/EJ6oCJKeHYp1WH/UsFASrgtWrA1BWWHmt8NGggwqXTvasTxm5eG2Dgy+HahKmjCUNHtB2hwoRfFLXoVCU2czgtT08wqDVCNNnc3zJP783s9/9uzZrF+/nvXr199W+AsFnQ5n+/Zcmz2buF9+4eaECejPnqXc888T9eCD1H43hrd+G8HWyz8zLvElfjLt5LEKj/JE+UHsMu5A1QnsPToRv+Yr4hfOwF27JuGTphLVsiel/j2TCG8CTw1N5vsl8axffpkRQ5LZvc/IiLHlad4pion/Lk3s8btmD0KJ5K7grmn5Tyr9NkeCst7P34mT/cZ9qCLNJqHqeMDZBCO3n2xez1Wft24W7n7+2W3p4E+Bt/wzw+vFtGULIQsWELx+PcLjwd6+Pdbhw4nv1opvwhfxZdhsrugv08TZlDGJ43jY0QWdr70R9OvvhE2Lwfz9ZrzmYKxDHiNp9DC8VSoC4HLBpm3BKCvMbNgSjNstaPaAl349b9Knh42yZYpMuc03ZCu4ZCDdPnkg/gCn9Ce5or+MKlSEKqjgieReT80sr8mN+Hu9XgYPHky3bt14+umnA7oW4KWXXiI8PJx33nkH0MQ/xdYHH3yQCRMmEJ5hU7YUioT4+6G7eJGQRYsI/fpr9Bcv4omMxDpoEAnDHmdJ7R3MCpvBOcNZ6rrq8VzSWB6x9cLg203c8MdJbcHYiu9BCGz9HiHx+Sfx3FcjNf6rCTqWrzGzbHUpfjuswxik0q2TnYGPWWnfKs0tVNyQQlgyKFbib7FY6gKL/Q7VBN5WFOXjLC7LkwHfy7o4Oka1wiEcBKvBbI7blToVMS954403uHTpEnPmzAl4W+eZM2eydu1alixZkroz6Pnz56lSpQoOh4N//OMfJCcn8+mnn2Z6fVET/1Q8HkybNhE6fz6mTZsAcDz8MDeeGMKyRxP5LHwmx4L+oJq7Os8mPU8/64DUVcP6cxcJ/Ww+oQtXgMOJvUcnksaNxNW4Xmr0ERERbN52HWVFCMvXmLl2XU9UBQ/9e1mx9LVRu9adLSIrakghLBkUK/H3x2Kx6IHzQEtFUc5kETTPZvu8Xfp1vgmdz5DkJ5h4c3L2FwTIxIkTOXLkCHPnzk0doM0pMT
ExLFy4kMWLF1PW7+Mr/hw5coSRI0fy008/ZXq+yIq/H/rz5wn55htCFi5EHxeHp1IlEocOZtXoSkyv9jUHjfuJ8lTk6aRnGWQdRqiqbS+hi0/QFox9paC7mYS9fUuSXhiJs3VzIipUSH1JnE7YuDWYxd+GsGmbCY9H0KSxk4GPWen9FxvhpYtMjzbXSCEsGRTnAd/OwIlshD9PeSHprzzobMm4pL/medxTpkzh4MGDfPnllwEL/4IFC1iwYAELFy5MJ/xWq5WbN28CoKoqK1eupEGDBnma74LGU6UKia++Stzu3STMmYOrbl3C//Nfhtf9Gz/2jmTRhgnUdNVkcvhE2ke14JOwj7guruGNKEfihLHE/byWm2+MI+jIcSIszxHRawRi1TrwalNAjUbo0cXO3OkJ7N0Ux1uv3sBqFUz4ZxmadKjI8/9Xhs3bTXg8hXwjJJIiSkG0/L8E9imKMi2boEV+nn9+7ed/5swZRo0ahdfrxePxULt2bSZNmkRUVFSmcd0NLf/M0J85o/UGFi1CHx+Pu2pVtk7oyH+HnmVDmS2EekMZbB3O00nPEun12W6zE6KsJux4ek5VAAAgAElEQVSz+Rj+PI+rbi2Snn8SW59uZFwarKrw2+EgFn8bwoq1Zq7f1FExysPjva1Y+lqpVePuqglkK7hkUCzdPhaLxQhcABooihKXyflngWcBFEVp5szwofWzZ88G/LWsksCZM2eIjY1lyJAhhZ2V3OF0IlavRj97NrrNm1ENBn4f24EPX/WwtPJWDBgY5n2SFz0vcy++wXq3G8Oy7+D9aegOxaJWr4rnpWfxjhgI5uBbknA4YO06wVeL9KzbJPB6Ba0e9DJ8oJfH+3gJL13ANucCg8GA2128xjGyQ9ocGEZty9wiKf59gLGKonTLQfAi3/IvKtytLf/M0J88SejXX2NevBj9tWv80b4K//l3JIua/45HeOlp68PopLHUdUdrLaTLlzFt2E6paTEY9x7EE1GO5GcGk/zkANTSmZeVuCs6lq3SFpEdOxlEcLCXR7rYGdDXStuWTgIcry8wZCu4ZFBcff6DgYX5nIbkLsZTsyY333qLuD17uDZtGtWd9zCr5a8cvldlzIoarDeu5ZHIzowu+xR7xM+g0+Ho1p74lV8Sv3Q2roZ1KT1lOlEtHqXUe5+iu3L1ljSiKnh5/ukkflx1hTULrzCgj40NW4IZ/EwELbtG8sEnpTh1Rl8I1kskhUe+tfwtFksIcBaoqSjKjRxcIlv+OaQ4tfwzw3DsGCHz5xOydCnX9DeY/nZZZo6ycT3ETitHG8YkjqO1s632mUkg6LcjhE2bS/DajWAyYh3Ym6QxT+C5p/Jt07A74IdNwSjfhrBlpwlVFbRspn2Apmd3O2GhhT9bSLaCSwbF0ucfIFL8c0hxF/9UbDbMa9YQumABjqN7+OJ5PR//LYi4cnbudz7AmKQX6WzvmrpqWH/iDGEzvyJk6Vrwqtj6/oWksU/irlsry2QuXNKxfHUIi1eEcPK0AbPZy6Nd7Vj6Wmn1YOG5haQQlgyk+EvxzzElRvz9MBw5QvmlS3EsXcDCx5L48O9BnK7moo79Pp6zjedRW+/UVcO6C3GEzf6akAXL0Nns2Lp3IOmFkbiaNsoyDVWFvQeCUFaEsOp/ZhKTdNxTxc2APjYG9LFSrWrBzhaSQlgykOIvxT/HlETxB+0lufrnn5hXrsS4cD6r6hzg328IDjdUuSc5imedf6W/zYIJbfaPLuEaoTEKoV8uQnf9Jo7WzUkaNxJHu5Ygsn5fbDbB9xuDUVaa2bZLcwu1elBzCz3a1U5oAbiFpBCWDIrrgG+hEHdFR/8ny3P5St6al5CQwPDhw2nXrh1dunThmWee4erV9AOMkydPpnr16nz00Ufpjnu9XkaNGpV67aBBgzh9+nTqebvdzoQJE2jTpg2dO3e+ZcdPiYYaEoJ18GCur/qOTiN+YPMHQ1AsJir+FsdbZV+nY+kHmGP4mCSRhLdcWRJfGU3cz2u58fZLGE6eofzgsUQ8MlwbH/De/lOSZrPKYz1tLPw8gd3rLvPaize5GKfnpTfL8kCHKF7+exl+2mOk6LSdJJLAKJbi//HMMHbvNfLxZ2F5Gm9+bun8r3/9C5PJxPbt29m4cSOvvvpqnua9OOJu2JDEKR/Q/N2DrNzyHqufqkHDXYm8F/lvOpRqxCfWV7kmElBDQ0gePYy4nau4/sGb6G4mUe7Z16jQ8XHMi1eB05VlOlUqexg/Oont313m23nx9HnExpofgun/ZARtekTy35lhnLsgZwtJ7i7uGrfP2++V5nBs9h97dTph30EjqirQCZUm9zsxZnNZ/bouJr5+M+AM59WWzsnJyTRv3pw9e/YQGhqabbol2e2TZfdYVQnav5+ju6byaYuNrO7tJdSqY9jhtjxZ7l2ign0Dvx4PwWs2UmpaDEGH/8BdOYrk0cOwDnkMNSRniwqtVsF3G4JRVoSwY7e2zUeblg4GPmblkS52zOY7f6+kC6RkIH3+eST+J0/ruRyvR1UFQqhERniomc2y/tyIf15u6Xzo0CFGjRpFjx492LlzJ6Ghobz22mu0aNEi02ul+GePuHGDP7fO5LOKX7Hk0ZvoPTBoZ22eCXqTKjW7aoFUFdOPOwmbFoNp9694yoaT/PRgkkcORC2T8yXAZ8/rWbrSjLIyhD/PGQgL9dL7LzYsfW00b+LMbnjhtkghLBlI8c+DAd+4KzpadY/C4Ui7F8EmlV0/xBFZ4fb+3dyQl1s6Hzx4kB49ejBt2jQee+wx9u3bx4gRI9ixY0em9kvxDwBV5eLhtXzhfZ9vOpzEFQT91pdlzOWnubfNc6i+7UOMv+wn7NMYgjduxxsagnV4f5KeHYo3qkKOk/J6YfdeI8qKENb8EIzVpuPe6m4sfa3072WlSqXAyqAUwpKBHPDNAz6eGYaa4f3yeslz3//EiRM5deoUM2fODFj4Y2Ji+Pbbb5k/f37qvkVVq1bFYDDQt29fAJo2bUq5cuU4efJknua7RCIElRr05O+NtrHlzI8891Nrvm97nc5PfMgLf0Tzx9znMPzxB84HHyBh3lQur1+EvWs7Qmd/TdRDvQh/7V/oT5/NUVI6HbR60Ml//3Wd/Vvi+Ojda1SM9PD+1NK07BrF4FHlWLHWjM2ezzZLJDmgWIn/3gNGnK70laDTJdiz//afcAyU/NjSuVy5crRu3ZqtW7cCcOLECeLj41M/7i7JGyqUqsMr9y5h683feOXwIHa1FfR4YzVPeB/m4HudCF6+DHfNe7g+fTKXty3HaulNyJLVRLbrR5nn38Bw6I8cpxUaqjLwMRtL515lx//i+OtzSZw8bWDsa2Vp0qEir70Tzt4DQXK2kKTQKFZun/wmv7Z0Bm2nzldeeYVr165hMBj429/+RqdOnTKNS7p98oZkkYyifs6csM+4VDqRpr/A/30SSufQIdiGDMNz333o4q4Q9vk3hMxbii7Zir1TG5LGjcTZoknA6Xm9sPNnI8rKENauC8Zu11HrXheWPjb697ZSKSp9t1W6QEoG0ud/F4h/U
UGKf97iwMGK4KXMCvoPZ0rFEX0YXn0Pep95COfgJ7D36IGw2gmdqxD6xUL0CddxtHiApBdG4ujUJtsFY5mRmCRY84MZZYWZn/eZ0OlUOrR2MKCvle6d7Ny4qePFCZF8OuVyno9XFWWk+AeGFP8ShhT//MGNm/8Fr2Wm+b/Emo9R7ayel9/zMHRVOdS+g7EOGYI3MoqQhSsI/Ww+hgtxuOrXIfGFEdh7dgF97ub6nzyjZ8mKEJasCuHiJT3hpb1EVfBw7KSBoQOSef8fgU9DvluR4h8YUvxLGFL88xcVlR9NG5gR9im/mvYSmWDkxQ9cjJqhYmzSHuvw4dg7dMS8ZgNhM74i6Php3DWqkjTmSawDeoIpd2NMHg/s2G1i3iIz/9toRnunVerUcnN/QxcNorW/+nVdlAkvMu9tniLFPzCk+JcwpPgXDCoqPxt/YmbYp2wL3kK41cRzs4J44V9JlNVHYh00COvgwQT9fpywaTEYDxzGExVB0rPDsA7rhxqW/YK9zHh9YmkWLg/F5RLodCoVIz243YLL8Wk9i6qV3b7KwJ1aIdxTxZPrNQVFBSn+gSHFv4Qhxb/gORh0gM/CpvGD+TvMLiMjVkXx0kvnqHoOHA8/TPKwYaimUpSaMQ/Tjl/wlilN8siBJD81EG+5stkn4COrtSoAh2ODOHQ0iENHDRyKDeLEKQOqqoUtXcpL/bppPYQG0S5q13LntiNSKEjxDwwp/iUMKf6Fx3HDMWaFTWeleTk6VTBwdzT/98ol6u6Kx1OpEslDhuBq2ISQxWswf78ZrzkY69B+JI0ehrdyVLbxvz6xNIuWh6absmwMUhncP5nJb93q+7daBUePG3wVgvZ35A8DNps2i9tgUKld052uQqhf10XZMkXmvU9HUXjGBY0Ufyn+OUaKf+FzTn+WOWGfsThkIW5c9DzVjP+brNL8y70gBPYuXXB06obxl8OYV/wAOoG1/6PaF8buq3HbeLv1j+DQ0Vub6g2inaxbljPbPR449aeeQ0eDOHw0KLW3EHclzW1UpVJ6t1GD6KLhNipKz7igkOIvxT/HSPEvOlzRXSYmdA5fh35Fki6JTtda83JMJTpO2Yr+yhXcVatie7Q3uusuQlauA4cT+yOdSBo3ElejelnGndf2XonXpXcbHQ3ixGkDXq+mHaXCMrqN3NS5z1WgbqOi+Izzm2In/haLpQwwB2gIqMBTiqLsyuKSPBd/XdwVwj6eg3HvQeLX3fl35BMSEhg/fjynT5/GZDJRo0YN3n//fcqXL58aZvLkycyaNYvx48fz8ssvpx73er2MHj2ao0ePYjKZiIiIYMqUKdSoUYOzZ8/y1FNPpYa9efMmSUlJHDp0KNN8SPEvetwUN5gfOpe5oXNI0CfQ0t6SFze24JEp+wjevgPVYMDe4WHUUlEEb9yFLjEZe4eHSHphJM5WzTJdK1AQ9tpsgqPH/NxGsUEcjr3VbVQ/2kUDX8VQP9pFuXxyGxXlZ5xfFJb4G3KVYs6YCnyvKMrjFovFCITkY1rpSBH90MWrQfUistmvPaek7OffunVrACZNmsTkyZP5z3/+A6Tfz3/cuHGYTCbGjh2bev2AAQPo0qULOp2OmJgYXnvtNRRF4Z577mH9+vWp4d5++208noL9ZKDkziithjM2aTwjk0exOOQb5oTNZOiju2nQtREvnJhEv0/PEbp4CfqEBNzVquNs3pygg7FEDBiNs2kjEseNxNGlHQX9wWCzWaVJYxdNGqe9Ix4PnD6rT60QDh8NYsdPJpatSnuFK1d0p3MZpbiNCut7x5LAyZeWv8ViKQ0cAGoqipLTBLJs+Zd++0OCDmezt4rThf7cBfSXta9rCT/bHK2a3fYyV/063Jz4fznMZhp5tZ9/OhOcTpo1a8Y333xDo0aZf3NWtvyLPk6crAhZxqyw6Zw2nKKW6z5GXx/NwGUmwuctxLRrF6ohCFe9pugvJaK/koCrbi2Sxo7A1qcbGAxERESQcOhInvZe74T4qyluo7SewvFT6d1G9eq40o0l1LnPRXAAW2DdTc84ryhWbh+LxfIAMBs4DNwP7AXGK4qSnCHcs8CzAIqiNHM6neniOXv2bOrOlzkR/6DfYxGJSZneibwW/7zcz9+fNWvWMHXq1HQ9gYycOXOG2NhYhgwZEnC6dzMGgwG3213Y2QgIDx5W6r7lI937/KY7yD1qNV70vMTww60Im/M1ugULENeuoVasAWooIu4qakRZcN2AG0lACOj1CI8Hp+NMYZtzCzYbHDoqOPC74MAhwcHfBQcPCZKt2luo16tE11a5v6FK44Yq9zdQadxAJaJ85vHdjc/4TrkTm41GIxQx8W8O/AS0URRlt8VimQrcVBTlrSwuu2Ofv+5yvM/dswq86d09F87vDciG7MjL/fz9GT58OB07dsyyQpEt/7sPFZXNpk3MDPuUvaZfKO+J4KnkUQy9aiFy9VZC58/HuGcPqgjRKgH0aENlae91Xpfh/MLrTe82OuSbcXQpLm22UaWKntQxhJS/alU9REbevc84txQ3n/854JyiKLt9v5cCE/IprVS8kRHcnDyBpL8+c9tKIC9I2c9/7ty5ud7Pf/HixbcI/6VLl9i1axdTp07Ny+xKigACwcOOzjzs6Mwvxt3MCPuUf5d+j8/CpjNs5JOMsHxBxUNXqNBtEELVpV7lT/jL72Ad3BdXs8YFPjYQCDod1KzuoWZ1D726p3284GqCjkOxBg77VQo/bjfh8Wh2hoV6ub8h1KlVOtduI0nOyc/ZPtuAZxRFibVYLO8AoYqiZPVV8ryf7ePrCRj3HMgzf+mUKVPYs2dPuo+x5JQFCxYQExODoijpZgil8Mknn3Do0CFmzZqVZTyy5V88+D3oNz4L+5Tvg7/DpJoYaB3Kib0x9FdCeSImBJ0Hgp3+FYDWE1CD9LhrVcHesSWOzu1xRUejlitXWGbcETY7/HE8rTL444SZA79DslWr3PR6lfvuTRtYrh/tomG0m3Jli89Op8XK5w+pfv85gBE4CYxUFOVaFpcU+Xn++bmfP0Dbtm2ZNGkSDz/8cJZxSfEvXpwwHE9dNezxuhEqVIjXMWFS+krg+tujCd60k6Dfj6O7bkUgAA9gx1PWjKthXdzR0biio3FHR+OuUwc1pMAm2eUJERERXL4czxn/2Uax2hTUi5fS3EYVozxaheDnOqp+z90526jYiX8uKPLiX1SQ4l88uaA/z/T4F1lU/6fU1znqko6//yOUYf+rTsJPP6SGFTcSMSsrCVnxPUEHYxFeL6pRBx4rwpMEuFGFwFOtmlYZ1K2b+q+7Vi0ICiocI7Mhq2eccE1zG/lPQT120pDqNgoNSVmkltZTqHOfC3NwQVoQOFL8pfjnGCn+xZvXb/RjWe3dePxG5Bo4G9He0YG2jg40dTbHSNqyW3H9JsE/bMa8ej2mbbsRbg+eyPK4762EalLRX/wTw8mTCN/aETUoCHetWrjq+vUU6tbFc889hT6WEOgztjv83UaG1J5CUnKa26hWDXe6VcsNol2UL1d03EZS/KX45xgp/sWby7o4Oka1wiEc
GFUjI5KeZp9xL78a9+IRHkK9oTzkbE07e0faOtpTw3OvzwUEIuE65u9/JHj1ekw79iA8Hly1qmN/pBPORrXROa0Yjh4l6OhRDLGxGM6mfZzeGxKi9RDq1k3rKURH461QIVdfK8sNefGMvV7481z62Ua3uI0iPdqqZb+/GoXkNpLiL8U/x5w8eZI//vhDin8x5u3Sr7MwdAGDk4cz8eZkABJFIrtMO9hm2sw20xbOGv4E4B53Ndr5egWtHW0opZYGQHf1GsHfbcK8ej3GXXsRXi+uOjWx9eqKvXdX3Pfdi0hMxPDHHwTFxqarFPR+99lTtmy6HoI7OhpX3bqopUvnud35+YwTrou0mUaxmtvojxNpbqMQs5f6ddOvWq5bO//dRlL8MxH/pKQkjEZjykKGEo+qqly/fp0TJ04QHx8vxb8Yc1kXxyuRL/LR5U+p4I3MNMxp/Sm2mbawLXgzPxl3kqxLRq/qaeJsRjtHB9o5OtDQ1Rg9enSX47WKYM0GjD/tQ6gqrnr3YevZFVuvrnhqVU8Xty4+XqsMYmMxxMamVgq6pKTUMO7KldNVCq7oaNz33QfBuVfLgn7GdgccO5F+1fKho2luI51Om22UcSwhonzeuY2k+Gci/qqqcuPGDeLj49Fn8X1UnU6H11t0fHj5gaqqeDwezp49y40bNyhVqhRdunQp7GwVKCVJ/CEwe504+dW419cr2MrvxoMAlPGWoY2jPe3sWmVQ0VsJ3aUrmL/bqLmGft4PgKtBXWy9umgVQY17Mk9EVdGfP5+uhxB09CiG48cRvtX5qk6H+95703oIvl6Cp0YNMGS/rKgoPGOvF86ez+A2OmrgwqW0/EdV8KROPU2pEO6tlju3kRT/TMQfNNHbvXs3J0+exGAwIDLxPZrNZmw2W0HksdBJqQS6d+9Oubt0bnduKQrCUJDcib1XdVfZYdrKVtNmtpu2ckV/GYDarrqpA8ctHC0JuXAT89qNmmtor1ZhOBvXw97L1yO4p3L2iblcGE6fTusp+CoH/enTqftrqSYTrtq107mNXNHReCtXTjeeUJSf8bXrwm9LbO3v2EkDbnea26heXXe66afRtd2YzbfX2LgrOl6cEMmnUy4TWSHwBmyxFn/QBO/ChQtcv3490xZ+eHg4N27cyO/8FQn0ej1RUVHUrVu3yL4k+UVRFob8IK/sVVGJNRxhq2kL24O38ItxN07hxKQG08LRkraO9rRzdKTemXDMqzdgXrMe46/aduLOJg2w9eyKvVcXPFUqBZSusNkwHD+O4ciRNPfRkSPoL11KDeMtVSrVZeSKjia0RQviK1W6axatOZxw7IQhQy8hiMSkNLdRymwj//GEChGajr0+sTQLloQy3JL5l9qyo9iLf3aUNFEAaXNJIL/stQkbu4272G7awjbTFo4HHQOgoqcibR0daGvvwMPH7qPi6j0Er16P8eARAJzNGmPr1RXbo51z9EnK2yGuXyfojz/SVwpHj6K7fj01jCcyMnXmkatePa3HUKcOamjonRlfAKhqRreRVjmcv5jmNoqM8HBfTRe792rbW6R8pznQ1r8U/xImCiBtLgkUlL0X9OfZZtrCdtMWdpi2c0N3HaEKGrnup52jA92P1qf58kuErd5E0KFYABwtHtBcQ492xhtV4c4zoaro4uIof/Ei1p9/TpuKGhuLzp62P5C7evXUqaipg801a8JdMCkkxW2U4jrasNnEtRs6QGT5neaskOJfwkQBpM0lgcKw14OHg0H7fbOItrA/aB9e4SXMW4pWjjb0PtSI7kvdVFz1C0FHjqMKgfOhpth6dsH+aGe8FW6zV3MOucVmjwf92bNaZeA3pmA4cSJt0ZrBoC1a85+KGh1dJBat3Y64KzpadY/C4UjT7dy0/qX4lzBRAGlzSaAo2HtT3GCnabtWGZi2cN5wDoDq7hoMONCM/kow9b49junYGVSdDmerZto6gkc64S1fNuD0cmyzw4Hh5EmtMvBzHxn+/DM1iNds1ioDvwVrrrp18UZGFtiitdvx+sTSLFoeitOVlo/ctP6l+BeBl6SgkTYXf4qavSoqp/Qn2Ba8lW0mbW2BTWcjSA2i/6+NeWJxeVosu0zoiUuoej2ONs0119BfHkYtVyZHadypzSIpKW3Rml+loL9yJTVMyqK1dJVCnTqo4eG5TjdQuvWP4NDRW11VDaKdrFuWc/ul+Bexl6QgkDYXf4q6vQ4c7DX+wnbTVrYFb+Zw0CFQoe2v5RmzqAadliZT5tR1VIMeR7uWWo+ge0fUMrdfGZxfNuuuXr1lKmrGRWueSpVSB5dTB5pr1YIAt24PFDnPX4p/QEibiz93m71XdJfZYdqWutDsqi6e+/cbGP1NFXot1RNx2ooaZMDR/iFfRdABtXT67VsK1GZVRX/hQlplkFI5HDuWbtGap0aNdG4jd3Q07hwuWssJUvyl+AeEtLn4czfb68XLEcNhtgdvYatpM3uDfqHRPpVBi8KwKKWIOuvCazTg6Ngae6+u2Lu2Ry0VVjRsdrvTFq35rWTWnz6N8K0zUk0m3Pfdp1UG9eqlVgqeDIvWcoIUfyn+ASFtLv4UJ3uTRTK7jTvZZtrKNtOPROw7x+OLg3l8SSiVz4HbpMfaqRUhQwZypeUDqKFF8CM0NhtBx4/fupL54sXUIKmL1uqm/7CON5NFa+blyyk1ZQr6CxfwVK5M4oQJ2Pr1CyhLUvyL0UuSU6TNxZ/ibO85/VltbUHQZpy/7uKRJR76LQmm0kU9TrOe+C73E/To47g7t0cNyV+f+50ibty4ddHakSPpF61VqJDObaS7eJGw6dPTrWHwms3c+OCDgCqAIin+FovlNJCI9p05t6IozbO5RIp/AEibiz8lxV43bg4E/cp242aS9m+ikXKavstMVIzTYw/Rcbr7fRh69iOkQy+K/Ge5UlBVdJcv3zLAbIiNRZfFPmTuKlW4/PPPOU6mKIt/c0VRclp6pfgHgLS5+FPS7AXN5uNXj7HLsJXLe1Zxz8r9dFvuIfKKnuQw+P2Ryth7dqFq2ycwmwJfR1DoeL3o//yTyDZtMlVsVQgunjuX4+juRPzzZrhaIpFI8ogyall6uPrA/X1Q71c5+nYs6/cspOzK7bRceY7yyjxulJ7L7p6ludLnISq2GkRd/f3oKJqredPhmz3kqVIFw/nzt5z2VM7BLqp5RH62/E8B1wAVmKUoyuxMwjwLPAugKEozp296VaAYDAbcbvcd5PbuQ9pc/Clp9kL2Nttdifyx+Uu8y1ZQd8Upwq+pXA/3su4xwdn+D1Cx0+N0MHQnktxvPFcQ6BYuRP/88wirNfWYGhKCZ8YMvIMH5zge34euipzbp7KiKBcsFksksB4YpyjK1iwukW6fAJA2F39Kmr0QoM1OF7Yd3+Ncu5Rq3x0h9IaHhLJeVvazs6dfFKZWXWjjeZhmzgfTffC+qHDXzfaxWCxCUZSALrJYLO8ASYqifJhFMCn+ASBtLv6UNHvhDmx2ODFu3YVzzRLK/bCH4EQXV8t7+ba/nbWPq7haPUgbz8O0t3eghqdm6gfviwKFNc8/Nz7
/j4CXsgpgsVhCAZ2iKIm+/3cDJuYiLYlEIskekxFn1w7QtQMJdgfBW3YRtOo7nvx6G8/MdhIfeZhl/ffwT8skzrSKpLW7A+0dHWntaJv6wfuSRr64fSwWS03gW99PA/CNoij/yuYy2fIPAGlz8aek2Qv5YLPNTvCPOzGvWodxw1b0NgcJUQaWP25l0cBEdrf20tjThPb2jrRzdKCR63703P574flBkV3ha7FYvlAU5Wnf/wXwuaIoz+QmsWyQ4h8A0ubiT0mzF/LXZmG1Ydq4HfOq9Zg2bUdnd3Cjkpnv+gtmDzrH7lZOSlOGNo62tHN0pJ29A5W8+T/7pii7fWqm/EdRFNVisdTKTUISiURSmKghZm0foV5dEclWTBu2YV61jkGf72TwtHIkVinNtn6lmD1oD6+3WgNloLarTuo3jls4H8KsFu3VxoGQE/GPt1gszwA7gVbA1fzNkkQikeQvamgI9j7dsffpjkhMInj9Vsyr1tNj1i4e+RRs9zTk18cq87XlBgtazScmbA5G1cSDzha0s3ekvaMDddzRRWrgOFByIv5Pos3FHwvEAk/ka44kEomkAFFLhWHr9wi2fo8gbiQSvG4L5lXraDXjJ1p/4uHj6g041jealQPcLHxwP1PCJzGFSUR6olJ7BW0d7SnnvXXztqJMTsTfAVxC26NnJtAE+CU/MyWRSCSFgRpeCtuAntgG9ERcu5FaEdSbtoX6Uz28em81rvR+kU2Pm1ne9BCbgtezPGQJQhU0cDWinUObRdTE2YwgggrbnCzJyXroBUAZYLCiKG7gvfzNkkQikRQ+atlwbAN7k/D1NOL2/8D1D97EU7USFT9dytB2X7Go8RWOvmKz5skAAA68SURBVDKWddtmMT7xFYLVYGaHzWBwRH+aVWzA6LIjWRAylzP604VtSqbkpOVfQVGUzywWiyXfcyORSCRFEG+5sliH9sM6tB+6+ASC//cj5lXrKP1JDO0+9vJQnZrYevfgaq9JbG54lu2+D95vMK8DoJq7Ou0cHWjn6MhDjtYMLt+fI8ZDaQn4JhXVczZgTfy6ArEpJ+J/2WKxDATMFovlMeBidhdIJBJJccUbUQ7r8P5Yh/dHdzme4O82YV69nlL/mU3pD2dRtd599O/VDWuvMZyo7dG+WxC8hW/NS/k6dB4G1UBZTzn0qh6P8KTGG6QaaerMbuf7vCMn4v8U8AywD6gKjMrXHEkkEsldgjcyAusIC9YRFnSXrmD+biPBq9ZR+oMZlP5gBuUb1KVR766M7DkJW40o9hn3sN20hY2mDVwxXE4Xlx4d45L+WmB5z4nPP1pRlGnAPwEzUCNfcySRSCR3Id6KFUh+ahBXV3zJpV++48Y7r6CajJR+bxpRbfpSucdTdJ56nL8dfpL/xW+kX/IA9Kq2mrjqBRPLRzWiXpfxBZbfnLT8/wN0RtubZwsQgzbfXyKRSCSZ4K0cRfKoISSPGoL+3EWC12zAvHo94e9OJfzdqTibNOCD3q0496iRITEGnogJweS9iM5ZcFt456TlH2KxWEyASVGUhUDuNt2XSCSSEoinaiWSnxtO/Np5xO1axc03XwSbgyr/nMOWFuGM+iwEs10UqPBDzqd6rgQ+s1gswcCp/M2SRCKRFE881aqQ9PyTqGVKowptdbBOLZxVwtm6fRRFmQ5M9zs0It9yI5FIJCWAazPfI+zjOYQuXgVeFZHLrxjeCXfBRy8lEomkeOGNjODm5AnE7VqFd+RA1GATqrFgVwTfVvwtFkvZgsyIRCKRlDS8kRF4PnmXuF2rSB7cF2eDOgWWdlZun/d8FcAxYB2w07e9g0QikUjykJSeQEFyW/FXFOU5AIvFUhvoCjzr+5jLbmC5oijnCiaLEsn/t3f/sZJW9R3H39e9rFqhohlSGcFApVUJhB9FJV2zIt3SVTdq2viNILZqk5um1NACUSHilqqp8Q/jmkL1FqhYUPyySEx1g/1Bl7UVbV0lQl1NW0ri7sXgLRGRWLa7TP94nk2vt3B3Zu49Mztz3q9ksnfmPs9zvifcfOZw5plzJK21fj7w/Tea0f91EbEOeCVwPHDY8G+P/wawLzO3rLJWSdIaGWgD98w8SLOpS78uBfYAde6QLElHqGJ3+0TECcDrgetLtSFJGs5hR/4RsTkz74yIU4A/Aj6Xmbv6uPbHgHcDx6xw7TmaXcLITDqdTn9VLzM7Ozv0uZPKPk+/2voL9nmk7fZxzBXAncBVwF8AHwdevtIJEbEFeDgzd0fEeU93XGbOA/Pt096wO9h3Oh2GPXdS2efpV1t/wT4PqtvtDt1uP9M+x0TEi4CDmXkP8Hgf52wA3hARDwK3AudHxM1DVylJWlP9jPz/FPgg8MF2bZ+vHe6EzLwSuBKgHflfkZkXr6JOSdIa6if8Twbem5kL7fPRfhNBkrTm+gn/B4CPRMRzgb8GtmfmI/02kJk7gZ1DVSdJKuKwc/6Z+YV2yuZ3gE3AgxFxW0S8qnh1kqQi+rnV87XAW4Dn0Yz854AZmjX+NxatTpJURD/TPqcDV2XmvqUvRoQbuUvShOon/D8HXB0RR9NM/bwjM6/PzO+VLU2SVEo/9/nfQPNt3W67ts+FZUuSJJXWT/ivy8zvDniOJOkI1k+Q3xURnwC6EbEN+NvCNUmSCuvnVs8PAH8GXE0zBXRd6aIkSWWttIfvbRGxHiAz78/M24AnaLZ0lCRNsJVG/rcCOyLiWICI+A3gr4DfHkVhkqRynjb8M/N24P3AlyLi/TRr+V+w7MNfSdIEWmna5wPAZmAfcBnwLeCyiPiTEdUmSSpkpS95/V37798D146gFknSiDxt+Gfm3aMsRJI0On5hS5IqZPhLUoUMf0mqUD+reg6s3et3F/DMto3tmbm1RFuSpMGVGvk/AZyfmWcAZwKbI+LcQm1JkgZUZOSfmT3gJ+3To9pHr0RbkqTBzfR6ZTI5ItYBu4FTgGsz8z1PccwczbaQZOav7N+/f6i2ZmdnOXDgwCqqnTz2efrV1l+wz4Nav349NNvqDqxY+B/Srg10B/CuzLx/hUN7CwsLQ7XR6XRYXFwc6txJZZ+nX239Bfs8qG63C0OGf/G7fTLzR8BOmqUiJElHgCLhHxHHLVkN9NnAJsAF4STpCFHkA1/geOCmdt7/GUBm5hcLtSVJGlCpu32+DZxV4tqSpNXzG76SVCHDX5IqZPhLUoUMf0mqkOEvSRUy/CWpQoa/JFXI8JekChn+klQhw1+SKmT4S1KFDH9JqpDhL0kVMvwlqUKGvyRVyPCXpAoZ/pJUoSI7eUXEicCngRcATwLzmbmtRFuSpMGVGvkfAC7PzJcB5wKXRMSphdqSJA2oSPhn5kOZ+c3258eAPcALS7QlSRpckWmfpSLiJJrN3L/+FL+bA+YAMpNOpzNUG7Ozs0OfO6ns8/Srrb9gn0dpptfrFbt4RBwN3A18KDM/f5jDewsLC0O10+l0WFxcHOrcSWWfp19t/QX7PKhutwswM8y5xe72iYijgNuBW/oIfknSCBUJ/4iYAW4A9mTmR0u0IUkaXqk5/w3A24D7IuLe9r
WrMnNHofYkSQMoEv6Z+Y8MOQ8lSSrPb/hKUoUMf0mqkOEvSRUy/CWpQoa/JFXI8JekChn+klQhw1+SKmT4S1KFDH9JqpDhL0kVMvwlqUKGvyRVyPCXpAoZ/pJUIcNfkipk+EtShYrs5BURNwJbgIcz87QSbUiShldq5P8pYHOha0uSVqlI+GfmLuCREteWJK2ec/6SVKEic/79iog5YA4gM+l0OkNdZ3Z2duhzJ5V9nn619Rfs80jbHXmLS2TmPDDfPu0tLi4OdZ1Op8Ow504q+zz9ausv2OdBdbvdodt12keSKlQk/CPis8A9wEsiYm9E/G6JdiRJwyky7ZOZF5a4riRpbTjtI0kVMvwlqUKGvyRVyPCXpAoZ/pJUIcNfkipk+EtShQx/SaqQ4S9JFTL8JalChr8kVcjwl6QKGf6SVCHDX5IqZPhLUoUMf0mqkOEvSRUy/CWpQkW2cQSIiM3ANmAdcH1mfrhUW5KkwZTawH0dcC3wWuBU4MKIOLVEW5KkwZWa9nkF8O+Z+UBm7gduBd5YqC1J0oBKTfu8EPj+kud7gVcuPygi5oA5gMyk2+0O3eBqzp1U9nn61dZfsM+jUmrkP/MUr/WWv5CZ85l5Tmae054z1CMidq/m/El82Ofpf9TWX/s89GMopcJ/L3DikucnAAuF2pIkDajUtM+/AL8UEScD+4C3ABcVakuSNKAiI//MPAD8AfBlYE/zUv5ribZa8wWvfaSyz9Ovtv6CfR6ZmV7v/03FS5KmnN/wlaQKGf6SVKFiyzuMQkTcCGwBHs7M08ZdT2kRcSLwaeAFwJPAfGZuG29VZUXEs4BdwDNp/l63Z+bW8VY1Gu035b8B7MvMLeOup7SIeBB4DDgIHGhvAZ9qEXEscD1wGs3t8O/MzHtG0fakj/w/BWwedxEjdAC4PDNfBpwLXFLBshlPAOdn5hnAmcDmiDh3zDWNyqU0N0zU5DWZeWYNwd/aBtyZmS8FzmCE/70neuSfmbsi4qRx1zEqmfkQ8FD782MRsYfm29TfGWthBWVmD/hJ+/So9jH1dylExAnA64EPAZeNuRwVEBE/D2wE3g7QLoWzf1TtT3T416x90zsL+PqYSymunf7YDZwCXJuZU99n4GPAu4Fjxl3ICPWAv4mIHvDJzJz22z5/Efgh8JcRcQbN3/ilmfn4KBqf9GmfKkXE0cDtwB9m5o/HXU9pmXkwM8+k+ab4KyJiqj/fiYhDn2PtHnctI7YhM8+mWQ34kojYOO6CCpsFzgb+PDPPAh4H3juqxg3/CRMRR9EE/y2Z+flx1zNKmfkjYCfT/znPBuAN7QegtwLnR8TN4y2pvMxcaP99GLiDZnXgabYX2Lvk/2S307wZjIThP0EiYga4AdiTmR8ddz2jEBHHtXdEEBHPBjYB3x1vVWVl5pWZeUJmnkSzNMpdmXnxmMsqKiKeExHHHPoZuAC4f7xVlZWZPwC+HxEvaV/6NUb4+d1Ez/lHxGeB84BOROwFtmbmDeOtqqgNwNuA+yLi3va1qzJzxxhrKu144KZ23v8ZNEuFfHHMNWnt/QJwR0RAk0ufycw7x1vSSLwLuCUi1gMPAO8YVcMu7yBJFXLaR5IqZPhLUoUMf0mqkOEvSRUy/CWpQhN9q6e0XES8GthKM7A5CFydmV+NiEeBb9KsDfROoAtsysz3tef9MbAzM3cuudbP0Syz8MvtefOZedMqajuWZpG6qr6cpyOTI39NjYjoANcAb8rM84A3AT9tf31fZr4GuJxmzZx+bAXubq/1KuA/V1niscBvrvIa0ppw5K9p8jrg5kPrHWXmY8C3lh1zL80aQf341cx8T3utHs2+AkTEx2mWl/4x8FaaBfY2Zeb7IuLt7bk7gRuBR4CTgTcCc8CvR8RO4M2Z+cPBuyitDcNf06QL3AcQERcBvw98LTOvWHLMRuB7wzYQES8HnpOZGyPiYuD3ePqVVZ9HsxzFhcBv0WzU/aJpX6pBk8FpH02Th2jeAMjMzwAXA532d6dHxD/QvCF8GPhvmt3BDnkW/zdFtJIX03x2AM0uW6fws/sLzCz5+TuZ+SSwj2bKRzpiOPLXNNkBbI+IzMxH+dm/70Nz/gBExH7grIg4NAA6G/jIsut9NSLempm3tIvqbaBZf+WC9vfnAP8BPEqzBhHA6cC325+Xvyn8D7BuNR2U1oojf02Ndg79GuALEXEXcB3NnsdPdex/0SyNvQv4Cs3ewI8sO+wa4NXtHP0/AS/OzH8GfhoRXwEuAj5BE/bdiNgBHLdCiT8Anh8R2yPi+UN2U1oTLuwmSRVy5C9JFTL8JalChr8kVcjwl6QKGf6SVCHDX5IqZPhLUoX+F9uNDk9w5XHwAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZoAAAEkCAYAAAAWxvdmAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzsnXmcTfX/x5+fc/c7G2ZBJN+Wb9oXWpXdoELCbfm2qZ9KpSwlIQkRlUh9VZSk9ah8UVmzUyFJJUpRUvYZM3Pn7vfz++Oc4Roz5t6Ze2cw5/l4zIN7zufzPp/POZ/zeZ3P9v4IKSUGBgYGBgaJQqnqBBgYGBgYnNwYQmNgYGBgkFAMoTEwMDAwSCiG0BgYGBgYJBRDaAwMDAwMEoohNAYGBgYGCaXaCo0QoqEQQgohrqnqtFQHSrrf+u/bK+n6lXat6oQQYrsQYkgc7AwTQmyNR5oMKg8hxN1CiGBZ4aqt0AA7gLrANwBCiPp6ZdSiIkaFENfodhpWOIUnP3WBj4/XawkhtgohhiUmOfEnXmU4Ri4DXqrE6xmcgJirOgFVhZQyBOyqqusLISxAUFbjFbNSykq7/5V5reIIIQRgllIGqioNiUJKubeq02BwAiCljMsfcA2wCsjX/74H2unnGgISuKZYnK3AsIjfEugNfAS4gT+BbkAa8J5u93ega0ScItu3AfOBQmAz0ByoB3yh29oEXFtCvGsirh35t/0Yee0MfKdfKxdYA1wSYTPyb6ke521gkZ6/7UAYSAYswHPATsCvp/O2YteTwIPAdP0e7AAGFAuTDszQ87obGAFMAxZF+fzqA58A+wCPfp8fjzhvBoYCvwE+Pb0TI84/CmwACtAE/EOgbmn3OyJft1dGPmO9FrC0hGfZUD93pn6vcoEcYAFwQUTcu4Eg0FIvJ37ghijv8TBgG+AFfgLuL5aPZGC8nl6fXpYGlaMMbweGAxOAA/q9fAEwRYSJpmxuB4bEaNcGTAIO6vdvEjAa2FrM9i16mfLqdscBSRHlYAcwISJ8FvAPMCbKMh9NeUsBXgf26ulYB2THof4RwGS096moLIwCbBFhhqHVkZ11m25gCXBGRJiawLtodaUH2AL0B0REGEW3vRft/fwQ6IP2oRuZ17ZodbhHf+ZTgfRiaR4B7Imw07e4nRLvdTQPJIoHZtIL1TjgLP2vS9GNJTah2QXchfYy/1d/cHPRXt4zgYn6DU8vZvs34Ebg38BM4G+0ir2LfuwTvSBZSkoTmlBI4CagDpBZSl7roL10A4B/AefohewC/T500u1cpoetpcd7G8jT03axHt4MPA/sB7rr6RyEJkKti92X3UBP4AzgEf1Yy4gws4Ff0Cq38/RCcpDohWa2fr8u1u9NS+DWiPPT9AJ2h56GK4G+EecfBdro9+QqYDWwrIQXsiyhSUg+Y70WUAutwn9Bf4519OdbG62MTtKf4dloZXJ/UZlBK6thYC3QCjgdyIziHr8NbASy9ft4M5qY3Rvxoi9Fq5Ru1O02A3rGUoYjBCEHGIj2vt6MJo49IsJEUza3c7TQlGX3JbSy1BlopN/jPCKERr+HOWjlrSifG4HpEWGaAQGgo35v5gNfo7/jUQpNWeVthp6ndmjv+gS0979RBesfBRgJXKHb6IQmks8UExo3MA9oDFyE9uES+V7VAZ4ALtXLzO1oIhB5v/vpx+7Qn0k/tPo6GBGmFVpd21sPcxmaqC1HFy20d9yNVj//G60OzKUShaamfrNblHK+6GFEIzTjI35n6scmlnCtG4rZ7hMR5jL9WP+IY0Uv4fklpQnta7PUPJRgp2Ep568p6TxaJZILJEccc6J9lT5YLOxMYHGx+/JysTCbgdH6/8/Sw0RWABa0gh2t0Hwf+SyKnTtTt98thjJRdJ/qlVYGKLnyT0g+Y71WSeVTPzYM+LrYMYFW0fTRf9+t27+2WLhj3eN/oVXijYodHwps0P/fWrfbpBQbUZVhPex2YHaxY/OAD2Ism9s5WmiOZTcJrWXQs1iYdRwpNNuBB4qFaabnr2bEsafRWogvor1f/4qhjJZV3orK/XXFwqwH3ipWrmOqf0pJT1/g12JlLUjEBwNaKy8M2I9hZwKwMOL3TmBEsTAfcqTQLAWeKxamgZ7mi/XffwHPFgvzMVEITVwmA0gpc4ApwHwhxFwhxEAhxNnlNPd9hN29QAjtSybyWn60ZnKJ8Tg89rKxhGPF48XKRrQvpx+FEDOFEI8KIU6NMu7PUsqCiN9nAla0r4ZIlqF9rUeyodjvnWhf1wDn6v9+XXRSauMB66JMF2jdMYOEEN8IIcYIIZpFnLtU/3dBaZGFEC2EEPOFEDuEEPnASv3UaTGkARKfz2ivVRqXAY2FEAVFf2jdLg3RhDCStcV+H+seN0ETrHXFbA+KsNsYyJFSlje/xTlW/mMpm7HYPQOt62x1sTBF5QUhRCZauRlX7F7MjUhbESPQWrj90LoZt5WRtljSWlTeit+D5Rx9D2Kuf4QQPfWysFvP32iOfl/+lkeOg+1EKydZug1Fr283CCH26XYeKLIjhEgFTiHindH5qtjvy4A+xe73Jv3cWbqdehzjuR2LuM06k1L2RHsRFqL1T/4ohLhfPx3W/xXFollKMFXSgGnxY5Kj0x4odr60YxXKs9QmEXRAa2quBboCvwghbogiurs0s8V+ixKO+UuIUzwvxeNEjZRyKlrhfA1thtZcIcS70cQVQjRA64vejvbF1QStKwC0yioWEprPclyrOArwJVr3V+Tf2WhfoEWEpJTeI4wf+x4XXffqYnbPBy4slsZ4UZ57XVLZjMWuiDhWGkVhH+XIe3ERmuj+EBG2Llo3Tkj/N1bKUwZKugcx1T9CiO7Aq2jj0dehtXiGc3SdWFL6DtlBG495Eq37ti3afZrC4fcumvtdZG8MR5frs9AEPlo7pRqPG1LKH6WU46SUHYA3gfv0U0WKfEpRWCFEFppCHi8UPVBTWQGlxhop5SgpZTO0r7wesdpB65rxoQlzJM3QBoKjpejL46qiA0IIM5rwR42U8h8p5VQp5Z3AvcB/9C+Z9XqQ7FKiXgY40LoPVkkpt1B2y6A8xCWfMeDn6Oe4Du1rdqeUcmuxvzJnYB3jHn+rB2lQgt3f9HPfArWEEE2OkV5KSHN5iFfZLMmuH2ha7PjVRf+RUu5G6w49u4R7sbVIwIUQCtpA+E9ok4aGxnldXFE+mxU7fi0VuwdFNr/T68tvpZS/orWKy2NnnpTyTSnld1LKrUS0rKWUB9HGi64qFu/KYr/XAeeVcr8LdDs7Ofq5Ff9dInGZ3iyEOBNtQG0OWgE5Be1hrAeQUnqEEKuAAUKIzfp1n0UryMcL+9AGzLKFED8BPr2b7giEEFej9ZUvQBu8Owvti/NNPcgfaC2464QQH+l2DpZ0
QSlloRDiZWCEEGIvWjO+O9ogadtoEy6l/FUIMQd4VW9F7kX70kklyi8QIcQraK2SLYAdbUB5B5AvpcwTQrwH/FcIYUdrdtcCrpZSTgB+1a/TXw93EdrYQlyJRz5jZBvQVG+xFaINoL6CJhD/E0KMRLtH9dFauZ9LKYt3LRwiinv8FjBZCDEA7R4noYloppRyDLAYWAF8JIToh9Y1cwpwjpRyClGW4WiIV9kswa5bCPEaMFIIsRvtXtyLNilgT0TQwcCbQohc4H9orYNzgA5SyvsjwlyANobwl273PSHExeXNd7G0/iaEmIFW7u9He7d7obUyb6ug+S3AvUKIzsCPaLMSbyqnnTuEEC3RhOBOtAkGkfl/EXhGr3vXANejfTRGvjNDgQVCiJfQJv7ko9Vt3YGHpZQe3c4I3c7XaL0WbaJJZLxaNG49UR+i9Zd+gtaX93BEmHvQXoLVerg30Crq4wIpZRh4CHChvfzflRL0INrXwSy0CvYttKnXI3Q7u9GasgPR8jerjEsPRpvmOB7tK+l2tEHrL2PMQg+0AjsXbWBvJ1o3pvcYcSIRehp+ROuDTkJ7qYsKYw+0aZ4jgZ/RBoX/BSCl3Ig2W+V+tFbHY2jTJxNBRfMZC0+jTa3fgiZqDfTnexVapf6pfu49tC6xsspzWff4PrQZWYPR7uOXaDN8fgetJY1WSXyB1v22Be2LPkM/H20ZjpZ4lc3iDEQTj+loFV8NtG6kQ0gpp6Pl43o9zFq0rsmdcOiDbyhwj5TyLz3aY2gTAqZUMH2R/B/amOy7aOMwTdEmIm2uoN3X0fI/Fe05XcGRXa/RMgKtR2UW2sdJTeDlYmHGo30gTdCvdSWaaBx6Z6SUS9CGAy5A+5jZiFYW8zncBThBt/0S2ofHVWjdfWUiDpdxg5MJIYQJbQbNbCll/6pOT6KoLvk0MIgneuv5Iillorqdj6DaegY42dBnMGWhfbGkoE2VbIg2rfqkobrk08AgXgghTkFbz7MEbdJER7QutoePFS+eGEJz8mAChqBN/Qygdc+0lFL+oI8xbDpG3PullO9VQhrjQan5rNJUGRxX6OM1pTlR/UNKWdYU7ZOJENpYywi0scGtQC8p5eTKSoDRdVYN0GdmNTxGkN1SyvxKSo6BQcLRZ7WmlnI6IKX8ozLTU90xhMbAwMDAIKFU520CDAwMDAwqAUNoDAwMDAwSiiE0BgYGBgYJxRAaAwMDA4OEcsJNb3a5XG+huWvYo6rq+WWEbYDmTqEG2rTYgaqqfpH4VBoYGBgYFHEitmjeBtpHGXYIoKqqegmaV+H/JipRBgYGBgYlc8K1aFRVXe5yuRpGHnO5XGeg+UrKRHN+2FNV1c1oTuOK5tKnoXkxNTAwMDCoRE7EFk1JvAH0VlW1MZpjvaKWyzDgdpfL9ReaI8LeVZM8AwMDg+rLCS80LpcrGW0vixkul2sDmlfUuvrpW4G3VVWtj7a50HSXy3XC59nAwMDgROKE6zorAQXIVVX14hLO3Ys+nqOq6lcul8uO5lJ9TwlhDQwMDAwSwAn/da+qah6wzeVydQdwuVzC5XJdpJ/+E22TMlwu1zloDuXK3AXRwMDAwCB+JMTXmd5yWA7Y0FpNH6uq+nSxMHcDz6NvZAS8oqpqmRsWuVyuD4AWaC2T3WibUy0GJqF1mVmAD1VVHe5yuc5F27gpGW1iwABVVRdUNH8GBgYGBtGTKKERQJKqqgUul8sCrAQeVVX164gwdwNNVFWttD0RDAwMDAwqn4SM0aiqKtG2bQathWEhMXu6GxgYGBgc5yRsMoDL5TIB36JtUPWqqqrflBCsq8vlagb8AvRVVXVHCXbuQ9tLHX36soGBgYFB7Igqu3Ci96NxuVw1gJlo61x+jDieDhSoqupzuVwPAC5VVVuVYU7+/Xf51lxmZGSwb9++csU9UTHyXD0w8lw9qEieTznlFKhCoUn4rDNVVXOBpRRzG6Oq6n5VVX36z8mA0VoxMDAwOAlJiNC4XK5MvSWDy+VyAG2AzcXC1I342Qn4ORFpMTAwMDCoWhI1RlMXmKaP0yhoji0/c7lcw4F1qqrOBh5xuVydgCBwALg7QWkxMDAwMKhCEj5GE2eMMZoyyO6aQeOL/PTpVcB559SqFnmOpLo850iMPFcPTuQxmpPBBQ1SStxuN8cSTY/HQzAYrMRUVQ39evn49XcrV7evy523SB64W6F2Zriqk2VgYFCNOSmExu12Y7PZsFgsVZ2UKqd9G7jW7cesbGL0y2fw1ru1adjdh/KknyWmgrINGBgYGMSZE97XGWgtGkNkDpOUZOXcsw/SIOsTwmEvWz+w8UvrZFZ9YyVsNG4MDAwqmZNCaAyORggTijlAeq118O8wFApc92RwVbssxr6cwu9/mKo6iQYGBtUEQ2hOVgTIC8E8+iB844Ff3Ewcm8OZ/woycXIy115Xm07/yWC66uRgXpWNERoYGFQDqq3QOD79lKzLL6du/fpkXX45jk8/jZvtAwcOcMcdd3DttdfSpk0b/u///o/9+/cfEWbUqFGcdtppjBs37ojj4XCYnj17Hop7yy23sH379kPn77nnHtq0aUN2djZdunThxx9/pCTEKRJTdgiZph9wwvi7FbpOz+PrL3czuF8e+fmCgc/U4JLmdXigf02+XG6jGsyXMDAwqGSqpdA4Pv2UtAEDMO/ciZAS886dpA0YEDexEULQq1cvVqxYwaJFizjttNMYNWrUofMvvPAC33//PatXr2blypW8+uqrR8Tv3r07y5YtY9GiRbRr144BAwYcOjd+/HgWLVrEggULeOCBB+jfv3+JaahLmIulwColVn02nhXoXdPJrec5qPuwl4Wz9jJX3ct/urtZ+bWVO3ul06RVbZ4Zm8qmLSfFPBEDA4PjgJOuNkkdOhTLpk3HDGP99luE33/EMcXjoUb//jjff7/UeIFzzyVv+PAy01CzZk2uvvrqQ78vvfRS3nnnHQBeffVVfvvtN6ZPn47VauX999+nd+/eTJ48mZ49e6IoCtnZ2YfiNm7cmClTDm/Tk5qaeuj/eXl5KErJ3womoENYITvPy0q3n3VWM/P2ufnCbualFBsP13QyPjlEX6ePYefl8dRjeSxeYefj2Q6mvpfEG9OSOa9RgG6dCulyvYfMDGMWgYGBQfk46YQmKoqJTJnHK0A4HOadd945JB4PPfTQEeftdjuTJ08uNf7UqVNp27btEccee+wxli1bhpSS995775jXT5MwKs8HaG7lbvAGuc4b5HNdcB4qEpx8Hze09tK+tZcDOQqzvrAzY5aTZ8amMfLFVFpe46N750LatvRis5bjRhgYGFRbTgrPAPn5+aSkpERtJOvyyzHv3HnU8WC9euxZs6ZCCSzOoEGD2LVrF1OmTCm19VEakyZN4vPPP2fGjBk4HI6jzn/88cfMmjWL6dOnH3Vu27Zt/PLLLyQnJ9O0adMS7YeBz3TB+cVi4t+BEH3yfXT0Bg/1qf6y1czHsx18MsfJrj0maqSG6djBQ/fOhVx6YQBxnM0jMFaMVw+MPMdGVXsGqJZjNPkDBxIuVnG
HHQ7yBw6M63WGDx/Otm3bmDRpUswiM3XqVGbOnMn06dNLFBmAbt26sXr1ag4cOHDUuRAh3lPeIU8cLPUaCtDJG+TLvW7+e6AQCTxYy0mbzCRm282EgX+fGWRQv3zWLNrN+2/sp1UzLzNmOeh0WybNO2by8hvJ7PynWhYjAwODKKmWNYTnpps4OHYswXr1kEIQrFePg2PH4rnpprhd47nnnmPjxo289dZb2Gy2mOK+++67vPvuu3zwwQfUrFnz0HG3283OiJbYggULqFGjxhFhishX8vhL7GChfUGZ11OAzhGCEwZ66YIzRxcckwmaN/UxcUwuG5bt5sUROWSmhxkzIZUr2tbGdU86M2Y5cLuPsyaOgYFBlVMtu84SzZYtW2jVqhWnn346drsdgAYNGvDmm2+WGbegoIBGjRpRv379Q3my2Wx89tln7N27lx49euDxeFAUhRo1ajB06FAuuOCCo+ys+HM5Q38ZjL+OD/XCmdQL14s6/SFgjsPMS8k2tlpMNApoYzjXRXSpFfHHDhOfzHHw8Wwnf+ww43SEuT7bS7dOhVx9uZ8YG3IVxuhSqR4YeY6Nqu46M4TmJGXlnyt46pdBeOsWUti6gGdzx3K9txMihrIWAmbrgvObxcQ5uuB0KEFwpIS1663MmOVgznwH+QUK9eoG6drRQ7fOhZzRMBTX/JWGUQFVD4w8x0ZVC0217DqrDkgOf0C4hZtHaz1Il4zr+cq6KmobJqCLJ8iSvW4m5hTiE3BfLSfZmUl8oXepFSEEXN7Yz/PDD/Ldsl389/kD/PuMIK9MSabZ9ZoXgnc+cpJ70OhaMzCobhhCUw0wY+Eqb1P2KXu5PcPFPbXuYLP52GuNIjEBN3mCLN3j5uUcD14BPWs5aZeZxFy7meJtYocdOl/n5d3XD7D2y90M6X+QggLBk8NrcGmLOtzfryaLlhleCAwMqguG0FQDAsJPrpLLl3tWMPDgU2ywrueGzGwer9GHv01HT/MuDRPQ1RNg6R43E3I8eAT8ny4480oQHIA6WWF63ePmy//tZd6MvdzucrN6jZW7HkyncavaDBuTyk+bq+dyLgOD6oIxRnOScqx1NAdFLpNSXmFa0lsA3OW+h175D5Mma8R0jSAw02FhfIqV7WYT5/tD9Cvwke0NHrMz2O+HJSvtzJjlYNFSO4Gg4NyzA3TvXHEvBEbfffXAyHNsGGM0BpVOmqzBwLwhfLlnBTd4OjEl6TVa1m7K5KTX8OGN2o4Z6O4JsGyPm5dyPBQogntqOWmfkcQCW8ktHACrFdq18jJlQg7rl+3i2cG5WC2SZ8am0bhVbe58sBZz5tvx+uKSXQMDgyrGEJpqzCmhejyfO57P9i7gYv+lPJc2gtZZ1/KJQyVE9LPEzIDLE2DZngLG5XjIVwQ90p10KENwAGrVkNx9WyGff7SPpbP38MDdBfy02cID/WpxaYs6DByexroNFk6shreBgUEkhtAY0Ch4Lm8dmM67+1Qyw1kMqNmXjpntWGpbfMTstbIwAzdHCE6eLjjXZSSxsAzBATjrDN0LwcLdfDB5P611LwSd/5NJsxuymPB6Mjv/NjZsMzA40aiWQlMzNZusWvWO+quZml125ChI5H40RYwbN4569eqxefPmuKQZ4Cp/Uz7d9xkvH5iER3i4N/0Obk93sdHyfUx2LBwWnBdzPOQqgrvTnVyfkcSiKATHZIJmVx/2QjBuZA61M0OMfTmVK7Kz6N4jHfV/hhcCA4MThYRM93G5XHZgOWDTr/GxqqpPFwtjA94BGgP7gZtVVd2eiPQUJxBsjNn0K0Ic9tYspZVAsElc7BftR1O0VcCIESMYNWoUL774InDkfjS9e/fGZrMd4dW5e/futGnTBkVRmDp1KgMGDEBV1UPnf/jhB9avX0+9etGv9o867Qiu93airbc9HzrfY2LKS3TJvI7rPB15LG8gp4UaRm3LAtziCdDVE+Bjp4UJyTbuSndysT9Ev3wfrXzHnjQAkJIsubmLh5u7ePjzLxOfzNa8EPQdXJPBI8Nc19ZL985V44XAwMAgOhL1avqAVqqqXgRcDLR3uVxXFgtzL5CjquqZwEvAmHhcONk5lBop3Y75Zzb9BASKxQxiNv14zHjJzqFRpaGk/Wj++usv4Mj9aOrVq8f777/Phg0bDm0VULQfTZETzsaNGx/h38zn8zFo0CBGjRqFSKDrZCtW7izsweI9q3g4vw9LbIvIzmrOsNQh7FNim/liAW4tDLBiTwHP53rYpwjuTHfSMSOJxVG0cIpoUD9E3wcLWDl3D/+bvo8u13uYv9jOzfdmcEXbLJ6bkMKWrTFn1cDAIMEkRGhUVZWqqhboPy36X/H6pDMwTf//x0Brl8tVSX0hVsLhLKTULielIBzORNuDMr6UtB/NpEmTsFq1axXtR9OzZ88S4xffj+aFF16ga9euNGjQIO5pLYkUmULf/MdZvGcV3Qtv4f2kd2id1ZSJyS9RKApjsmUBbtMFZ2yuh72K4I50Jx0znCyxmaIWHCHgskv9jH3msBeCs88M8uqUZC5saqXjbRlM+9DwQmBgcLyQsJVyLpfLBHwLnAm8qqrqN8WC1AN2AKiqGnS5XAeBdGBfMTv3Affp4cjIyDjqWh6P59D/CwrL3gETQBG7Sa9xFVrjy0ZO3jzCMiuquLEwZMgQkpKS6NGjR8xxJ02axK+//sqMGTMAWLduHRs2bGDQoEFlxlUUBUVRSE5OLvGexUoGGbzGm/QPDOAZ81DGp77ABynvMjA0hDvDPTDHWJQeBXqFJe8Ew4yxwO3pZq4IC4aETLSVIiafbPfeqf39szuAOtPMtA/MDBpRg2Fj0rihneR2V4jslhKLJcZMnyCYzea4POMTCSPPJxYJX7DpcrlqADOB3qqq/hhx/Cegnaqqf+m/fwMuV1V1f8mWgDgv2Ex2PonDNh2P704KCkfFHL8shg8fzs8//8zbb78d81YBU6dO5YMPPuCjjz46tA3AK6+8wltvvYVFrzH/+ecfMjMzGTduHM2bNz8ifjQbn1WE9ZZ1jEl9lnW2NZweOIPH8p8k29s+JoEowg985LTwcrKNv80Kjf1B+uf7aOYLxWwtIyODvXv38ePPFmbMcjDzcwcHckxkpIe48Tptw7bzzzm5fN8YixerB8aCzWOgqmousBRoX+zUX8CpAC6XywykAUfv4JVACj19CASvoNDTJ+62E7EfzcMPP8z69ev55ptv+Oabb6hbty7vvffeUSIDYFJ2IORIUpMeieuMuiIuDTThw/2f8vr+qSgoPFjr/3Bl3Mg669qYbVmBOwoDrNxTwOhcD/8oCrelJ3FjhpPlMXSpFSEEXHBugOFP5rF+yW6mTtzP5Zf4mfZBEu26ZdGmSyavv53E3n3G7AEDg8ogIW+ay+XK1FsyuFwuB9AGKD4PdzZwl/7/bsBiVVUrdVleWNYmN/+TuHeZbdmyhYkTJ7J79246depE27Ztuffee6OKW1BQwMCBA3G73dxyyy20bduWG264oULpieeMukgEgja+bD7fu4hRuc+z0/QXN2fcyP01e7DV/GvM9m
zAnbrgjMr1sNOkcGt6El3SnSy3xi44ABYLZLfyMbnIC8GQXOw2yfDnNS8Ed/Sqxex5hhcCA4NEkpCuM5fLdSHaQL8JTcxUVVWHu1yu4cA6VVVn61OgpwOXoLVkblFV9fcyTBu+zqLkzz+Ws/nnwdStU0irljnsz/06IWNQkXiEh7eTpvBa8qsUCjfdCm+hT35/aofrlMueD/jAaWFiio1dJoXLfUH65fu4xl96l1q03Qu//mbm49kOPp7jZNduE2mpYTq299CtUyFNLg6QwAl9ccfoRqoenMhdZ4ZTzZOUP/9YweafB1G3TiEtW+ZS4B6F19+dylije0A5wH+TJ/Bu0jRM0kQPd0/uL3iQFJlaLnvFBecKXXCaliA4sb6MoRCsWmNlxiwncxfZ8XgU/nVakG6dCunWyUP9Uypnw7aKYFS61YMTWWiMTuqTHCnNhEJnkprcj5qp12Mxr0n4NWuFazEk7xkW7FlGtrcDk1Im0jLraqYmTcFH7H1UNuDuwgCrdhcwMtfDH2aFmzOS6JbuZJW1Yi5pTCZodpWfic8d9kJQJyvE8xNTuaJtbbrdnc5HMw0vBAbwXUC8AAAgAElEQVQGFcEQmpOUsEwCBP7gVeTkzeVgwUQUZQ81U7uQmvQAivJXwtPQIHQaL+W+wqy98zg3eD4j056mXVYLZjv+R5jYtwKwAz10wRlx0MM2s4JLF5zVFRQcgOQkzQvBx2/v5+sFu3ns4Tz+2W2i35CaXNS8No88WYMVX1sJl38XAwODaokhNCcpUqYCp+L3ZwMKPv9N7M9dgdvTD5t1IelpzUlyjEXgTnhazg9cwDv7P+Tt/e9rC0BrPsSNGdexyrq8XPbswD3uAKt3FzD8oJffzQrddcFZLuKjAqfWC9G3VwErv9jD/97dy003eFi4xM4tuheC0eNT2LrNcPBpYBANxhjNScqx1tEoyk6SHaOx22YSCtfBXTgQr78rlfHdESbMbMdMxqWMZaf5L671NmdA3iDODZ5fbpse4L0kK68mW9ljUrjap63DudIf3/EVjxcWLLHz8WwnS1faCIcFl1zop3vnQjq191CzRtW8S8Z4RfXgRB6jMYTmJCWaBZtm8zpSnE9jMW8gELyY/MJhBIOXVUr6fPh4L2kar6ZM4KA4SGdPF/rmD6B+6NRy2/QAM7NqMlYE2GtSaKoLzhVxFhyAPXsVPv3cwceznPz8iwWrRdKmhebgs+U1vkr1QmBUutWDE1loqnXX2R5lN7emd2WvsieudhO5TcAVV1xBs2bNaNu2LW3btmXp0qXlTmcw2IScvDnkFYxHUf6hVuqNpCY9hKLsLDtyBbFh4x73fSzZvZr7Cx5iruML2mY1Y1TqM+SI8q3bdQCPhE18taeApw96+cWscFNGEjenO1kThzGcSLIywzxwt5uFn+5l/sd7uPMWN998a6XHw+k0blWboaNT+fFns7Fhm4EB1VxoXkkez1rrN0xMHh9Xu0XbBKxYsYJFixZx2mmnMWrUYRc3kdsErFy5kldfffWI+N27d2fZsmUsWrSIdu3aMWDAgCPOv/HGGyxcuJCFCxfSokWLCqZWwevvzoHcFbg9j2KzziM9rRlJjheA2JxmlodUmcbj+U/y5e4VdC68ialJU2hZuymvJ7+KF0/ZBkrAIeE+t5+v9hQw9KCXzWaFLhlJ3JLuZG2cBUcIOP+cIM8MzOPbxbuZ+sp+rmjsZ/pHmheCtjdl8trbSezZW61fNYNqzknXdTYidSg/WzaVaciPnw3W9UghEVLhYv8lWMvw3nxO4FyeyovOaWckn3/+Oe+88w4fffQRr776Kj/++CMTJkzAarXi9Xrp3bs3l19+eYkenDdu3EivXr1YtWoVoLVopk2bRqNGjY55zfL6OlOUv0h2PIvdNptQuA4FhYPx+W+ksr5Jtpg380LqaBbbF1EnVJe+eY/TxdMNE9EJREndCx4B05xWJiVb2WdSaObV1uFcFkjcGpmcXMGsudreOd9ttKIokhZNfXTrXEi7Vl7ssXklOiZGN1L14ETuOqu2QrPN9Dt7TXt0oRFkhrL4V+j0Y8Ypj9CEw2FuvfVWsrOzo3ZDE0nfvn1JS0tj2LBhgCY0RXm97LLLGDhwIGlpaUfFq6hTTYt5DcnOp7GYNxIIXkK+ezjB0KUx2ykva6xfMyZ1JBus33FW4GwG5D1JS1+bMp12HutlLBTwjtPKf5Ot7DcpNNcFp0kCBQdg6+9mZsx28MkcJ//sMpGaonkh6N45Pl4IjEq3emAITeURl8kAe5TdtKh9FT5xePGgXdpZuvsrMsPxddMyaNAgdu3axZQpUw5tZhYtkyZN4vPPP2fGjBk4HA4Adu7cSb169fD5fDz99NO43W4mTpx4VNz4eG8OY7fOIMn5HCZlD17fTRQUPklYnlJOe7Ehkcy3f8HzqaPZbt7G5b4reSJvMBcHShe8aF7GQr2F899kKwdMCi10wWmcYMEJhWD1Ws0LwRcLNS8EDRsc9kJwar3yXd+odKsHJ7LQVMuO41eSxxMu5qIxRDjuYzXDhw9n27ZtTJo0KWaRmTp1KjNnzmT69OmHRAY4tH2zzWbjrrvuYu3a2L0lR4+C13+zPn7TG5v1c9JrNMNpfwnKOX4SCwJBe+/1zNuzhOG5o/jNvJWumR15uOZ9bDOV5RavdJwSern9fL2ngMF5XjZaFDplJnF7LSfrLYlbG2MywbVX+nl59GEvBKfUCfHCK6lcmX3YC0GB4YXA4CSjWgrNeuu3BIT/iGMB4We9dV3crpGIbQIKCwvJy8sDQErJrFmzOO+88+KW5tKQJOP2DGT/waX4Aq1Idr5AelozbNZZHL1xavyxYOE/hXexeM8qHs3rzzLbEtpntWRo2pPsU/aW226ShAcLNMEZlOdlg0WhY2YSd9Ry8J0lsa9GkReCGVP3883C3Tze+7AXgoub16b3wBos/8pK6Ph3tWZgUCbVsuss0WzZsoVWrVpx+umnY7fbAWjQoAFvvvlmmXELCgpo1KgR9evXP5Qnm83GZ599xh9//EHPnj0Jh8OEQiHOOussRowYQe3atY+yk8iNzyzmr/Txm5/wB5pQUPgMwdDFcb3Gsdin7GViykt86HwPq7Tyf+4HuLfgfpJlcoW6F9wCpiZZeS3JSo5JoZU3QL98H5cEKsfnjJSwboOFGbOczJnnIC9foW6dEF1vKKR7Zw9nnl7yhm1GN1L14ETuOjOE5iQl0TtsQgi7VSXZ+RyKsg+PrxvuwicJy/JtCVAetpl+58XU55jr+Jz0UAaP5PflIeejHNx3sEJ2C4oEJ9lKrqLQWheciytJcAC8Ps0LwYxZTpatshEKCS65wE+3zoV06uDhlnvTaXyRnz69CjjvnFpGpVsNMISm8jCEJkoSLzQagnycjok47ZORmCn0PEyh9z605ZOVwwbLesakPssa29ecIc+kb87jtPdeX65tpSMpEPBWkpXXdcFpowvORZUoOKB5IZj5uYMZuhcCi1kSCApMJonZDHfdGuaBu/dSO7P6ePs0hCY2DKGJDUNoouTP1av5ZeBAattsXN6rF
56bbkro9UzKdpKcz2K3fkEoVJ8Cz2B8/o5UVtmWSJbavuTFmmP4WdnExf5LeCJvCJf7r6yw7Xy9hfN6so1cRdBWF5wLK1lwAH782cyMWU6mTE8+dExRJGYT3NzFTd8HC6qF4BhCExtVLTTVcjJAtSAYBClRDhwgbcAAHJ9+mtDLhcINySuYTE6eSlimkpbcixopN2E2bUzodYsQCFr62rAquI4xOePYZfqHWzO60rPWXWwxF99FPDZSJDxS4Ofr3fkMyPOy1mqmQ2YyPWo6+CHBkwaKU+SFIJJwWOAPCKarSVznyuDb7y2G6xuD4wpDaKoBisdDynPPVcq1AsGm5OTNI889FrPpN2qmXkdKUj8UEV9/cqVhwkQ3z818uXslA/IGsda6hhsy2/JEjX78oxzdGo6FFAmPFvj5anc+j+d5+cZmpn1mMvfUdPCjuepeJatFYrFI6p8SYv8BE51uy+TK7CxGvpDK9z8aomNQ9RhCU00wldDlmMCr4fX9h/0HV1LovR+79VNq1bgGp30i4K2UFNhxcH/BQyzZvYoe7v9jtmMmrWtfy9iUUeSJik0WSJXQR2/hPJbn5WubmXZZlS84VovEbpP0+E+YNQt3883CPXy/YhfjR+Xw7zODTJ6exHU3Z3JNB23/HMPJp0FVYYzRnKT8uXw5vwweTN3CQtru2kXYbmfv8uWE9AWflYlJ+Z1k50hs1vmEQg0oKByCL3AdiegyLq0f+y/TDl5KGcssx0zSZBoP5j/C7e67sVFxp2MH9UkDbyTbyFME7T0B+ub7OD+YuLGS7K4ZNLnYT58HCji3lFlnObmC+YvtzJ7rYOU32sy10xsG6djOQ6cOHhqdVfJ06RMBY4wmNqp6jMYQmpOUSKFps2+ftqzSbKbgkUcoeOABiHERaTywmFeQ4hyG2bwZf+AqCgqHEQyVf8OzkijrZdxk/pGxqaNZYV9KvWB9+uUPoJOnC0ocGvcHBbyZZGWyLjgddME5L0GCI1ZZMd+RjvAJpE0SnL4f2dRfYtj9BxTmLrIze56Dr9ZaCYcF/z4jQMf2Hjq195a6Rud4xRCa2KhqoTG6zoDdiuDJVBvZGUlxsZfI/Wi8Xi8DBw6kadOmtG7d+qgtBA5hNoMQhGvVIvell9i7ejW+1q1JHTuWrFatsH35ZVzyGguB4LUcyJtPnns0ZtMWaqa2J8X5OEKUf3V/rJwbPJ+3D7zHO/s+pEa4Jv1rPkLnzPassC2rsO00Cf30LrV++T5W2sxkZyXTs6aDTXHuUhOrrJjvqoXwaXWH8Ant96qSPZCn1wpzu6sQ9a39rF+ym2eH5FKrZphx/02heccs2nTJZMLryWz7w9ie2iD+JKRF43K5TgXeAeoAYeANVVUnFAvTApgFbNMPfaqqalmukePaotmtCMYnW/koyYoE/EKw8++8MuOVRU5ODj///DNXX301ACNGjCA3N5cXX3wR0PajWbt2LePGjaN37960bt2ahx56CNCEZtGiRbRp0wZFUZg6dSpz585FVVUAnnrqKRRFYdiwYQgh2Lt3L5mZmUelobR1NLbly0kdMgTLb7/hbduWg888Q+i00yqc51gR4iBJ9vE47G8hsVPoeZRC771Qwa6sWL76woT5zDGbcSlj2GH+k6a+axmQN5jzAxdUKA1F5AqYkmxjSpKVfEVwnUebFn1OHFo4ltPrHhKZSKRVEtj2T9R2du1R+HyBg9lzHazboInU+ef46dTeS8f2HhrUPz594Bgtmtio6hZNooSmLlBXVdX1LpcrBfgWuFFV1U0RYVoAj6mqekMMpssUmqGpNjaV4RjRD/xlUthj0u67jPDTfpWv9C6EcwMhhuf5Sj1fGvHaj8btdtOkSRPWrVtHUtKxW1/HXLDp95P05pukjBuHCIUoeOgh8h98EByVt8iyCJPyG8nOEdisCwmGGlJQ+BT+QDvK+06U52X04eP9pOm8mjyeHFMOHQtvpH/+E5waalCuNBQnJ0JwChTB9brgNKqA4Fjrle5BWzYMEm7lJdzCh7zar+0EFwU7/1H4bL6DOfMcfPeDJjoXn++nYwcPHbO91Dvl+BEdQ2hio6qFJiFdZ6qq/qOq6nr9//nAz0Dlj0KXwi8Whd0mgRTiCJFJBOFwmHfeeYfs7GwAHnroISZNmoTVqr3IdrudyZMnlygyoHlxbtu2LQDbt2+nZs2ajBs3jg4dOtCtWzfWrFkTe6KsVty9erFn+XI8HTqQMm4cWS1bYp8/n8qelhQKn8HBgrfJzXsfpJUaKfdSI8WFyVT2nkLxwoaNHu7/Y8me1fTK781C+zzaZjVjROpQDijl21Y6kpoSHs/38fXufB7N97HMZqZ1VjL313SwpZxdatJW8nOSZok8PYjyvhPLnelYzquD+bZaKG8kwa/mY/pArVc3zP13u/nsw318vWA3g/vlEZYw4vk0Lm9bm07/yWDK9CT+2W30uBvERsInA7hcrobAcuB8VVXzIo63AD4B/gL+Rmvd/FRC/PuA+wBUVW3s9x892Lljx44jXOmXxZ6ILrMwWpdZEfHoOosknvvRbNy4kQ4dOvDKK6/QpUsX1q9fz913382qVauO6jr8448/+OWXX8jKyqJ169bHvI5YtgxTnz4omzYRbteO4IsvwllnxZzXCiODwGSEHA7kAj2QYhiI6PcIMpvNBIMVG9j+m52MNo3kXeVtkknm0dBjPBjujRNnhewWsR/Jy6YQryhh3EDXsMLgsMK5MvryEVwqKOxi1jbXKcIpcc4MYm4hkR4IrRQEFygE5yuEt+hjOadJzG3DmLPDmFtKRGrZ19q6DT6ZpfDxbIWNPykIIWl6haRb5zBdbghTJ75bOEVFPJ7ziUZF8qx/2J5cXWdFuFyuZGAZ8Kyqqp8WO5cKhFVVLXC5XNcBE1RVLat2i+sYTUmCE0+hGT58OD///DNvv/12zFsFTJ06lQ8++ICPPvro0FYBBw4c4JJLLmH79u0IXRxbtGjBhAkTuOiii46IH7Ovs0CApLffJuXFFxE+HwX330/BI48gnfGpXGNBiBySHC/hsE1DSgdubx883nugjK22Ib5dKlvNv/J8ymgWOeaTFapNn/zH6Frowow5LvYPCMEbyVbeSrJSKKCjN0jffB//jrJLLZZZZ+wwoSy1oSy1IVbYEG5Fa/1c5ifc0ods4UWeGyyzKtq6zcSceVr32patFhRFcmUTP506eLiujZf0WpXj/sboOouNqu46S5jQuFwuC/AZMF9V1XFRhN8ONFFV9Vh3MiHTm4sEZ53VzIJ97nLbieS5555j3bp1R21cFg3vvvsuU6dORVVV0tPTjzh366238sADD9C8eXN+++03OnfuzKpVq47azrm8TjWVPXtIHTkS5yefEDzlFPKefhrv9ddT4f2Gy4FJ2Uqy8xls1sX6+M3T+ANtOdb7kogKaJ11Dc+ljuQ767ecGTiLx/MG0drXtsJOO4s4oAheT9IExyOgkydI3wIfZ0UpODHn2Q9inVUTnSV2lE0WAGTtkDau08JLuJkPahy7btiy1cyceQ5mz7Pz2zYLJpOk6RU+Orbz
0qGNh5plxK8IhtDExkkpNC6XSwDTgAOqqvYpJUwdYLeqqtLlcl0OfAycpqrqsRJ0QqyjSdR+NKB1ifXv35+cnBzMZjNPPPEErVq1OspORb03W9esIW3wYCybNuG79loOjhxJ8MwzY7YTD6yWJSQ7h2E2bcUfuJb8wmGEQo1KDJuoCkgiWWCfxwspo/nd8htNfJfzRN5gLg00ids1igtOZ11wzixDcCqc510KyjJddJbbEAcVpCKRlwYIt/AiW/qQFwZKHdGVEjZtMTNnvoM5cx1s32HGbJY0u8rHDe08tG/tJS01vvWMITSxcbIKzTXACuAHtOnNAIOABgCqqr7mcrkeBnoBQbR9gfupqrq6DNMnhNAcD8Rlm4BgEOe775I6dizC7cbdsyf5ffogk5PLjht3Ajhs00lyvIgQeXh8t+P2PI6UtY4IlegKKEgQ1fkBL6eMY69pD9meDjyeN5DTQ/ET4f264EzVBedGT5A++T7ODJUsOHHNcxDEdxaUpXbEEhvK91p3paxV1NrxEW7ug4yS0yIl/LDJwpx5dubMd7BjpxmrRdK8qY+O7T1kt/SSklzxOscQmtg4KYUmgRhCEyXx3I9G2bePlNGjSfrwQ0J16pD31FN4Oneuku40IQ5EjN8k4/b0xeO7i6Lxm8qqgApFIW8mvc7k5El4hRdX4a08kt+PrPDRu52Wl/2K4DVdcHwCbvQEeDTff5TgJDTP+xStlbNEH985YEIKibwwoIlOSx/yEj8lDVtJCRt+sDB7noM58x38s8uEzSppea2XTu09tGnuIympfPWPITSxYQhNbBhCEyWJ2PjMsn49aYMHY924Ed9VV2ndaY1K7sJKNCblF338ZinB0On6+E1rMjIyK7UC2qfs49XkCbyf9A4WaeEe9330LOhFioxfedynCF5LtvK287Dg9Mn3c4YuOBkZGfx0YD/jk618G8dxxqMIg/jBoonOEjtivQURFsi0MOFrfciWXq21U/fo1k44DN9+b2HOPAefzXewe68Juz1M62Y+OrX30LqZD0eU633AEJpYMYQmNgyhiZKE7bAZCuF8/31Sn3sOkZ+Pu0cP8vv3R6ZGMU827kisli9Jdj6D2fQ7vkBzLNYJ7Nt/tKeERPOHaTsvpo7hc8dsaoXSebigD7e6b8caxUy5aNmnCCbpguMX0MUT4HZ3gLm1UpimhOLq3SIqcgVihU2fzWZH7NIWSofPCWii08KHvMx/1GTBcBjWrLcye66Dzxfa2bffhNMRpm0LL506eGlxjRd7GZM0DaGJDUNoYsMQmihJ9FbO4sABUseMwfnee4QzMsgbMgRP165V0p0Gfhy2aSQ5XkIoBXi8d+L29Dtq/KYy2GjZwJjUZ/natpoGwYb0zxvA9d5OcZuhBrBXEbyQYuUDp5UQ2qZvMsJ8pQlNJBLEz2bEUjvKYhtirRURFMikMPIaH2G9m41Tj/QuEArBV2utzJ7n4IuFdnJyTSQnhclupXWvNbvah60ErTaEJjYMoYkNQ2iiJNFCU4Rl40bSBg3C+t13+C67TOtOOz++HpmjRYgDZNR8BeRkpEzF7emHx3cnYKnUdEgky2xLGJs6ii2Wn7nAfxFP5A3mKn/8nkPXdCffWE0lerZo7A/SxB/iMn+IJv4QmeEqeMcLBGKVDWWJ9if+0gZx5JmBQ6Ijr/SB/XCUQABWr7Exe56deYsc5OYppKaEad9aE51rrvRh0R+lITSxYQhNbBhCEyWVJTQAhMM4VJXUZ59Fyc2l8M47yXv8cWSNGom9bglkZGSQk7OSFOcwrJYVBENn6eM3LSs9LSFC/M/xCS+lPM8/5r9p7m3FgLwnaRQ8t8K2j1xsLPBHVCGX+YJstJrw6SLUMBg+Qnz+HQxTqT6aJfCb+bDofG3TFpnaw8ir/ZrwtPDC6aFDVaHfDyu+tjFnnoN5X9rJL1CokRbmujYeOrb30um6FHJzDaGJFkNoYiMuQpPdNYPGF/np06uA2pnxX8l84MABHn30UbZv347NZqNhw4aMGTPmiMWXo0aN4vXXX+fRRx+lX79+h46Hw2Huv/9+Nm/ejM1mIyMjg+eee46GDRuyY8cO7rnnnkNh8/LyKCgo4KefjvLcU7lCoyNyc0l54QWSpk0jXLMmeYMG4XG5IEbXOxXh8MsosVoWkuwcjtm0DZ+/FQWFTxMKV/5aIB9epiVNZVLKRPJFHl083eib/zinhCru/m+PIngtM41pSugI7xY+4AeLiXVW7W+t1cQ+k/YcUsKSxv4QTfxBGvtDXBoIEYcZx9HjEYivtAWjymI7Ypve2jkteEh0ZFM/6DPSfH5YttLGnPkO5i+24y5UyEyXtG9TSMd2Hq5s4sdUDXY3MISm8oiL0NQ77xSsFolQ4OYb3XEXnERuExDJ0KFDCYVCPPvss0edqwqhKcL800+kDRmCbc0a/JdcwsFnnyVQzEVOojj6ZfTjsE8lyT4eIQrx+O7C7emLlDUrJT2RHBS5TEp5hWlJbwFwp7sHD+b3Jk1WrOWXkZHBJn3WWWneLSTwh0mwzmo+JD6bzQpSCBQpOScY1rvatJZP/ZCsvFppu+4eZ4kdscqK8ChIq0Re7ifc0ots4UOerbnH8XhhyQo7C5ak8dkCgcejkJUR4vpsraVz2SX+yvyuqVQMoak8yt4mYHQqm7Ycu0/+q7WHp7QIoeU/KyNE/VNCWI8xSejcswMMfzL2gdZ4bRMQid/vp3Hjxrz//vtccMHR+6dUpdAAICWOTz4hdeRIlH37KPzPf8h74glkrcQO0Jf2Mgqxn2THWOy29/Xxm8fx+G6nxAUgCeZv005eSnmemY6PSZGp9MrvzV3uHtgiByxioLwVUJ6A76wm1uris95iwq1odVGdUJjGh8Z5gpwXCMdx/twx8IFYY9VEZ6kNRX+XZd0Q4aKZbNf6yDg9nT//3M+i5Vr32uLldrw+QZ3aIW7I9tCxvYfGFwWqZm5KgjCEpvKIu9BEmCYlWXL+OYFS45VHaMLhMLfeeivZ2dnce++9McUF6Nu3L2lpaQwbNuyI45999hkTJkxg4cKFJcarcqHREXl5pIwbR9JbbyFTUsgbOJDC224jUX0dZb2MJtMmUpxPY7WsJhj8NwWFw/AHmyckLWWx2byJsamjWWZfTN3gKfTNf5wbPV0xxTiCEq+B8SCw2azoLR5NfHbo2xjYpeRifXJBE3+Ixv4QtSqj7tipoCyza2M7K2yIfAVpkpivkviaFiBbac5ACzyCRUvtzJ5nZ8kKO/6AoF7dIDe00yYSXHT+iS86htBUHnHrOivCapEoCtzcxU2fBwrIivOYTTy3CYjkjjvuoEWLFqWK1/EiNEWYN2/WutO++gr/hRdycORIAo0bx/060b2MEqtlPsnOEZhN2/H521BQOJRQ+Iy4pycavrKuYkzqs/xg/Z6zA+cwIG8QzX0to54SncgZWLsUcairbZ3VzA8WhaBeY58R0Fs8+r9nBMOJrckCINZbUZbYsKxIIrxBe59kZohwcx+ypY9wMy95FliwxM7seQ6Wr7IRCAoa1A/SsZ2HTh08nNcoeEK
KjiE0lUdcx2gSKTAQ/20Citi1axfXXHMNa9asoVYpXVHHm9AAICX22bNJGz4c065duG+5hfxBgwgX81BdEWJ7GX047W/hdIxH4MXj66GP36SVHTXOhAnzhX0OL6aO4U/zH1zpu5on8oZwYaDssa3KnOrrAb63Hp5ksM5iIkefZFAjHNndFuLiQCjazT1jJiMjg32bDujOQG0oy+yIXEVzj3NxQBOdll5yGgaZv8zO7LkOVnxtIxQS/Ou0w6LT6KwTR3QMoak84jbrrMnF/oQJDCRumwCAl19+mZ9++onXX3+9VBvHpdDoCLeb5PHjSX7jDWRSEnmPP07hHXeAueLjJeV5GYXYS7LjeX38piYFnsfx+m6jKsZv/Pj50PkeE1Ne4oBpP9d5OvJY3kBOCzUsNU5VrimRwG8mJaLVY+JXfSt1s5ScH9CmVheJT904rek5Ks8hEN9btHGdxXbEBgtCCmSNMOHm2oSCA5cE+HyDtjh09Ror4bDgzNMDdGqvda+ddcbxvZGaITSVxwmxjiaR2wQAXHPNNYwYMYKWLUtfG3I8C00R5q1bte60FSsInHsuB599Fv/ll1fIZkVeRrPpR5Kdw7BaviIYbER+4TACwWsrlJ7yki/ymZL8Gm8mvU5ABLjVfQcPF/QhI5xxVNjjbfHiASFYHyE831lMePVJBvUiZrdd5g/RKBgul5yXmecDAmWFDbHYrrV69urucc73I1v4yGns5397Tcxe6ODrdVakFDQ6K0DH9tpEgjMahkq3XUUYQlN5nBBCczxwIggNoHWnffEFqcOGYf77bwq7diVv8GDCtcvnBbnila7EZvmCZOcITKYd+PzZFBQ+RSh8egVslhs9FrQAACAASURBVJ89ym5eThmH6vwAu7TTs6AX97rvxykP73x6vAlNcQLATxbl0ASDtVYTu/TutqSw5JJA0SSDIJf6Q6RFUSXFlOcwiE1mbb+dJTbEOisiJJApYeS1PnIb+/k8JPjgKxtrv9O6uM9rFKCTLjqnnXp8iI4hNJWHITRRcsIIjY4oLCR54kSSX3sNabWS378/7h49OORzJEriV+l6cdqn4HS8jMBPofdeCr2PImVVOA+F301beT71ORY45pIZyuKR/H50L7yFHOUA/bMeYdyeiWSGs6okbbEigb9NgrVWE+ssZtZaTWyyKISFQEjJ2cHwodltTfxBGpawpqdCzzlPIFYWucexI/7RWztnB8i/zM8Sq2TKr1a++UGb0H3RedpW1Te081L/lKoTHUNoKg9DaKLkRBOaIky//07a009jX7yYwNlnc3DkSPz6wtdoiPfXvSJ2k+Qci936EVLWosAzAK/vVqhcJy6H+NayljGpz/KtbS3/Cp5O/eCprLKt4Fb3HQzPG1UlaYoHbgHfWbTWzrdWM99aTeTp3W0ZofAh9zmN/SEuCISoH6/nLEH8Yj689cEaK8IvkI4whY0DrKkR5q1/TMz7TROdSy/y07GdhxvaeTilTmLGd0vDEJrKwxCaKDlRhQYAKbEtXEja0KGYd+ygsHNn8p56inDdumVGTVQ3ktm0kWTn01gtawgEz6Wg8BkCwegFMJ5IJIvsCxidMpw/LNsBMEkTI3JH08bXnvRw/GbxVRVh4Bd9Tc9afaxnu1kTd6uUNJYKFxV6D7V84uY41C0Qq7UFo8oSG+JPbQTJf2qQH+uG+PCgiQ93mfEIuOwSH506eP+/vTsPj6o8Gz/+PbNmByGALAIKVpbIKgkJmKqVpWjV+tpHrFq1tlZrtVr9tbxqFRUtldeNiisVXNFHja/W7VWryBYQQRQKtri0CsguhCST2c75/XEmQ1aYLCeTSe7PdeWSmZyZeR4G555nu29OmxxwJJVVXRJo2o4EmgSldKCpFgiQ/eCDZM2bh+V2U37ttZT/4hccKn2Ds+sVFn7va2RlzMLt3kJV6IeUV/4R0xzg0Osd2k05f0BnLiJqRO35qNjHyPfCQxgfLGJ8qJD84HiOSEK5BCfsrnGmZ11GGmsMk1CNxKHV6XNOCEU5LmLS4kw0FvCVG9fiWNBZ4ceoMjB9Fv/pF+W1sMGz+9x84bYYPy7Ej6YGOG1SFbndnQk6EmjajgSaBHWIQBPj/s9/yJk5k/S33yY8aBBls2YRLC5u8Nq2WRgPkJH2GBnpf8EgQmXVL6gMXI1F2/0b3OnawUm9Cgkawfh9PsvHpeWXsd77KWt8qwm4AgAMCQ9lfLCIglARBcHxLc6t1h7k5uaydffuRhOH5pgWY2rkbhsTjtLMqtEHBcD40I/rvVhZ68/t9cN9XUze91qUBNws91mMLAhxxtQAP5wUoFvX1vt8lUDTdiTQJKgjBZpq/r//3Z5O+/e/CUybRtnMmUT71s6A3JY7sFzGdjIzZpPuf4Go2YOKyj9QFVK0xfrNzTn/jc58jrARit/ntXyoivO4rexOQoT41PcJq3wrWOlfwVrvR1S5qjAsg6GRYXbgCRaRHyogJwkHVFuqofe5ZuJQe62nduLQYWEzlsXADj59W5o49JvqZKB+jGV+jAoXUZfFx+kWr0ddLPZb5E4Icsa0AFNOqaJrItvpDkECTduRQJOgjhhoAKiqIuuRR8iaOxeA8quvpvzyyyGWeSEZW3097k/IyrgZn/cjwpG82PrNeEdf8/TcyWzy1S8PMTQ0nNd2v13v/iBB1vs+odS3nFX+Utb61hA07MAzLJzH+FAR44OFjAsVkJ2knXVNkej7XGYQO9NjB5+1XjeVNRKHVk+1jYslDm12ibwQGB/Z6XGM99NwbbKfaafH4l2XwWK/RXRikB+cEWDyyVXkZDf9c7dTBRqllKG1TlZ0arVAYyz34bmwu12AyW8ReWqPXQOjFThVjwbgnXfeYc6cOViWhWVZ/O53v2PatGn12tBhA02Me+tWezrtjTeIDBzI/ttvJ3jKKUk8U2Lh971KVvos3O5tVIVOo7zyJkyzv+Ov3Jw+B6niE986VsZGPB/71hAyQrgsF8PDxzM+VERBLPBkWVkOtbz5mvs+100cutrnZkt14lDTYlS4lRKHbnfZB0XfS8Na7MdT7iKKxRq3wQdpFmX5QY4/N8Cpp1SRleCcXmcLNPdqra89zDVHAU8CR2JvIHlUa31/nWsM4H5gGlAJXKy1XnuYl2+VQGMs9+G5qBtG4OByoZVuEnlib6sEG6fq0ViWxfDhwykpKWHIkCFs3LiRs846i88++6xews6OHmiq+ZcsIeemm/B+8QWByZNxz53L7qSObgNkpD1MZvo8wKSy6pdUBq7CwrkP69YIrlUE+Ni3llX+Ulb6VrDOt5awEcZtuckLj4it8RRyQiifTCuzlVrefK35heLbGpsM1tRJHDo4HD1YFjvczMShETA+9mK8l0bVm2lkbbZHO7sNWOK32DkiTN+fVnDiaVVkZDT+edypAk0ilFK9gd5a67VKqWxgDXCW1npjjWumAVdhB5oC4H6tdcFhnvqwgcZ9cw7GxkMPgI2VPgyr/t+5ZVhY4xsPNNawMNHbklePxrIs8vLyWLhwIePGjWPlypVcf/31LFu2rN7jOkugASAUIm
v+fLLuvRfDNDlw5ZWUX3EFNDHHXGtyGdti6zcvETV7UlE5g6rQT6Dle6HqcWIUFzACfOxdw0r/Clb5SvnE93E88IwIj4xvLhgbGlcrS0FbcXLkGjDgE6+71tbqfa6DiUMPTrdFGdmcxKG7XbDYz76SdDJW+cmqMjCB9V6L/3wvQtezAoz8WTnpWXZeRpUb5fJ30nC1YPal3QcapdRftdaXxv5sAI9prX/RlBdRSr0CPKC1fqfGfY8Ai7XWi2K3/wmcpLX+9hBP1SqBxlXaeCZlszDY6O+aE2haux7N0qVLueKKK8jIyKCiooInnniCE044od7jOlWgiXFt20aPOXNwa02kf3/233orwUmTSGZ6Xo97LdmZt+D1rCUcGRFbv2lZPre62mK6sNKo5GPvR5T6V7DKX8qn3nVEjAgey8OI8Ch7O3WwkDHhcaRbzgf4tpwitYAvPC4+qhF8Pq+ROPT4WOLQ6uBzZFPO9JhgrvOy7ZkMjPfT6L/DhRuD/YbF5qOiTPh6DNnsqvewaJfu7NhYfy2uMckONInks4snedJaW0qpJhXtUEoNBEYDq+r8qi/wTY3bW2L31Qo0SqnLgMtir09ubv2kgoFAIP7nRAKB95jeGMEGRjR+i8iLew77+Ka46aabyMzM5JJLLmnyYx966CE2b97MCy+8AEAkEuGBBx5gwYIFjBs3jtWrV3PFFVewePFiMjNrT2e4XC5cLhdZWVkN/p11SLm5GIsWEb70UtzXXEP3Sy7BnDKFyD33wODBSWrUZLAmYfI8Hs+NHJHzYyzOwTLuBKN1zt94PJ42eY/7058zORuA8nA5q4xSlro+YJlnCY945/Fg9ly8lpcTrHwmWsWcaH6ffGs86bR+4GmrPlfrAcS3d1iwJ2Sx0rBY6TIp9bh42utmfmx2dIAFhaaL8ZZBoWWQZxl4DvUZPxl6TgaIEt4V5dOHXex70UX/ze4GgwyAe/+elPr/OpFAs1sp9QtgBVAIJPxJrJTKAl4CrtFa140ADf3N1/sqoLV+FHi0+vcNfYuJRJqW3jvy1J5G12ha02233cZXX33FwoULm1z0bMGCBbz88ss8//zz8TID//jHP9ixYwfjxo0DYNy4cWRkZLB582ZGjRpV6/GmaWKaJuXl5e064WJry83NZXdeHrz5JpkLFpB99914R4+m/PLLKb/qKqyMtp/msZ0KFJGZ/hAZaQ9iWK9RWfUrKgNXYtGyNY9kbYAYyWhGMprfcA3lRjlrfKtZ6bNHPHd7/8wc95/wWT5GhcbENxeMDo1pdsnqmtpDItGC2M9vgRCw0euKl8Ve7HPznKd24tBxNRKH5jQ26DFgwBX2TzgEHN346zel/7ERTdIkEmguwh5RXAn8E/hZIk+slPJiB5lntNYlDVyyBTiqxu1+QP15MQdYE0JEnthbe9dZK20EqDZ79mw+/fRTnnrqqSYXPXv66ad5+umn0VrXKnrWu3dvvv32Wz7//HMGDx7M5s2b2bVrFwMGJOdkervm9VJx2WUEzjqLnFmzyJ47l/QXX6Rs5kyqpk1L0nRaBhWB6wgEp5OV/icy0+8nzf98bP3mv3Bi/aatZFlZfD94Mt8PngwH7DIHH/k+jK/xPJB1H3Oz78Fn+RkdGkNhcAIFoUJGhkbjp2n/f7RHPmBU2GRUOMQvK+xvzFvdRmyqzcNHXjf3Z/kwDT+GZTEkUrNIXIQBDZzp8TaeACPlJLJG4wZ+gj16fAgYrbVefZjHGMATwF6t9TWNXHMa8BsObgaYq7U+3OR1SpyjcbIeTUlJCfPmzcOIfVBef/31TJ06td7zdMY1Gmj8m67vww/pcsMNeDdtoqq4mLLbbyeStOk0m8fzEdkZt+D1rCMcGc2ByplEIvXX2w6nPXy7P5wyY3888Kz0lbLRuwHLsPBbaYwJjaUgWEhhaAIjQqPwcfhP2FToc10VBqz1HiyLvcbn5kDsTE+PGmd6xoaijAhH8QN9+jZe7nzb1jUJv3ay12gSCTSLgA+An2mti5RS72qtTz3MYyYCS4H12NubAW4A+gNorR+OBaMHgKnY25sv0Vp/dJj2pkSgaQ8k0DQgEiHjqafIuesujECA8l/+kvLf/hYrK5nnREzSfCVkZvwJt2s7VcGzKA/cgGn2PfxDY1LxQ3e/sY/Vvg9Z6bcPkG7ybMQyLNLMNMaGx1EQLGR8sIjjwyMbDDyp2Oe6qhOHVu9s+8jn4d+x6TafZTEiHGW1f2ijj0+lQJPI1FmPWGBQiT6p1noZh+lU7NDnlYk+pxAt5vFQecklVP3oR2T/6U9kP/ggGSUl7L/5ZqrOOCNJ02kuqkLnEAz9kIz0B8lIexi/7y0qq66gIvBrIFlrSs7qYnXl1OBkTg1OBmCf8R0f+lfF1nhWcE/OXQCkm+mcEMqnIGQHnrzwCLzNP7/frriAIRGTIRGTCyvDAOxyGaypsa16e69cjtxRP6Bu75U6GwEgsRHNs8ArwDXAXcDZWusL26BtDZERTYJkRHN43jVr6HLjjfjWrydYWMj+WbOIDBnicAsPzeXaQlb6HaT5XyVqHkl55Y0EQ2dxqPWbjvDtvq69rr2s9q1kpa+Ulf4V/Mv7GQCZZiZjQ/mc4jmVEd+NZHj4eDzNKgadGn7cPYPVPjdWA1+Ctm5L/KhFKoxofg78AliLvWBf/1ShECkoPHYsu19/nYxnnyVn9mx6TJ5Mxc9/zoHf/Q4rJzn5vkyzH2UVDxEI/pysjJvpknUV4cjjHKi4jUh0TFLalAzdzG5MqZrGlCo7vdIe1x4+9JXGMxfM9NwIPSDLzLJHPMEixoeKGBYe3qECzyPfBbgvy8fzmT5MDELJOxLWIolscxmitX4AuBVIBwY62iIh2pLbTeWFF7Jj6VIqp08nc/58ehYXk/7ii5DEhLPhyDi+K3udsvJ7cLm20q3Lj8jJvAqXcajzzB1Xd7M7P6w6nZn77+CtXe+zOfQ1c/c+xJmBs/nG/TV/7jKLH/eYxtgj8/hlt4uYn/kwG7zriZK80sutoadpcWdZkNId5VxiukizLHyplQgZSGxEczfwA+A27E0BC7DP0wjRYVjdurH/rruoPP98utx4I0f89rdkPPOMPZ02fHiSWuWiKnQuwdBpZKQ/QEbao/h9b1IRuBK/73W8nk32ZSb0jNU2C0eG811Z4ifGU1VPenFa1RmcVnUGALtcO1npK2WV304S+l7auwDkmF3IDxbE13iGRIbhSsFt5D1Ni7lRD5fv2s99WT4+8qXWqC2R1mYopfyAX2u9SCl1udONEiJZwiNHsvvVV0nXmpw77qDH1KlUXHQRB/7f/8Pqkpy6LRZZVARmEAj+lKz0O8jK+B9MMwPLcmMYB7+xW5aPcDO2R3cEPcye/KjqTH5UdSYAO1zbY9Nspaz0L+fddDv4djG7kh8cT0GokMJgEd+LDEmpwFM9woHGU2W1R4kEmqexNwPcopRKA75ytknO6zVqMu5d9RMcRHt0Z8e6ln8bdLJMwLvvvsucOXOIRCJ07dqVe++9l/79nU9F36m4XASmT6dq6lRy5swh84knSH/1Vcpuu
IGAUtDELA+txTT7U1bxCIHgSrIybsDr+WfdhlMZaPDYWqfTyzySMwI/5ozAjwH41rWNVf5SVsU2F7yT/hYAR0SPID9UGBvxFHJs5LiUCjypolMWPmutQ1CNcapMwL59+5g4cSKvvPIKgwYN4qWXXqKkpIRnnnmmXhtk11nr8WzYQJebbsK/ejWh0aPZf+edhEeMaNXXaLooXbKm4/OuwDDs5STT7EUg+DNCkQlEIqOgg2wDbkhL3+dt7q3xdDkrfSvY4rHTLnaLdiM/NJ7xscwFx0a+h5G8zVq1SJmAtnPYQJNz8//g3fivQz6Jv7TxYBIsbDwIhYd9j7Lbrk+wqQe1VpmAdevWce211/L+++8DdkDLy8tj/fr1dOvWrdbjJNC0Mssi/cUXybnjDly7d1N5/vmU/eEPWHX+3tuSy9hB966FGEYQy3ITiQ7C67H/7ZtWBuHweMKRIkLhCUSiw2mLEtNtpbXf5y3ub2oFnm2erQB0i3ZnfGx9pyBUxKDI4KQFnlQONI1OnSmljtBaf9eWjemITNPkySefZPJk+2Ba9cilWlpaGo899lijj1+wYAGTJk0C4JhjjmHnzp2sW7eOUaNG8fLLLwOwdevWeoFGtDLDIPCTn1A1ZQrZd99N5oIFpL/2GmUzZlD505+Cu+0/xE2rF4HguaSnPU0geAHllXdiGHvxeUrxepfj8y7H73vPvtbsQjhSSCg8gVCkiGj0OJL4udPu9IsexTmBczkncC4WVjzwrIxtLngj3U4BlRvtEd9YMD5YyNHRQe1mxNOeNTqiUUo9DBwBbAbeBlZorZuWJrn1pcTUWU033HAD27dvZ/78+U3O4PzQQw/x+uuv88ILL8QzOC9ZsoR77rmHYDDIySefzMKFC3nppZcYOrR2qgoZ0TjL89ln9nRaaSmhESPYf8cdhMe0/TkXl7GD7kdczZ7v/oJp9Wzg99vxekvxeezA43Z/DYBp5hIKFxGKTCAcnkDUHEgqBZ62rUdj8bX7P/EEoSv9K9jh3g5Az2gvO11OLDv1wOjRjgWeVB7RJJIZ4FhgElCE3dBVQInWeovzzasnpQLNbbfdxqZNm1i4cGGTMzgvWLCARYsW8fzzz9fK4FzTrl27KCgoYMOGDWTUSX8vgaYNWBbpr7xCzu23496+nYrp0zlwww2YNTZ9tIWm9Nnl+hqfZwU+73K83uW4XTsAiEb7xINOKFLUpFxryZDMbAgWFv92f1Ur8Oxy7wTgyOiR8cOjBcFC+kcHtFrgSeVAc9hdZ1rrzdijmgdjmZwLgN7Yaf5TUrRH90Z3nbUWJ8oEAOzcuZOePXtimiazZ8/mggsuqBdkRBsxDAJnnUXVqaeSfd99ZD72GOlvvknZ739P5QUXgKf9nXUwzf5UhfpTFZoOWLhdX8SDjt/7Lun+WJG96MBY0JlAKFyEZfVIbsPbEQODo6PHcHTlMZxXeQEWFl+5v4hNs5Wy3L+UVzLsyii9I31iW6ntzQVHRTvnDtEOtxmgPXCyTMD111/P6tWrCYfDFBcXM3PmzPhr1CQjmrbn+fxzezpt6VLCw4ax/847CcWK1Dmp9fps4nFvwutZjs+7Aq9nJS7XAQAikeNiQWcC4ch4LKtrK7xe87Xn/G4WFl94PmdV9RqPr5S9bvuLbd9Iv4NrPKEi+kb7Jfy8qTyikUDTQUmgSRLLIu3118m59VY827ZRec45lN14I2bP+usnrcW5PkfwuNfji20s8Ho+xDCqsCyDSPT4WNCZQDic3+IqoU2V9Pe5CSwsNnv+FZtmW84qXynfue19VkdF+scDT0GwkD6NTFnudO3gup5Xc8/Ov9DDbPq/pXYfaJRSU7XWbymlBgPXAs9rrZe0Sevqk0CTIAk0yWVUVpI1dy5ZjzyC5fdz4LrrqLj4YvC2/tmWtutzEK9nXWzEsxyvZw2GEcayPIQjowjHRzxjoRXKNR9Ke3mfm8PEZLPnn6yMbaX+0F/KPtc+APpHBjK+xuaCI83eANyc898synya8you5LayO5v8mskONIlMIl8PvIVduOwxYC7g/HyAECnMysjgwIwZVCpFl5tvpsvMmWQ89xz7Z80iVJiqqQL9hCMFhCMFVFb9Dgjg9ayOj3gy0v5CZvr9WJafcOSE+Fbqjn54tKlcuDguMpTjIkO5qOLnmJj807MpHnjeSn8DnbkIgIGRoxkRGs0b6X/DNExeynyeq8qvadaoJpkSCTTZSqn+QFRrXaqUqnC6UaLlUmxKtMOKHnMMe596irS33ybnllvIPeccKs86i7KbbsLs3TvZzWuhdMKRYsKRYioCYBhleD2rYqOd5WRl2MXLqg+PVu9qi0SH0ZEOj7aUCxdDI8MZGhnOJRW/IEqUzzyb4glC30z/GxHDLowWxeQvWfc1a1STTIkEmj8Bs4BZsVxnK51tUtNZloVlWRhJqZDY/liWRTSa2unROxTDoGrKFKqKi8l+8EGy5s0j7Z13OHDttVRcein46pcqTkWWlUMoPIlQ2D5gXPfwaHb88GhXQpFCwrFzPNHo90ilMzxOc+NmeCSP4ZE8Tg+cyUm9Do6Aw0YoJUc1iQSao4EZWuvqxZEZDranWdLS0igrK6NLkrLrtjf79u3jm2++keDb3qSnc+C666g85xy63HILXWbNsqfTbr+dUHFxslvX6iyrG8HwaQTDpwHVh0ftMzw+z3LSfG8CEDV7xINOOFyUcodHnfRA1n2Y1J6dSMVRTSKB5kvgLqVUF+BvwIta673ONqtpfD4fu3btYteuXbgbSQXicrkwTbONW9b2LMti79697N+/H5fLVb0IKNqR6IAB7F24EP+779LlllvIPe88AtOmUTZzJtG+7fugZEuY1pEEQ2cTDJ0N1D88muZ/BahzeDQ8AdPqvP+G1/rWEDZCte4LGyHW+j5KUouaJ+HtzUqpbsDDwFTg/4D7tdbLHGxbQxrcdVZtw4YNfPnllw1OG2VkZFBZWelk29oVt9vN6NGjOeqoo5LdlDaVcruRqqrIeuQRsubOBcOg/OqrKf/Vr6AJh3xTrs8Nqj48uswe9XhW4HLZW4Aj0aPjGQtC4QlYVm4H6XPTdOhzNEqpHwLTsfOe/Q14AbvBr2it23q8f8hAcyjyD7NzSNU+u7dsIefWW0l/4w0iAwey//bbCZ5ySkKPTdU+H1rNw6PL8XpX4jLKAYhEhuD2/ID9B8bGDo92jinzVA40iUydHQ/coLXeWvNOpVT9nPYHf/c4cDqwU2ud18DvT8IuplZdRK1Ea31boo0WoqOJ9uvHd489RuUHH5Dzxz/S/cILCUyZYk+ndcrCdi4i0eFEosMJBC/DPjz6Kb7YGo+bx+maPQ/LcsUOjxYl7fCoOLxEAs3zwB+VUlnARcAlWuv5Wuu65f1qWgg8ADx5iGuWaq1PT7ilQnQCwe9/n13vvkvWY4+Rdd999Dz5ZA5ceSXlV1wBsQzenZOHSHQMkegYKqt+Q273bPbtfye+lTojbT6G8VDs8OjoGodHx+D04VFxeInkrf8rcB/QR2sdBc473ANimQPa1YYBIVKGz0f5lVey84MP
qJo8mZy776bnKafgf/ttu5SmAMNPODKeisB17DtQwq7vNvJd2SIqqy7HMCJkpM3liJyf0OOI4XTNVmSk3Y/H8xEQTnbLO6VERjRurfVnSqnq261VULtQKfUJsA24Xmv9j4YuUkpdBlwGoLUmNze3WS/m8Xia/dhUJX1Ocbm58MILhBcvxn3NNXS/5BLMqVOJ3H03DB4cv6xD9TlBDfe5P2DvaLOs/VgsxWAxXu8H+Lz24VGLLGAilnEScDIwAozW+khzViq/z4kEmvdiRdD6KKXuB95phdddCwzQWpcrpaYB/wsc29CFWutHgUdjN63mLoZ1zAXTQ5M+dxB5efDmm2Q+/jjZ99yDd/Royi+/nMjAgWTffTfGtm24+vThwIwZBM4+O9mtbROJvc/jYz8zYodHV8QPj3rcbwE1D4/aU21R81ja6xmeVtgMkDQJbW9WSuUBQ4F/Al9rrfcl8JiBwGsNbQZo4Np/AydorQ/3tyi7zppA+tzxuHbsIGfWLDJKSrAMA6PG/79mejr777qrUwSblr7PtQ+PLsPttstr1T48OoGoOYD2Eng65K4zpdQLwPla65DWegOwQSl1HHZZ5/yWvKhS6khgh9baUkrlY0/H1a9EJoSoxezVi31/+Qv+pUtx79pV63euQIDs2bM7RaBpqYYPj1ZvpV5R4/Bo3xqHR4s69eHRljjU1NlzwBtKqXO01vuUUlOA24GfHe5JlVKLgJOAXKXUFuAWYulbtdYPA+cAVyilIkAAmK61llVOIRLkauSbrXvrVrpdeCHB4mKCJ55I5LjjQNIQHdbByqPnUfvw6HL83rdJ92ug5uHR6sqjqblm0tYOOXWmlCoC5mBnAijCDgiHnTZzkEydNYH0uePqmZ+PZ+vWevebmZmYvXrh+fJLAKK9ehGcODEeeMxevdq6qY5o2/fZxOPeaK/veFbUOTw6NJ6xwOnDo6k8ddZooFFK3Q5YwBBgMvAQsb2BWuub26qBdUigaQLpc8eVXlJCl9//HlcgEL+v5hqNe8sW/EuX4l+yBN+yZbj32qcNwscdR/DEEwkWFxMaPx4rMzUPNyb3fa4+PFpdAG51rPKoq1bl0VA4H8hotVdN5UBzqKmzd2P//Tswrw3aIoRIMvR92QAAEGxJREFUUPU6TPbs2bi3bSNaZ9dZtF8/Ks87j8rzzgPTxLNxI2lLluBfsoTMp54ia/58LK+X0Nix8cATHjkSGklKK2qqeXj0KuzKox/XODz6GIbxIJblrXF4tKhTHx5NOKlmOyEjmiaQPncOTe5zIIBv9eqDI54NGwAwu3QhWFQUDzzRgQPb7fpO+36fK/F5VtfYSv0phmFiWWkHK4+GJxCJjiSxEya2VB7RSKDpwKTPnUOLt/ru2YNv2bJ44Kle+4kcdZQddE48keDEiVjdurVWk1ssld5nu/LoyviIx+vZBIBpZRIOF8R2tU2MVR5t/PCoBJq2I4GmCaTPnUOr9tmycH/5pR10li7Fv3w5rgMHsAyD8PHH25sKJk4kNG4cpCVvGiiV32fD2FOr8qjH/QVQfXi0qMbh0cEckTMFr6d+0pRwZDjflb2d8GsmO9AkPm4TQnR8hkF00CAqBw2i8uKLIRLBu25dPPBkPfww2Q88gJmWRqigIB54IsOGgSs1Urkkm2V1Jxg+nWDYzinsMr6tdXg0zfcGAFGzJ5aZgWW5MYxojcf7CEdOSErbm0tGNB2Y9LlzaMs+G+Xl+EpL44HH+69/ARDt3j2+thOcOBHT4UqhHfd9tupUHl2C21X7LLtlpbFnXymm1TPhZ5URjRAiZVhZWQQnTSI4aRIArm+/PTjNtnQpGf/7vwCEBw2yg05xMaHCQqzs7GQ2O4UYmOYAqkID4odHczJ/jd/3OoYRxbJ8BILnNinItAcyounApM+dQ7vps2Xh+ewz/EuW4F+6FF9pKa6qKiy3m/Do0QcDz6hR4PW26KXaTZ/bgMvYQfeuhRhGsFmjGZARjRCiozAMIkOHEhk6lIpf/QqCQXxr1sQDT9a995J9zz2YWVmECgvjgScyaFC73UbdHphWLwLBc0lPezolRzMggUYI4RS/n1BREaGiIg7MmIHx3Xf4V6yIB560d+yKI9HevQ+u75x4ImaK1lxxUmXgGtLTvqQycE2ym9IsEmiEEG3COuIIqk47jarTTgPA/Z//xM/upL39NhnaTlwZHjYsHnRCBQVYnbqEtc20emG5/o5ppeZ0oQQaIURSRAcMoHLAACovuACiUbzr18dHO5mPP07Www9j+XyExo2LB55wXp6kyUlBshmgA5M+dw4dsc9GZSW+Vavigce7KXaavmtXghMn4p02jT2jRxPt3z/JLW07qZwZQEY0Qoh2x8rIIHjyyQRPPhkA186d+Jctiwce92uv0QuIDBx4cH2nqAira9fkNlw0SAKNEKLdM3v2JHD22XZ2assid/duAq++in/JEtJLSsh86iksl4vwyJEHyyCMHQs+X7KbLpBAI4RINYYBQ4dS0aMHFZdeCuEwvo8/tkc7S5aQNW8e2XPnYqan29uoY4FHqo0mjwQaIURq83oJ5ecTys/nwPXXY5SV4S8tjQeetPfeAyDas2ftaqNHHpnkhnceEmiEEB2KlZND1ZQpVE2ZAoB761Z8sW3U/sWLySgpAWLVRmOBJ1RYmLLVRlOBBBohRIcW7duXwPTpBKZPr1Vt1Ld0KZnPPEPWX/9au9roiSfa1UY98vHYWuRvUgjRebhcRPLyKM/Lg1//una10aVLyZkzB+bMwczJIThhQjzwRI8+WtZ3WkACjRCi80pPJ1RcTKi4mAOAa+9ee5otNtWW/uabAET69TuYrWDiRMx2VG00FTgSaJRSjwOnAzu11nkN/N4A7gemAZXAxVrrtU60RQghEmV260bVmWdSdeaZdrXRr76Kn91Jf+01Mp991q42mpd3MPAkudpoKnBqRLMQeAB4spHf/xA4NvZTADwU+68QQrQPhkH0mGOoPOaYg9VGP/nkYDbqRx4he948rLQ0gvn58cAj1Ubrc+RvQ2u9BNh7iEvOBJ7UWlta65VAV6VUbyfaIoQQrcLjITx2LOXXXsuekhK2b9zInieeoOL883Fv306XWbPoOWUKvUaNouuvf036c8/h2ro12a1uF5K1RtMX+KbG7S2x+75NTnOEEKJprMxMgqeeSvDUUwFwbd8eX9vxL1tGxiuvAHa10VDNNDmdsNposgJNQ9s3GszuqZS6DLgMQGtNbjNrVXg8nmY/NlVJnzsH6XM7kZsLeXlwxRVELQtz40aMd9/F/fe/k6E1mQsXYrndWPn5WD/4AeYpp2Dl5ydcbbRd9jlByQo0W4CjatzuBzSYlllr/SjwaOym1dzspR0xw+3hSJ87B+lzO9WrF5x/vv1TXW00tqPNe+eduGfNql1t9MQTiQwe3Og26lbI3pw0yQo0rwK/UUo9h70JYL/WWqbNhBAdU81qo3/4A8a+ffiXL48HngarjU6ciNmjB+klJWTPno172zZ69unDgRkz7OSiKcSp7c2LgJOAXKXUFuAWwAugtX4YeAN7a/P
n2NubL3GiHUII0R5ZXbvWrjb69dcHS1zXqDYa6dMH986dGJEIAJ6tW+ny+98DpFSwkcJnHZj0uXOQPncw0SjeDRvwL1lC9r33YgSD9S6J9O3Lzg8/TPgpk134TDZ7CyFEe+J2Ex45kvKrroJQqOFLmvmFO1kk0AghRDsVbWQRv7H72ysJNEII0U4dmDEDMz291n1mejoHZsxIUouaR5JqCiFEO1W94F+96ywqu86EEEK0tsDZZxM4++yU3gAhU2dCCCEcJYFGCCGEoyTQCCGEcJQEGiGEEI6SQCOEEMJREmiEEEI4SgKNEEIIR0mgEUII4SgJNEIIIRwlgUYIIYSjJNAIIYRwlAQaIYQQjpJAI4QQwlESaIQQQjhKAo0QQghHSaARQgjhKAk0QgghHCWBRgghhKMcK+WslJoK3A+4gfla69l1fn8xMAfYGrvrAa31fKfaI4QQIjkcCTRKKTcwD5gEbAFWK6Ve1VpvrHPp81rr3zjRBiGEEO2DU1Nn+cDnWusvtdYh4DngTIdeSwghRDvm1NRZX+CbGre3AAUNXPdfSqli4F/AtVrrb+peoJS6DLgMQGtNbm5usxrk8Xia/dhUJX3uHKTPnUMq99mpQGM0cJ9V5/bfgEVa66BS6nLgCeCUug/SWj8KPFr9HLt3725Wg3Jzc2nuY1OV9LlzkD53Di3pc58+fVq5NU3jVKDZAhxV43Y/YFvNC7TWe2rcfAz4s0NtEUIIkUROrdGsBo5VSh2tlPIB04FXa16glOpd4+YZwCaH2iKEECKJHBnRaK0jSqnfAP+Hvb35ca31P5RStwEfaa1fBa5WSp0BRIC9wMVOtEUIIURyGZZVd+mkXbO2bdt2+KsaIHO6nYP0uXOQPjdNbI2mobXzNiGZAYQQQjhKAo0QQghHSaARQgjhKAk0QgghHCWBRgghhKMk0AghhHCUBBohhBCOkkAjhBDCURJohBBCOEoCjRBCCEdJoBFCCOEoCTRCCCEcJYFGCCGEoyTQCCGEcJQEGiGEEI6SQCOEEMJREmiEEEI4SgKNEEIIR0mgEUII4SgJNEIIIRwlgUYIIYSjJNAIIYRwlMepJ1ZKTQXuB9zAfK317Dq/9wNPAmOBPcC5Wut/O9UeIYQQyeHIiEYp5QbmAT8EhgHnKaWG1bnsUuA7rfVg4F7gz060RQghRHI5NXWWD3yutf5Sax0CngPOrHPNmcATsT+/CPxAKWU41B4hhBBJ4tTUWV/gmxq3twAFjV2jtY4opfYD3YHdNS9SSl0GXBa7jj59+jS7US15bKqSPncO0ufOIVX77NSIpqGRidWMa9BaP6q1PkFrfULsMc36UUqtacnjU/FH+tw5fqTPneOnFfqcNE4Fmi3AUTVu9wO2NXaNUsoDdAH2OtQeIYQQSeLU1Nlq4Fil1NHAVmA68NM617wKXASUAucA72mt641ohBBCpDZHRjRa6wjwG+D/gE32XfofSqnblFJnxC77K9BdKfU58DtghhNtqeFRh5+/PZI+dw7S584hZftsWJYMIoQQQjhHMgMIIYRwlAQaIYQQjnIsBU17oZR6HDgd2Km1zkt2e9qCUuoo7PQ+RwIm8KjW+v7ktspZSqk0YAngx/53/aLW+pbktsp5sSwcHwFbtdanJ7s9bUEp9W/gABAFIrGjDx2WUqorMB/Iwz4C8nOtdWlyW9U0nWFEsxCYmuxGtLEIcJ3WeigwHriygRRAHU0QOEVrPRIYBUxVSo1Pcpvawm+xN9x0NidrrUd19CATcz/wltZ6CDCSFHy/O/yIRmu9RCk1MNntaEta62+Bb2N/PqCU2oSdiWFjUhvmoNjW+PLYTW/sp0PvdFFK9QNOA+7A3rkpOhilVA5QDFwMEEvpFUpmm5qjwweazi4WZEcDq5LcFMfFppHWAIOBeVrrjt7n+4DfA9nJbkgbs4C3lVIW8IjWOmW3/SbgGGAXsEApNRL73/dvtdYVyW1W03SGqbNOSymVBbwEXKO1Lkt2e5ymtY5qrUdhZ6LIV0p12DU5pVT1uuOaZLclCSZorcdgZ4e/UilVnOwGOcgDjAEe0lqPBipw/sxhq5NA00EppbzYQeYZrXVJstvTlrTW+4DFdOy1uQnAGbGF8eeAU5RSTye3SW1Da70t9t+dwMvY2eI7qi3Alhqj8xexA09KkUDTAcXKLfwV2KS1vifZ7WkLSqkesd05KKXSgVOBz5LbKudorf9ba91Paz0QO8XTe1rrC5LcLMcppTKVUtnVfwYmAxuS2yrnaK23A98opY6L3fUDUnCttcOv0SilFgEnAblKqS3ALVrrvya3VY6bAFwIrFdKrYvdd4PW+o0ktslpvYEnYus0Luy0R68luU2i9fUCXlZKgf359azW+q3kNslxVwHPKKV8wJfAJUluT5NJChohhBCOkqkzIYQQjpJAI4QQwlESaIQQQjhKAo0QQghHSaARQgjhqA6/vVmIhiilvg/cgv1lKwr8UWu9Qim1H1iLnSvt50Af4FSt9U2xx80EFmutF9d4rgzsdDDfiz3uUa31Ey1oW1fsBKGd6qCt6LhkRCM6HaVULnArcJbW+iTgLCAQ+/V6rfXJwHXYecQScQvwQey5JgJftbCJXYGzW/gcQrQbMqIRndE04Onq/G9a6wPAx3WuWYedMy0RRVrrP8Sey8Kui4NSai52yYIy4Hzs5Kanaq1vUkpdHHvsYuBxYC9wNHAmcBkwSSm1GPiJ1npX07soRPshgUZ0Rn2A9QBKqZ8CvwZWaq2vr3FNMfDP5r6AUmockKm1LlZKXQBcTuMZtI/ATplzHvBfwKNA/86QUkZ0DjJ1Jjqjb7GDDVrrZ4ELgNzY745XSr2PHXxmA1XYVTurpXFwmu1QBmGv9YBdAXMwtevjGDX+vFFrbQJbsafNhOhQZEQjOqM3gBeVUlprvZ/a/x9Ur9EAoJQKAaOVUtVfysYAd9V5vhVKqfO11s/EEppOwM5JNTn2+xOAL4D92DnZAI4HPo39uW4ACgPulnRQiPZERjSi04mtedwKvKKUeg94EHiykWv3YJdbWAIsBV7UWu+tc9mtwPdjayrLgUFa6w+BgFJqKfBT4GHswNJHKfUG0OMQTdwOdFNKvaiU6tbMbgrRbkhSTSGEEI6SEY0QQghHSaARQgjhKAk0QgghHCWBRgghhKMk0AghhHCUBBohhBCOkkAjhBDCUf8fi2iLxNiFc0kAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZYAAAEkCAYAAAAM+hfoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzsnXd8FVXe/9/nltwkN4QAaRRpSg2ISBOWqoBgB2SUBVQELKs+7uo+rquuurq6rI+y8lMXFRCkiIwCyooFEUEERIqA9BqpaSSU3OT2+f0xk+SmkOSG3NTzfr3uK5kzZ858z5TzmfM9TWiahkQikUgklYWpug2QSCQSSd1CCotEIpFIKhUpLBKJRCKpVKSwSCQSiaRSkcIikUgkkkpFCotEIpFIKhUpLAEIIVoLITQhRP/qtqU8CCFeFEIcrm47LkVR+4QQ9wkhvFV07io7V31CCDHYeEdaVEJayUKI5yrDLknVIYRYK4SYXVocKSyFOQE0BTYDCCFaGC/R4Gq1qu6wBGheU88lhOhv3O/WIbEoBAghnhNCJFfhKTeivyOnq/CcklqGpboNqElomuYDUqrbjrqKpmm5QG5dO1dJCCHCNE1zV9f5Q4WRJ/mOSEolpDUW4wtwgxDiovHbKYS40dhXottJCHFYCPFiwLYmhHhMCLFECOEQQhwXQtwphGgohFhkpHtUCDEm4Ji8tH8vhPhGCJEjhNgvhBgkhGguhPjSSGuvEGJACcfl2XTC+Pu9EZ58iXxOEUKcLCGdhQFhk4QQqUIIYWwnCCHmCSHSjTxsEEIMDIgvhBCzhBBHhBC5Rh5fFULYSrnejY101gkhYkq7N0b824UQvxjX55wQ4mchRPeA/VcKIT4RQmQacXYJIW4x9jUSQiw07keuEOKAEOLJvPxd4nyF3FN520KI3wkhthvn2CKE6FHkuKFCiF+FEE7DhkHG9Z1QWecyainrjejHjPTXBhx/txBih2FDshBiuhDCHrB/rRBijhDiZSHEGeBUOa/xVUKIpca+LCHEKiFE1yJ56SGE+FoIcUEIkW2k0UcIcR/wMtDKsFcTAe9OkTTyXFjDhBA/GPbsFcb7GBCvgxBipXGebCHEf4UQV5WQTosg0+0mhNhoXL+DQgilBBujhBAzhBCnjHR+EUKMDtivCCHcQojeAWH3GGl2L5peCemX93m7zshLrnFPPhJCxAfsf1Ho5ZQihDhkpPOZECJaCDHaeBcuCiE+FUI0DDjuWiHEV0KINOPabhFCjChy7mQhxEvGdcgUepnxuhDCHBBnmPG8ZQohzgv9fe9dJJ02xrPkFPo7+ogo4sISQliMvBwz4u0RQjxYJJ1WxrOXa6TzWFnXGQBN00LyA8xAJjAdaGf8RgEDjP2tAQ3oX+S4w8CLAdsa+hfSvcBVwH+AHOAr4D4j7C3AATQpkvYR4A6gPbAcvfq+2rCjPbAUXTysJdkEdDe2RwOJQNwl8trGiNfB2J4MpAGnA+IsAD42/o8A9hrn72nk4VnABXQy4piAfwB9DLtuA84Afw9I80XgsPF/SyPNTwFbOe5PIuAGnjLs7wT8HugasD/VuF79gSuB24GbAvb/BbjWOH4CkA1MKsk+Y/s+wFtk2w/8AAwAOgKrjPtmMeI0N+73bKAzcAOwzbjeE0rJX1DnQn9ebzPS7WXkr3HAsVnARKAtMBDYBSwISH8tcBF417CzazmucQL6sz3TiN8B/Vk+i/GsAUnoz/Zi9GelHTAO6Iv+HE1Df4YTjV/UJa7HYCNvO4ERRjrzgXNATMBz+RvwHdDD+H2P/k6GFUmnRZDpngK+BLoZtm8x7utzRhxhnGst+vPWFnjAuH43BORjlnHPotHf4YvAY+Usk0p9BgKe6wvAR8Y96W/c6/VFnmsHsBK4GhgEpBtp5eVxAPr7868i9+Be4/loj/5+u4H2AXGS0Z+1p41reRfgpfB7NQoYa6SRhP5uZFJQ/glgB7pLvzdwjWHXeWB2QDrzjLwNR38+7zLu2+SAdLYb96qPkc63xvWZXeq1DqGwNDIeuMGX2N+a8gvLmwHbcUbYWyWc65Yiaf8xIE4vI+zJgLA84ehyCWFpUVoeith9DPiD8f8i4O/GDehshJ0EHgh4wE9iPMwBaawJzGsJ5/gTcKhowY3+cJ8C3gFM5bw/eXlvfYn9L6MXevYg7vkM4Nui9hV5sYsW9hpwbUDYdRQW6VfQXzZzQJwRVExYyjpX/5KuiXH+h4qEDTTiNjK21wIHA69/Oa7xi8BPRcIEekH3R2N7AXqhXeJ9BZ4DkstxbwYbtowOCEs0wm40tiejF/axAXES0F2K9xRJp6iwlJbuFPSPjkYBcboYcZ4LSMcJNCxi9wfAZwHbEcAeQAV+CdxXjmtQnmfgZfR3MywgTjcjzsCA++Ytcp3eAXwEfHyivw9by7BpJ/BskWdtRZE4XwOLS0nDhC5G443tYYa9VwXEaWzc29nGdht0ke1YJK3ngR3G/0ONdAKFL854HkoVlpC1sWialmVUu74RQqwB1gHLNU07UIHkdgakmy6E8KErbeC53ED8pY6jwC+8q4SwosdVhO+B69FrVEPQH7SewPWGvc3RhQMKvojPicKeIxsB7QJCiKnoL2VrwI7+ZV3UfRmH/gU2W9O0Pwdh7y7gG2C3EOJb9IJxmaZpee6/HsBGTdMcJR0shDChf4nfjS7A4YAV/Ys3GPK+dvM4ZfxNAA6gf91t0fT2rzw2BXmO8p6rGEKIOKAVMF0I8XrgLuPvVehfdADbNE3zB8Qp6xr3AnoIIbKLnDYC/WsV9PvwdZF0L4cdef9ompZiPJsJRlASsFfTtIyAOKlCiAPGvoqm2xnYp2laVkCc3UKI8wHH9wLCgFNF3okw4FDAcblCiLuM86Wi12CDoaxnIAld7PPbxzRN22nYmoT+rgGcCrxO6GVJiqZp6UXCAl1ocegfnNejv/8W9PemVREbdxTZPoUuBHnptAFeQq/5xaOXCZEB6XQGMjRNy++RqWlapnEf8+iJ/gxvLXK9LegCGZjOwYB00oukUyIhbbzXNG2qEGIGelVrGPCyEOJRTdPeQ1dLKHhB87CWkJSnHGEaxQtdT5H9lwqrjLamNcAMIUQS0AD42Qi7Af1GnQi40SZgH3qVtig5AEKIseji9DS6KF9Ar/6+UiT+OfQC7HYhxJuapp2kHGia5hNCjER/oYcCY4BpQoixmqZ9kRetlCSeBP4KPIFeXb6IXqO6uTznD8BfRDRKuidF7SjNrss9V1Hy9j2O/vFQlMDrXUiEy3GNTehup0dLSDew0K1ofkuipA4FpV1r0N/RsmwoLd3yHG9Cz3OvcqSd1wYag16wZpaRdiAVed5KCi+p/CmrTJqH7rJ+Ct3DkQt8jC6egRTNb9F0vgAygEfQ3aBu4Mci
6ZTnegP0wyhzSji2PPet1MRDhqZpuzVNm65p2khgDrrfFHSfJECzvLhGA1lVdUctD3k32FxqLJ3v0KubfwJ+0DTNiy4sg9ALlTUBcbei+5AvaJp2uMgvrxvnQOAX49pt0zTtEHrNpSge9DagX4F1QoiiXz+XRNP5WdO0VzVNG4guYJOM3duA34mABuoiDET/kp6jadovhmi2u0Tcy2Ev0Cuw8RL9Sy0UFLvfmqalor+8HUq4V4c1TXOWlmAZ13gr+lfwqRLSzXs/tgFDjRripWwuz/NZHvYASUKI2LwAIUQCui9/z2Wm21kEdCgxPsAaBsTZii4U4SVci+NFjpsOPIjezvqxKKVDSwVt7SuEyC+khRDdDFsv5xqA/s78R9O0FZqm/YreZto2mASEEE3QaxLTNE37RtO0veguxECvy14gThTudNEI/T7msc3427KE633E2LfHSKddQDqxRdIpkZAJi9B7u/xL6D3DWgkh+qI3aO2F/O6gG4CnhN5jpAd6o58rVDZVgAx03/BwIUSicXNKRNO0M+hV6XspEJEd6DWz2ygsLIvQv1hWCiGGC70XWR8hxF+FEHcYcQ4AXYXeq+hKIcTj6AJS0rk9gIL+cq4TQpT5sAoh+gkh/mact6UQ4gb0tpq9RpT/oD8fnwu9F00bIcQtxhd4nn2DhRBDhBDthRB5HQ0qm/+guylmCiE6CSGGUFBrq8wvedDdeH7gJiFEvCjo0fMs8D9CHzPSReg9p+4QQrxXWmLluMZvo4vCZ0KIAcZz0F8I8YoQop8R5zV0wV4khOhpPAtjjfcJ9OcoUQjRVwgRK4SIvIz8f4T+wbdE6D2YeqB/UZ9CHxd0OeleBBYa7/p16G0ngd3B16B3FFkmhBglhGgr9N5wjxkuYYQQ4YY9KzRNmwNMRW9fDXRRXi5vo3cMmGfc6/7o7Vw/apq2vvRDy+QAMF4I0VUIcQ16h4xgPwqy0O/RVOO962ukE3gtV6O7++YLIXoZwrgAvV1IAzA+BD8AZgkhJhrldTchxP1CiL8Y6XxnpLNQCNHbsHmRkU6phLLG4kB/IT5Gb9Rcij64KrDafz96wb3RiPc+uorXCAy/9iPohfYJ9MbC0vgO3b24xjheQ/9CzQ8zwp3oNZmtwFz067MMvQdHXhvFe+gPw1zjvH3QGw0vZasXvcfRj+jiUlbt4Tz6l//n6D7sD9AfmpeN9M6guxwuovco2YNeoOe5Ll828vY5eptHI+D/lXHOoNE07RS6MPdDF+oZ6A3WoH+pVea5UtHde0+jP4efG+EL0J+Bm9FdnFvQ78WpEhMqoKxrnGrsz0C//weM/a2M82N82Q5Gb0tbh34N/kyBH/wz4BP0Hkrp6G6WCmF87A1H/7j7wTifAxihXcaYHE3TcoCbgCbo128R8G/0npN5cTT0+7wMvUayHz1PN6N3ZsA4xo5eW8FosxkPPCSEuK2i9hWxNRX9GrRAv89fALvR3ZiXyyT0Mvdn9Pv2NQXtc+W1z4/uEr8S3QU+D3iTgHLTuJaj0O/devQ8fIX+fAW+Mw+gX9Nn0T92vkP/MD4akM4d6M/xD0Y6X6K7vktF6MdKJLUHoY/3WQdcbRS8EomkFIQQDdDbA5/TNO2tUJ9PjryX1HiEEA+jV8lPo/uX/w1slqIikZSMUYPzoncSigdeQHeDqVVxfjlXWB1FCPGMKBg9XexX3fYFSSt0V+kB9MGE6wm+95mkjlPa8y6EeKa67atiItHbnvagu7BM6OPzUqvi5NIVVkcRQjRG76VWIoF93CWSukBgL6gSyNQ0LZhuyZLLQAqLRCKRSCoV6QqTSCQSSaUihUUikUgklYoUFolEIpFUKlJYJBKJRFKp1LpxLIqifADcAqSpqtqljLgtgQ/R5yAyA0+rqvpl6K2USCSS+kttrLHMQ1+Pozw8B6iqqnZHn979P6EySiKRSCQ6ta7GoqrqD4qitA4MUxTlSvQp5uPQp4CeqqrqfvSRptFGtIboI7clEolEEkJqY42lJN4HHlNVtQf6BH15NZMXgQmKopxEnzytfOs1SyQSiaTC1HphURQlCn3m208URdmBPitwU2P3OGCeqqot0GdXXaAoSq3Ps0QikdRkap0rrARMwDlVVa8pYd9kjPYYVVU3KYoSDsQSMF23RCKRSCqXWv/1rqrqBeCYoihjARRFEYqidDN2H8dYE1tRlE7o60unl5iQRCKRSCqFWjdXmKIoi9EXPooFUtGng16DPuttU8AKfKyq6kuKonQGZgFR6A35T6mquqo67JZIJJL6Qq0TFolEIpHUbGq9K0wikUgkNYva1ngvq1cSiURSMURVnai2CQunT1dsjGNsbCwZGRmVbE3NRua5fiDzXD+4nDw3a9askq0pHekKq+OYUtOJ/us/iR0+rrpNkUgk9YRaV2ORlA9Tajrmv/+bhA8/Ac2PcHuq2ySJRFJPkMJSxzClphP15mzsS1aA34/weKvbJIlEUs+QwlKHMP92kiZ3/wHz8VNV10onkUgkRagTwqJpGg6Hg9LG5OTm5uL11rGvd02DnFzMmecwZZ5D5OSS88IfsSafpNm0dxB+DeHz5UeP+cMz5EwYjbtvDxBSeiQSSWioE8LicDiw2WxYrdbqNqXqiY6GxIRCQS6Xi1+G96fj/M+IXPApeH0Ir5fwtRuJ/PwbPFe2Imf8aHLG3oLWOKaaDJdIJHWVOtErTNO0+ikql8Bms+Hw+/n0d105vu4THONH4U5qT+q2r8j694toMQ1p+NK/Sew5kpjHniNs8y967UcikUgqgTpRY5EUx2w24/F42Jlyhp6vPp0fnqvcSq5yK5Z9h7AvXEbE0pVELvsKT/u25IwfRc6dt6DFRJeSskQikZROnaixSErGbDbjcrlK3Oft1I7zr/yF1O3fkPXG82j2CBq+8AaJPUYQ8/jzWLfslLUYiURSIeqtsEQsW0Z87940bdGC+N69iVi2rNLSzszMZOLEiQwYMIChQ4cyZcoUzp49WyjOq6++SqtWrZg+fXqhcL/fz9SpU/OPvfvuu0lOTs7ff//99zN06FCGDx/OqFGj2L1792XZqkVGkHv37WR8MZ+0bz4iZ+wthH+9lrg77idu2N1Ezl2COH/xss4hkUjqF/VSWCKWLaPhU09hOXUKoWlYTp2i4VNPVZq4CCF4+OGHWb9+PatXr6ZVq1a8+uqr+ftff/11du7cycaNG/nxxx955513Ch0/duxY1q1bx+rVq7nxxht56qmn8ve9+eabrF69mlWrVvHQQw/x5JNPVorNAN4uHTg/7RlSt3/NudeeRbNaiXnuNRKuvZGYJ/6OdfuvshYjkUjKpM61sUQ//zzWvXtLjRO2bRvC7S4UZsrNJebJJ4n86KNLHufp3JkLL71Upg2NGjWiX79++dvXXnst8+fPB+Cdd97hyJEjLFiwgLCwMD766CMee+wxZs2axdSpUzGZTAwfPjz/2B49ejB79uyC/EUXtH9cuHABk6nyvw00e6Tea2z8aKy79hG5cCkRy78mcskKPJ3b45gwmtzRI9EaRFX6uSUSSe2nzglLuSg
iKmWGXwZ+v5/58+fni8UjjzxSaH94eDizZs265PFz585l2LBhhcL+/Oc/s27dOjRNY9GiRZVucyCeqztx/rXnuPC3PxKx/GvsC5cR88w0ov8xg9w7biRnwhg83TqH1AaJRFK7qG0LfWklzW588eJFGjRoUO5E4nv3xnLqVLFwb/PmpP3882UZWJRnnnmGlJQUZs+eHXTtYubMmaxcuZJPPvmEiIiIYvs//fRTPv/8cxYsWFBs37Fjxzh48CBRUVH87ne/q7D9xdA0rDv2ELlwGRGff4Mp14m7a0dyJowhd9QINHtk5Z2rAshZb+sHMs/BYcxuXGWjoutlG8vFp5/GX6Sg9kdEcPHppy9xRMV46aWXOHbsGDNnzgxaVObOncvy5ctZsGBBiaICcOedd7Jx40YyMzMrw9zyIQSe7l04/8bzelvMK39BeL3E/OUVErrfSMO/vIpl9/6qs0cikdQ46qWw5I4ezfnXXsPbvDmaEHibN+f8a6+RO3p0pZ1j2rRp7Nq1iw8++ACbzRbUsQsXLmThwoUsXryYRo0a5Yc7HA5OBdS0Vq1aRUxMTKE4VYkW3YCc+xTSv/2Y9M/n4rzpeiI//YL4G8cTe/M9RC7+DJGTWy22SSSS6qNeusJCzYEDB7j++utp27Yt4eHhALRs2ZI5c+aUeWx2djYdO3akRYsW+Xmy2Wx88cUXpKenM2nSJHJzczGZTMTExPD888/TtWvXYumEzBVWBuLcBSKXriRy4TKsB4/ib2And/RNOCaMwdu5XcjPL10k9QOZ5+CoaleYFJY6SnUJSz6aRtiWHUQuWEbEytUIlxv3tV1xTBiN87ZhaJdw710ussCpH8g8B4dsY5HUDYTA3bs75956mZStX3H+hScQ5y/Q6Im/k9BjJNF/ew3LgSPVbaVEIgkBUlgkIUdrHIPjgfGkr1tKxqfv4xzSD/vCZcRfr9DkjvuJ+HQl5Dqr20yJRFJJSGGRVB1C4O7bg3PvvErqtq84/7fHMWdk0ejx50nsOZLoF97AcvhYdVspkUguEykskmrB37gRjofuIW39MjKWzMQ1oA/2D1XiB91JkzFTiVj+Fbgqf8CqRCIJPfVz5L2k5iAE7v69cffvjSkjk8glK4hctIxGjz5HdOPXyVVuxTF+NL62LavbUolEUk5kjUVSY/DHNib7kftI+/Ezzi5+B/d1PbDP/oiEAaNoojxE+OerwO2pbjMlEkkZyBqLpOZhMuEaeB2ugddhSssg8uMVRH60nMZ/+Cu+Jo3Iues2csaPwtf6iuq2VCKRlEBIhEVRlHDgB8BmnONTVVVfKBLHBswHegBngbtUVU0OhT1FuSV2OPvC9hQL7+RO4ouMVZedfmZmJo8//jjJycnYbDZat27Nv/71L5o0aZIf59VXX+W9997j8ccf54knnsgP9/v9PPjgg+zfvx+bzUZsbCzTpk2jdevWhc4xffp03njjDb777js6dux42TbXVPzxsWT/z/1kP3oftnU/EblwKVHvLaTBfz7ENaAPjoljcA4fCHJpaomkxhAqV5gLuF5V1W7ANcAIRVGuKxJnMpClqupVwL+Bf4XIlmJc6+6BVQsrFGbVwrjW3bNS0g/leiwAv/76K9u3b6d58+aVYm+twGTCNaQfWXPeIHXzF1z480OYjyTT+IGnSOh9Mw2mvYP5RPHBsxKJpOoJSY1FVVUNyDY2rcav6BD/24EXjf8/Bd5WFEUYx1aYl6OfZ5+19PVY3LjxUthX78XLXutuft/kzkse18nTmb9dqN71WFwuF8888wzvvPMOY8eOvaQN5jNnML/yCtE2GxEPP1yp86BVN/6m8WT/aSrZ/3M/tu83Yl+4jKh35hH19ly0YQMJv+tWnEMHgEV6eiWS6iBkb56iKGZgG3AV8I6qqpuLRGkOnABQVdWrKMp5oAkQ8nkawggjzhdPujkNTWgITRDniyOMsLIPDpLKXo/l9ddfZ8yYMbRsWXovqd9aeHnleQ1n00zShj1GUvo0Vngqd0mAasdsxjV0AK6hAzCdSsH+8edELVlB48nr8CXGkXP37eT8/g58zZtWt6USSb0iZMKiqqoPuEZRlBhguaIoXVRVDVygvaR5a4rVVhRFeQB4wEiT2NjYYgfl5hbMoFueGgVAmimVwQl9ceHCho0VGV8T548v17HB8Nxzz2G325k0aVLQx86cOZNDhw7xySefALB161Z27NjBM888E1Q6YU743ZfniB1X/NrVGWJjoVsXtH8+i/e/32KevYioGXOImjEHbcQQfFPGo40YXCdrMRaLpcT3oi4j81yzCflbpqrqOUVR1gIjgEBhOQlcAZxUFMUCNASKLSyiqur7wPvGplbSJGxerzdou+L9CdzpuIuP7AsY47grJKKStx7LvHnzKrwey5IlS/LXY/npp584cuQI112nN1edOXOG8ePHM336dAYNGnTJtMx+ePovDnJOvU7u2LFodnvFM1XDiY2NJaNvd+jbHfPJM0R+tJzIjz/HOmYNvqYJOH5/Bzl3346/WUJ1m1ppyAkZ6weVMAlllRGSxntFUeKMmgqKokQAQ4Giqz+tAO41/r8TWHO57SvB8mj2H+nl7sNj2X+s9LRDsR7Lo48+yvbt29m8eTObN2+madOmLFq0qFRRQYNrtkOkx0LMs8+S0KsX0f/4B+aTJyuatVqDr0VTLj71B1I3f0Hm7P/D06EtDaa/T0KfW2g06Qls3/0IPl91mymR1DlCVWNpCnxotLOYAFVV1S8URXkJ2Kqq6gpgDrBAUZTD6DWVu0NkyyWJ9yew+OzSSk/3wIEDvPXWW7Rt25bbbrsNCG49lqeffpoWLVpw9936JclbjyUYhCHRAtjUH646Y+GuEyN5ZJqDju+/j/2993COHIljyhTcvXqBqLIZtaseqxXnyOtxjrwe8/FTRC5aTuSSFUSsWoe3eSI5vx+l12IS46rbUomkTiDXY6mj7Nj7I0+c+iu9j/i4v4vCu7ccYWXECvz4GXZuIH+YF8MNL32P+dx53FdfjWPKFHJvvRXCKr8DQ1VSbneB20P4qnXYFy7Dtn4zmtmMc/hAciaMxjXwOgjSdVmdSLdQ/aA2rccihaWOcvjYYV46/ALj7OMZ2e8mAFJNKSywz2OxfQHnTOfo4kziwW+TGPeXbdj3HcEXH4/jnnvImTgRfy1pJCxKRV4+87ETelvMkhWYz2bhbdmcnN/fQc5dt+GPr/nXQRay9QMpLKFDCks5KW0FyVyRy2cRS5lnn81h6yHifPHct3sgD75whuafb0Cz2ci94w6yJ0/Gm5RUTTmoGJdV4LjchH+9FvvCpdg2bkWzmHHeOBjHhDG4+/eqsbUYWcjWD6SwhA4pLOWkPEsTa2ist61jrn02P4R/j00LZ3TKUB55y0T3N7/FlJuLq18/HFOm4Bw6FMzmKs5F8FRWgWM+8hv2RcuIXPJfTOfO423dgpzxo8lRbsUf27gSLK08ZCFbP5DCEjqksJSTYNe8P2Q5yIf2OSyP+BSnycmA7H48tKwltzyzjrBTZ/C2aoXj/vvJuesutBp8rSu9wHG6iP
[... base64-encoded "image/png" output omitted: matplotlib-rendered figure ("display_data" cell, "needs_background": "light") ...]
+   {
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "Managed Memory Plots\n"
+    ]
+   },
[... further base64-encoded "image/png" outputs omitted: additional matplotlib-rendered figures ("display_data" cells, "needs_background": "light") ...]
ia/RsZ/byDbJ/O7dR3DYoawI+wfrnd3YXjKqzTyNT7t6235x8In8yPwS8HSlQ6sYZIO7Tw891QaXTt5qFmjeohAaYiSUTT3tqS5t2WBYxLJSVNCoFdUXsHYZP2LpeZv8QpvTniTNFHf16DQrrSNvI2o7a9T5dov1HoIISCU6yEAtG/fntGjR3P99dcXGY9aD6EcyXATOXU2ke/NQgDOJx7E2bcXMtyo2SWYEngj+lUWhH9BA29DhqWOJt7d7bR910eOmXj/o0hmfRqB3w8gMJsld9+azlujUsosWZWZividvXg5Zj5aoCtt9vdx87E84W3STkNvQ+J8AXdUvlpGjDyn2Gv2iO3GNmvBbuXNM1uyKGF5iW1X6yFUIKFcDwHg559/Pq24FWWMlNgXrSB69AQsh46S0SOe1FeexdeoAWA0gH4e/glvRY8lQ6TzZNrTPOV89rT90ocOm5n8USSffhmOz5/9dBt/fT7BgkXhvNAvrczXBVcYWLDQ0BdHQ18c19CuwHE3GRyyHAo0chu1jOz//7JuINWUV6yj/NF53VHZkw76mhDnjcOOgyszW7ErbCdZInf+kTBp5crMq0KURoVCUWosW3cQM3wctl//IKv5BSRMGElmu9yHdHPYJobFvMQm619c42nHyJQxnO+94LSudeCQmUnTI9EXGkKi3ZZORoZg0TIHvqB3fyjWBVeUHDsOzvOez3ne8ws9nipSclxQwV1qd1t28YN9NR6Rt/2itq8OdX318OHNs9+MqdRTlZQUJQgKRSkwJSYR9cYUwuctxB8TRfLrL5F+361gMR6lFJGcMyNprL827yS9x/9l3Hpa7qF9Bwwh+OLrcEwC7rsznb6POmnYwEe3O2JDvi64omyJljG09MbQ0ntJgWMSyQnT8QJjLw5Y9uOQDly4QBi1g/wTGZYlShDOUqpY21DlJyuLiI+/IGr8NIQzHdfDGmkDHkPWiAGMB/orx5e8Hj2aJFMiPV2PMCDteaJkdKkvtXufmUnTovjyfw4sZuh5t4unHnVSv25udWD5l7n+8+q4vvDZhkBQx1+XOv66tMrKOzr9uOkYneq2xYMnpLUDOEsEQQiBx+MpdW+esxW3201aWuELsCtKj+2HX4ke/jZhO/fgue5qUkYOxHtRbrfGHZbtDI8Zwm+2tVyReSUzT35SaCmwOHbtMfPutCgWLnJgtUoeud/Fk484qavaBKo1dfx1udN1d6HTnJc1Z4UgREREsGfPHqSURfb3N5lM+P1n/4MlpSQtLY3du3djs9mUSJ4B5j0HiB71Do7lP+Bt0pDEGW/j7tYxpxupS7iYFPUOMyOmEykjGZP8Fnel31PqroY7dlmY+EEkXy9xYLdLHnvIxRO9nNSOPfvzq6JknO4056XlrBAEIQQNGjRgyZIleDwezIVMyOJwOMjIyKgA6yoGn89HREQEl1xS+pJqdUc4XUS++xGR0+chLRZSX3oaZ5/7wWb45yWSZfbFjI4ZzlHzETTXvbyQNoSa/tKNC9m2w8LED6JYtMyOwy556hEnj/dyUaumEgJFXsp6mvOiCJkgaJp2IzARMAMf6ro+Nt/xxsDHwDmBMIN1XV98utez2+3cdNNN7N+/H7fbXeB4TEwMKSnVo482GPfjiiuuwOl0Fh9YYeD345j/LdGvT8J8/CTpd95M6kv98NernRNkr3kPI2OG8qN9Nc2zWvBu4pQCPt/i2LLNwsSpUSxe4SAywk+/Pk769HSpgWSKCickgqBpmhmYDHQFDgK/a5r2ja7rfwcFewXQdV2fomlaC2Ax0PRMrmuz2bjggsK79lXHhje73a4EoYSEbdhMzLC3sP65lcwrWpL40dtkXZk7caAHNx9Evs+UqPewyjBeSRnJg65epZqRdNPWMCZMjWTZKgfRUX4GPJnGow84qXGO6gCgqByEqobQBtil6/puAE3TPgP+CwQLggSyu2DEAAWHICsUIcZ09ATRYyYR/uW3+OrUImnCSDLuuAmC2qJ+sK1mRMwr7LfspUf6fxmSOoy6/nolvsafm8J4Z2oUK3+wc060n+efTuWR+13ERCshUFQuQiUIDYEDQdsHocDSSiOA5Zqm9QMigPjCItI07THgMTDm8ImNjT0tgywWy2mfW1VRaT4Fbjemdz/CPPY9yPLie+EpfC/2JSIqkuz1uQ5ygJcsL/CNaSEXyAv5OmsJnSydoYRNBWt/F7w23szyVSZq1pCMesnLk4/6iY6yA/bTTWIB1O9cPSiPNIdKEAobhZO/OHQvMEvX9bc1TWsLzNE07RJd1/M4UnVdnwZkz0ktT9ftUx1dRirNhSAl9mXfEz3qHSz7DpFxQ0dShw7A16wReNzgcZNFFrMiPuLdqLfx42dg6os86nwcGzYSKP5+/vaHlfFTovjpVys1a/gYMiCVh+51ERkhyfRAgqfYKEqF+p2rB2eS5sBcRsUSqqn4DgKNgrbjKOgSehTQAXRd/xWjyFS9JF9Rrli2/0ute/tS89HnkTYbJz+dTNKM8YYYBPjNupb/q30DY2NG0zbzWpad+J6nnM+UaHrqNb9ZuevhWtzWM5ZtOywMfSGFdcuP07e3k8gI5R5SnB6OBQuo06YNYXY7ddq0wbFgQciuFaoawu/ABZqmNQMOAfcA9+ULsx/oAszSNK05hiCcCJE9imqMSEohavwHRHw8HxkZTsqo53H1vBPCcueyTzCdYGz0qywMn09DbxwfnJxJvKfbKWI1kBJ+XmdlwpQo1q63USfWx4gXU3jgrnQcDiUCijPDsWABMYMGYQp0mbccOkTMoEEAZNx+e5lfLySCoOu6V9O0p4FlGF1KZ+i6vlXTtFHAel3XvwEGAtM1TRuA4U7qpeu6eoIUZYfXS/gnC4l6awqmlDTSH7idtBeewF8zd40JHz4+C5/LuOg3AjOS9qOv81kc0nGKiA0h+HGNjXemRPL7nzbq1fXx6pBk7rkjHUfZNQ+cEseCBUSNHYv58GHqNGhA2uDBIXlJKApBSmM2Qa8X4fOB15vn/yL35TueE87nQ+QLj89H9OjROWKQjSkjg6ixY0PyW58V6yGUBOVzrB5kp9m6Zj0xw8YRtm0nnratSBn5PN6WF+YJuynsL4bFDGGzdSNtPdcyMmVMkTNVZiMlrPrJxjtTovhzk5UG9bw83cfJ3belYy/HQeH5S44AfoeDlDffDL0oSGm82Hy+0r0M878Ai3kZ5t8XfCzCZiM9NTU3XFD4Avuyzw++bv59weHyHcuzL1sEvN7i71MofwIhOFLMNPnBqPUQFNWTvQeo8dxwHN+uxBtXn8QP3sB9cxeCV5JPEcm8Hf0G88LnEOuvzYTE9+nhvuWUM5JKCd99b2PClCg2brUS18DLG8OT0W5Nx1oBE4xGjR1baMkx5sUXsf3wQ9Evw/wvwNK8DIO/KwGRFgtYLEizOc83ZjMy+Dv/vuxwNhv+iIg8x7BY8p6bL25pNhfcV1i4/PtOFV9R6bBYqHXLLViOHi2Qdl8JG4lLixIExVmBSM8g8r2ZhE2di0VA6vNP4HziQYL9NxLJQsd8Xo8eTbIpiYdcj9C/
mBlJ/X5YtsrOhKmRbNlmpUkjL2+PTuKO/8sIboIoVyzbtmEuYgEmkZ6Odd26ol+G2d8OR8EXXyHhSvTiM5mKD1fUi6+UL8js/2Pr1KkWtd+0l18utCaYNnhwSK6nBEFRtZESx1dLiX71XcxHj+O7+xZODHwcf8O8A8e2W/5heMwQfret44rMVnx8ch4tTjEjqd8Pi7+zM2FqFNt2hNG0sZd3XkvitpsrSAjcbhzffkv4nDnYfv+9QB/ubHwNG3J87dpyNU0ROrLdf9ltRb4QtxUpQVBUWcI2bSN62FvYft9I5qUXkzRlDNE3dcUfVHJ0CRfvRo1nZsR0omQUryeP4870u4uckdTng0XL7UycGsX2XWGc1yyLSW8kccuNGdlr4JQr5r17iZg7F8dnn2FOSsLbrBkpw4bhDw8nZuTIcis5KiqOjNtvJ+P228ulTVAJgqLKYTpxkqg3JhP+2Tf4a9UgedxQ0rX/M9wPASSSpfZveTVmOEfNR9Fc9/FC2ktFzkjq9cI3Sx1M/CCSXbvDuPC8LN5/K5EeN7iDoy0fvF7sK1YQPmcO9u+/R5rNuG+4AdeDD5LZvn3utBoREeVWclRUD5QgKKoOmVlEzPiMqAnTERluXI/dT1r/3sjoqDzB9pr3MCLmFX6yf0+LrJZMSvyAK7MKX5Tc64WF3zqY+EEUe/ZZuPiCLKaOT+Tmrm6KWFojZJiOHiX800+J+OQTzEeO4KtXj9Tnnyf93nvx1ys4d1J5lhwV1QMlCIoqgW3lz8SMGI9l9z7cna8lZfhz+M5vmieMmwxeN49mfJ23sMowhqaM4gHXQ4XOSJqVBV/+z8G706LYd8BCi4uymD4hkRu7lLMQ+P1Yf/6ZiDlzsC9bhvD5cHfqRMprr+Hu0oUK8VMpqi2lzm2apgk1gExRXph37SVm5Hjsq37Be24TTs6eiKdL+wLhvretYmTMK+w37+P/0m9lSOow6vjrFgiXmQlffB3OpOmRHDhk4bKWmcycdJKu13uCe6aGHJGYSPgXXxAxZw6WPXvw1aiB67HHcD3wAL6mTcvPEIUiiNMpfowHBpS1IQpFMCI1jah3phMx4zOkw07K0P64HrkHrHm7+Bw2HeLVmBEscyzm3Kzz+CZrKS2TLy0QnycTPl8YznvTIzl0xMIVl2by6ssn6dKhHIVASsI2bCBi9mwc//sfwuPB07o1ac89R8ZNN4G9nIY4KxRFUGpB0HVdiYEidPh8hH/+DVFjJ2NKTCb9nltIe7Ev/tq18gTLIouZER8yKWo8fvw8nzqYR52P0yC2QZ4ZSd0e+HR+OO99FMXRY2Za/SeTN0ecpOO15ScEwuXCsWABEXPmELZ1K/6ICNLvvhvXgw/ibdGifIxQKEpAsYKgadpHuq4/GvhfANN1Xe8dcssU1Q7rb38SPWwc1s3/4Gl9OalzJ5F1WfMC4dZZf2V4zBB2hu0gPuMGhqaOJM7XKE+YDDd88kUE738UybETZtpc6eGd15K47prMchMCyz//GLWBL7/E5HSS1aIFyWPHknHbbcjIyPIxQqEoBSWpIZyb/Y+u61LTtPNCaI+iGmI6dJToMe8S/tUyfPXqkDT5NTL+ewP539wJphO8Hj2ar8K/JM7biGknZ9Il34ykLhdMnRXB1BmRnDhppm1rD++9mUTb1uUkBB5P7gCy335D2mxk9OiBq2dPslq1KpAmhaIyURJBSNA0rTewBmgLnAytSYpqQ4abyKlziJw8C+H3k9a/N86+vZDheWca9eFjXvgc3o5+A7fI4Km0Z3jK+UyeGUldLsHHn0Uw7eMwTpy00v4aD1PHJ3HNVZnlkhTzvn2Ez51L+GefYU5MxNu0KSlDh5KuaciaJVxiTaGoYEoiCA9hLGHZF9gO9AypRYqzHymxf7uS6NETsBw8QsbNXUgd2h9fo4ITdm0M+5NhMUPYYt1EO097Ria/xrm+3BlJ05yCWZ9G8MGsCJKSzXS93k/fR07S+spyEAKvF/vKlYTPnp07gKxbN9J79sQTPIBMoagilEQQPMBRwAdMAa7AWABHoSg1lr93EjPsLWy//kFW8/NJ0KeSeW3rAuGSRRJvR7/Bp+Fzqe2vw8TE97k5aEbSlFTBjE8i+HB2JMmpJjp3cDPgyUS6dY4hISG0YmA6dozwefPyDiAbONAYQFa/fkivrVCEkpIIwlzgB+BeXdcnaZr2OhAfWrMUZxumxCSi3pxC+CcLkdFRJI8ZTPr9txUYeCWRfOnQeSP6VVJMKfRyPcqzac8TJY3RyMkpgo/mRvLhnAhS00x0uz6D/k84ufySrNAmQEpjANns2diXL0d4vbg7diRl9GjcXbuqAWSKs4KS5OLauq5P1TRNC7k1irOPrCwiZs8n6u0PEM50XL3uIu25x5E1YgoE3W7ZxrCYIay3/caVmVcx6uQYmntbApCYLPhwdiQzPokgzWmie3wG/Z9I45LmoV2oRCQl5Q4g273bGEDWu7cxgKxZs5BeW6Eob0oiCMc1TbsbcGiadhtwJMQ2Kc4SbD+uJXr424Tt2I3nuqtJGTkQ70UFO6k5hZN3o95mVsRHRPujGZv0NndkaJgwcTLRxLSPI5g5L4L0DMHN3dw8+3gaLS4KoRBISdiff+YOIHO7ybzqKpL69yfj5pvVADLFWUtJBOERoDewAYgD+oTUIkWVx7z3ANGj3sGx7Ae8TRqSOONt3N06FuhyKZEssS/i1ZgRHDMf5W7X/byQOpgasiYnEkx8MCuSjz8LJ8MtuOXGDJ59wslF54dOCITLheOrrwifPRvrli3GALK77jIGkLVsGbLrKhSVhZIIwsW6rr+naVodoBfQFPgnlEYpqibC6SJy0gwip32CtFhIfelpnL3vo7DFhveYdzMy5hV+sv9Ay8xLmJw4jSuyWnHshIkRMyKZo4eTmSm49aYMnn3cyfnnhk4ILNu35w4gS0sjq3lzkl9/nYzbb1cDyBTVipIIwttAF2AURuPyTIzxCAqFgd+P48vFRL8+CfOxBNLvvJnUl/rhr1e7QFA3GUyJeo9pke9jlTaGpYzmfldPThyzMmxGJJ98EUGWF27vkUG/x9I4r2mI1u/1eHAsWUL47NnY1q0zBpDdfLMxgOyqq9QAMkW1pCSCEK5pmg2w6br+qaZpT4TaKEXVIWzDZmKGjcP65xYyr2hJ4vRxZLUqOLkcwGrbSkbGvMIBy37+m347g1NfIetQfYZ9GMWnX4bjl3DnLek83dtJsyahEQLz/v25A8hOnswZQJahafjVADJFNaek3U6/BoZrmmYH9oTWJEVVwHTsBNGvv0f4F4vw1alF0jsjyLjz5kIHYx02H2J09HCWO5ZwXtb5zE3QidvTgbc/jOTzBeEAaLem83QfJ43jQiAEPh+2lSuJmDMH2+rVYDIZA8gefBDPddepAWQKRYBiBUHX9cnA5KBdvUJmjaLy48kkcvonRL47A5GVRVrfh3A+8ygyMqJA0EwymRk5nUmR7wAwKHUIXbY9ydTpNfji63BMAu69I52+vZ3ENSh7ITAdP074vHmEf/IJlsOH8dWrh3PAAFz33ou
/QcFR0QpFdUeNplGUDCmxL/+B6FHvYNl7kIxuHUkdNgBfs0aFBl9rXcPwmCHsCttJ14wbeWjTWPSpF/Da/xxYzPCg5uKpR500qOcvcztzBpAtW2YMIOvQgdSRI40BZGFhxcehUFRTihQETdNq6LqeVJ7GKConlh27iR4+DvuP68i6oBkn572Hp2Ph/QpOmI7zevRovg5fQJy3EaN+X8C6925AW+TAGgYP3+fiyUec1KtTtkIgkpMJ/+ILwubNI3bHDvznnIPr0UeNAWTnnlt8BAqF4pQ1hNc1TasB7ASWA2t0XS9x3z9N024EJgJm4ENd18cWEkYDRgAS2Kjr+n2lsF0RYkRyKlHjPyBi1hfIyHBSRj2Pq+edhZayffj4JHw2b0e/QabwcM8fr5P4Tl8GL4nEapX06eniiV5O6tQuQyGQkrC//jK6jH7zDcLtxn/NNSRPnGgMIHM4io9DoVDkIKQ89fLImqZdAHQF2gECWAcs0HX94CnOMQM7AucdxJgM715d1//OF68OdNZ1PUnTtDq6rh8vxl55+PDh4lNVCLGxsSQkJBQf8CzitNPs8xH+yUKi3nwfU0oa6fffRtoLT+KvVaPQ4H+FbWBYzBC2Wjdz+V8PEfHmW6xeGovDLul1r4vHe7mIrVV2QiDS03MHkG3ejD88nIzbb8fVsyfndOyofudqgEpz6WhgtJkV25e6JI3KOzFqCe8HXvRXA/UxXvRF0QbYpev6bgBN0z4D/gv8HRSmDzA52y1VAjFQlAPWNeuJGTaOsG078bRtRcqIgXgvuajQsMkiibeiX+fz8Hmcs+l6Wozdxbql5xEZ4efpPk4e6+miZo2yEwLLjh2Ez55N+LGppYoAAB6tSURBVPz5uQPIxowxBpBFRZXZdRSK6kqpGpV1XfdhLJRTHA2BA0HbBzGEJJgLATRN+wXDrTRC1/WlpbFHUXaYDx4xppv4diXehvVInDoWd4/4Qgdo+fGzwPEFb0S/SuLmc6n/+gb2LP8Pzkg//Z9Io/eDTmqcc+qaZ4nxeLAvXUrE7NnY1q5FWq1k9OhBes+eZKoBZApFmRKqXkaFPaX53xAW4AKgE8YcST9pmnaJruvJwYE0TXsMY4EedF0nNjb2tAyyWCynfW5VpURpdqVjHjcF0/gPQAi8w57D/9zjRDrsFDZpwxaxmYHmZ1jzZxaRb32Ne1l7kmIkwwZ56dvbzzkxNqDgVBWlZu9ezB9+iGnWLMSJE8hmzfCOGYO/Z08stWsTXcRp6neuHqg0h+gaxQXQNO1GXdeXapp2PjAA+FzX9R+LOe0gENwfMQ7I7/w/CKzVdT0L2KNp2nYMgciz+I6u69OAaYFNebo+NOVzzIeUOL5eRvSr72I+coz0W28gdcgz+BvWA5fT+AThFE4mRo1jxta/kW+NJHNlPP4YH4OeSeXh+1xER0m8WXBGt9jnw7ZqlVEbWL0ahMDdtauxAlmHDrkDyE5xEfU7Vw9UmktHgxKOuylJDeF5YCkwBJgOvAsUXOIqL78DF2ia1gw4BNwD5O9B9BVwLzBL07RYDBfS7hJZrTgjwjZvI3roW9h+30jmJReR9P5rZLa5otCwEsli+/8Yum0JJ8b1xf99N2rU8DJwQCoP3esiMuLMXUOm48cJ//RTYwDZoUP46tbF2b8/rvvuUwPIFIpypCSCEKVpWmPAp+v6r5qmuYo7Qdd1r6ZpTwPLMNoHZui6vlXTtFHAel3Xvwkc66Zp2t8Yy3O+oOv6ydNPiqI4TAmJRL0xmf9v787DoyjSB45/58g9k3Aph4Co8POCVVFZF5VDEXVVVMQSUBFQ0VVcBFE5Re5bAohA5MYDCkRhvRAVxF1YBRZFERFUUAhHSEIySSaZzEz//uhBEhIgBDrBmffzPHnIzFR3v2WQN11d9Vb828sJVqvC4fGDyHugHTgcpbb/1fEzvbYuYXPyPQS/fJIq1Qt4pm8WD6s8Ek43ERgG0evWkbBwIbEffYTN76fghhvIHjKE/LZtZQGZEJWgLNNO7wHaAyOA3zAf/vargNhKI9NOT0GNGjXI2LoN1yspxK76EnteHjZvPrndO+J59nGMpNJn5ngNL/2/+5hlU5sQWNcC9zk59O5WSBeVT1zc6SUCW1YW8UuWEL9wIVE7dxKsUoU8pcwFZBeV3DznVEXqz1n6HP7OimmnwAVAP631kX+JKysZiFNgP5CGY+gkas5dDIWF2ID8m64na0gfAg0blHqMYcCUDVuZMr0K+V8/TVytdJ4ZsIce99mJO81NwqK++Yb4hQuJe+897Pn5+Jo2JXPSJLx33SULyIQ4S5QlIfwCjFNKJQH/ApZqrTOsDUuUl/1AGq7kWSQsWmEmgiJ3gBkLp5R6jGHA0nUehs4MkrnpFpx19vH4y/+jX7taxMaUvxKoLS+PuOXLzQVkW7aYC8juu4/cLl3wN25c7vMKIaxx0iGjI5RS1YAZwG2Y4/+Ttdb/tjC20siQ0UlUv6c70Ru+LfXeMHXvpmKvDQM+WuPgpZRC9m2pi73ub9z+1GYm3n4F7ujocsfg/Okn4hcuNBeQZWdTeMkl5D78MN777rN8AVmk/JyLkj5HhrNiyEgpdTvmLKGqmHcIPUInXg60KFd04swzDGJXrMK5aw82wHDYweHA5iss0TQYhE9WxzJiho1ff6iK7fxfuGJCMtPb3sD5jpNNIDsOn4/Yjz4y9xxYv95cQHbHHeYCsmuvlQVkQvwJlGXIqAkwQGu9t+ibSqnHrQlJnCrnjztJGjSOmPWb8DW+mIzXxxM4/zzOmfEG9vkagkFsvkKCQfhwVSwTZsaxY3sctgt3UHPKCMbeciW3BFW5ru34/fejO5AdOoS/fn2yBw4k74EHCFavfoZ7KoSwUlkSwmJgsFLKBTwCdNNaz9Jab7c2NHEytmwP7ldeJ2HOIgx3AodH9yfvwXvB4eBAmp37d49h+vtdOH9+CvlrvuPme2vw085oHA1/InbGKJ5uW4OnvE8RGzzFh7qBADGrV5sLyD7/3FxA1qaNuYCsZUvZgUyIP6myJITZQE/gNa11QCnVCZhlbVjihIJB4pZ+QOKoqdgPZZD3YHs8L/6DYLWj1UiTp7tY95WNpyY0ZP/BiezwRBFb+2eiXh9Ey9sP8HLOMC7wnto+Afa0tKMLyPbsIXDuueT06mUuIDvvvDPdSyFEBStLQnBorX80ty4AQH79q0RR320jaeA4ojdtwXdVYzIWTKbwL5f+8fmdNdqydb8H37IdYNj48r+x0GgbUXOGUPOO/zAo5yVuz7oT28mfL5kMg+j1648uICssNBeQDR5M/q23ygIyIcJIWRLC50qpGUAdpdRkYJXFMYlS2DIOkzjuNeLfWEawelUyXxmC9/47iw3PZB62YZ8/BN+Uv0NhaJaQsxD7jZ9z+R1bWZy+GpdRWsm6Uq6XlUX80qXmArIdO8wdyLp2NReQNWxoRReFEJWsLPshDFdKNQY+A7ZjrlYWFSUQIP6t90gcMw2bJ8dcZfzcE8VWGf+2x8HrCxJ4e1k8Xu/9YC+yYb0/iuBb3ZjQtSWu6idPBlHffk
v8ggVHF5BddRWZr7yCt107WUAmRJg70Z7KS4AHtdY+rfX3wPdKqYsxt9NsVlEBRrKojVtIGjSW6O9+NDerGf48/ksb/fH55i1RzJjn4sNVsTjs0ObOg3wT2M6+lc3Ad7Q+kT0YxcLXGjBqcHap17F5vcQuX07CggVEf/stwbg4WUAmRAQ60R3CIuBDpVQHrfVhpdStwHCgS8WEFrnsaekkjppKvP4XgVrnkPHaKPLbtQWbjWAQPv0ihhlzXXy1KYZEd5Du3dPx9RjHuw2TyWuxAXzF60wEfVFs/KbkAkTnjh3mArIlS8wFZBdfzOGRI80dyBKPt+OAECJcHTchaK3fUUrtAz5QSq3E3FO57bEb2IgzyO8nYZ7GPWEGtvwCPE8/Qk6vxzAS4skvgHdWxDNzfgI//xrFebX9DHohA1uXFFJqj8Vjy6ZDXkd6LwlybjCVlxL783bCG3TKfZhh2aOOXsPnO7oD2fr1GFFRRxeQNWsmC8iEiGAnGjIajrnL2V6gDzAd6KOUQmv9UgXFFzGi120kafA4on78mfyW15E17HkCDRuQkWln/oJ45r2dwKF0B00u8/HquAxi2i1nQvVh7Hbu4ob8FvTPHswl/ssAiFu2jJfnfMLuSUGGPruSuEevwdes2dEFZGlp5gKyAQPMBWQRtvOUEKJ0Jxoy+jT052fAtAqIJSLZUw+QOGIy8ctX4q9bm4zZE8i/tRW7fneSMtzF4vfiyM+3c1OLfJ7smon7+o2MThrK1zH/pWFhI2anL6RlQes/ppHGLVtG0gsvUNXrZVUrgP0YvXqZ9SrsdgpuvpnDXbpQ0KqVLCATQhRzoiGjLyoykIhT4MM16y1cybOwBYN4+vTA89QjbPrJzYzeLj76NBanA9rf5eWJrjm4/283ExPH8l78O1QLVGf44dGovM44j/kRuseMwe71FnvPFgwSdLtJ++wzArKATAhxHGVZhyDOsJg160gaPAHnL7vx3taKzEF9+PiXi5jxeAIbNseQlBjk6cdy6P5gLgnnZpPieo1ZrpkYGDzp6cmTOT1xG6VUDTUMHHv3lnwfsOXkSDIQQpyQJIQK5PhtL4lDXyHu4zX4L6hP6pypLDjchpSnXfy620m98/wM659Fx3vziE3wsyR+EZPc4znkSOOuvHt43tOf8wJ1Sz139NdfkzhixHHXHwdkb2IhxElIQqgI3nxc0xfgnjYPw2Zjf69nmBTbg1nDq5KR6eCKy31Mn5jB39vk43TClzFfMCpxGD9F/cjVBdcyM2MOVxY2LfXUzh07cI8eTdzKlQRq1SK3UydzUVmRYaNgXByefrLRnRDixCQhWMkwiF25hsSXX8H5eyoH29zGyJoDSVl0EfkFNtq0zOfJbplcd40Pmw1+cm5ndOJw1saupr7/fF7NmMlt+XeUWnfIfuAA7okTiX/7bYz4eLJffJHcxx/HiIvD17w57jFjcKSmEqhTB0+/fnjbt6+E/wBCiD8TSQgWcfy8m6QhE4hdvY7s+g0Z0eItJmxqRZQTOrTLo8cjuTS6yA/AIXsaye4JLI5/iwTDRf+swTyc240YYkqc1+bx4Jo+nYSUFGx+P7ndupHTq1exvQe87dvjbd8+IneVEkKUnySEM8yWm4drymxcM9+g0BnLhIZD6J/2KK7dDv7ZI4eunXI595wgAPl4meuaxQzXq+Tb8nk4txvPeJ6lqlGt5Il9PuLffBP3pEk40tPJu/tuPC+8QKBBg4rtoBAibElCOFMMg9gVn5A4NBnngYMsqXo/PQODiY2tyssDc3jgXi/x8Wb5iCBB3o9bznj3aFKde2njvZV+2QO5IHBR6ed9/30Sx4zBuWsXBc2bkzFoEIVXXFHBHRRChDtJCGeA88edxL84DtfGTXwT3YR/uGZT0PgvDO+Ww+1tDuI4WmeOjdFfMzJxKFuiv+FyX2PGH0rmOl/zUs8bvX49iSNHEr15M4WXXEL6woUUtG4t5SWEEJaQhHAabFkeAkNSqL50MYdJpG/8WH67tT0vdsvn2qaHiv27vduxi3GJI/k47kNqBWoxPjOZe7z3YS9lvyHn9u0kjhpF7KefEqhd2yw/3aEDxTKLEEKcYZIQysEIBNk/8WMunJFMUkEGs+Ie5n8dnqHzE1E0vKB4ieks22GmuSezIGEuUUYUz2b35bHcJ4kzSu4tYE9NNWcOaY3hcpE9YAA53bvLPgRCiAohCeEU+P2wYfbPNJo0mqs9m/k65mq+7DaLW3o34M7qQeDoxjQ+fLyZsIBX3ZPIsmVxf15Henue59xgzRLntWVn45o2DdesWRAMkvvYY3ieeQajWikPl4UQwiKWJQSl1G3AZMABzNJajzlOuw7AEuBarfVGq+I5HXl5Npa/UUDNKdPonPkmGc7qvN9xNJcOu4VOCTYg+EdbA4NVsSsZmziCXc5fub7gRvpnDeZS/+UlT1xQQMLChbiSk3FkZpLXvr05c6hevYrrnBBChFiSEJRSDswKqbcAe4ANSqkVWusfjmnnBv4JfGVFHKfrYJqdeW/EYp/9HgMPjyPJyOaHtg9R5ZVHaVq1ZC2h76K2MDpxGF/FrKdhYSNmpS+gVcFNJReWBYPErViBe+xYnL/9RsGNN5IxcCCFTZpUUM+EEKIkq+4QmgE7tda/ACilFgF3Az8c0244MA7oa1Ec5fLTTicpCxLY884PJHsG0DTwPWlNriUjuS/VLim5wfw+eyoTEsf8UYl02OFRPJD3YIlKpADR//63OXNoyxYKL7uM9LfeoqBly4rolhBCnJBVCeE84Pcir/cAfy3aQCl1FVBPa/2+Uuq4CUEp1QPoAaC1pkY5N3NxOp0nPNYwYO06G5Nec/C/Tw4x3jeAh/M1hTVr4Z/4Kkkd7iwx3TOHHJIdE3jVnkyQIL0Dz9M78DxJ8UkQX/z8tu++wzFgAPZPPsGoXx//3LkYHTvittsppW7pGXGyPocj6XNkkD5bdA2LzlvaRPk/NvVVStmBSUDXk51Ia50CpBw5R3lLMRyvjIPfDx+simXmXBdbv4cXnK/zTsEEYgL5eHp2I+ef3TES4iE9/Y9jAgRYGr+YSe7xpDkOclfePfT19KNuoB6FFHKIo9dx7N2Le/x44pYuxUhKImvwYHK7doXYWMjIKFdfTrfP4Uz6HBmkz6emThmrHVuVEPYARZ+M1gVSi7x2A42BNUopgFrACqVUu4p6sJyTa2PRsnheX5DAnlQnnaqvZVXcQM7Zv5P81s1JG9qXwEXnlzju3zFrGZU4jO1R22jqu4YZGbNLrURqO3zYnDk0ezYAuU8+iadnT4wqVSzvmxBClIdVCWED0EgpdQHmnswdgc5HPtRaZwF/3PsopdYAfa1KBgfS7DzwqJOpY+wEDZj7ZgILdQJZ2XbuvGwXX7iHceFXH+GvV4eMORPJb9uyxPDQDudPjE4czhexn1PPX//4lUjz80mYNw/31KnYsrLwduiA5/nnZXMaIcRZz5KEoLX2K6V6Aisxp53O0VpvVUoNAzZqrVdYcd3jSZ7u4j9f2ejQtTq/7XESCMJdrbMY7Z7BxYtnYjMMsvs+Q
c6TXSAuttixh+yHmByqRBpvJBy/EmkwSNy775ozh/buJb91a7L798d/eSnTTYUQ4ixkMwzj5K3OHkZqaurJWxVxIM3OX9vUpNBvo1ZwP/NqT6CF5wuiYuw4f/0N7+2tyR7Sh0C94mNsBeQz1zWL6a6p5Nvy6ZzbhWdyelMtWHKxWMzatSSOGEHU1q34mjQhe+BAfDfeeFodPRNknDUySJ8jwxl4hnDSImhhv1I5ebqLWsED9MtLprvvbaKzC7Fj4L/wfNLfnEpBq+KF5QwM/hW3nPHuUaFKpG15MXsgFwZKTjd1fv89iSNHErt2Lf569cicNg1vu3ZgL1mfSAghznZhnRDSf0in+YLJzPAuJgo/ziKlJQ5+thiio4q13xi9gVGJQ/k2ejOX+xoz7tAk/ua7vsR5Hb//jnvcOOKXLSNQtSpZL79MbpcuEFNyQxshhPizCOuEENVlII95N+KglGGxIslgt2MX4xNH8VHcB9QM1GJc5iTu9XYoUYnUlpmJe8oUEubNA7sdT8+e5Dz9NEZiosU9EUII64V1Qnii9muo9Ffp5luMEz/R+It9blYincKChDk4DSfPZvfl0dwniDeOWVXm9eKaOxfXq69i83jIUwrPc88RLOPcXiGE+DMI64Tw9r/swD/JPtiZc2a8gTFfQzCIzVfI/IQ5THFPJMuWRYe8B+jteZ6awVrFTxAIELd0KYnjx+PYt4/8m28me8AA/JdcUin9EUIIK4V1QrizRlu2RW+FOsAMqPmynX7DE2i+LoZhSYNpXnADA7JeKlmJ1DCIWb2axFGjiNq2Dd+VV5I5dSq+v/2tUvohhBAVIawTQlPf1eyM2kGhzQfAgVpBek/zkBiwMSt9Pq0Kbi6xsCzq229JHDGCmHXr8DdoQMb06eTfdZdsWymECHthnRB65jzL0oTFxd5zGk4+SPuUOsHiK4cdu3fjHjuW+OXLCVSrxuERI8h78EGIjq7IkIUQotKEdUI4N1iTDrkPoBPeptBWSJQRhcrtXCwZ2DMycCUnk7BgAYbTiadXL3L+8Q8Mt1U1SIUQ4uwU1gkBit8lOHDwTM6zANi8XhJefx3Xa69hy80lr1MnPH36EKxV60SnE0KIsBX2CeH8pf/hIbeTuQ8V8NBCB/Wz12IrLMQ9YQKO/fvx3nornv798TdqVNmhCiFEpQrrhBC3bBlJL7zAwCQv2xvAwH65VDn4LDbDwNe0KZnTp+Nr1qyywxRCiLNCWCcE95gx2L1eanthVasj7xoEqlXj0IoVMnNICCGKCOsqbI7jVEa1Z2ZKMhBCiGOEdUIIHKe0xPHeF0KISBbWCcHTrx/BuLhi7wXj4vD061dJEQkhxNkrrJ8heNu3B8xnCY7UVAJ16uDp1++P94UQQhwV1gkBzKTgbd8+IndYEkKIUxHWQ0ZCCCHKThKCEEIIQBKCEEKIEEkIQgghAEkIQgghQiQhCCGEACQhCCGECJGEIIQQArBwYZpS6jZgMuAAZmmtxxzzeR/gMcAPpAHdtda7rYpHCCHEiVlyh6CUcgDTgNuBy4BOSqnLjmm2GbhGa/0XYCkwzopYhBBClI1VdwjNgJ1a618AlFKLgLuBH4400FqvLtL+v8BDFsUihBCiDKxKCOcBvxd5vQf46wnaPwp8VNoHSqkeQA8ArTU1atQoV0BOp7Pcx/5ZSZ8jg/Q5MlREn61KCKXtPmOU1lAp9RBwDdCytM+11ilAypFzlLdAXSQWt5M+Rwbpc2Q4nT7XKeMeMFYlhD1AvSKv6wIlti9TSrUBBgIttdYFFsUihBCiDKxKCBuARkqpC4C9QEegc9EGSqmrgJnAbVrrgxbFIYQQoowsmWWktfYDPYGVwDbzLb1VKTVMKdUu1Gw84AKWKKW+UUqtsCIWIYQQZWMzjFKH9s9WRmpqiZGnMpExx8ggfY4M0udTE3qGUNqz3WJkpbIQQghAEoIQQogQSQhCCCEASQhCCCFCJCEIIYQAJCEIIYQIkYQghBACkIQghBAiRBKCEEIIQBKCEEKIEEkIQgghAEkIQgghQiQhCCGEACQhCCGECJGEIIQQApCEIIQQIkQSghBCCEASghBCiBBJCEIIIQBJCEIIIUIkIQghhAAkIQghhAiRhCCEEAKQhCCEECJEEoIQQghAEoIQQogQSQhCCCEAcFp1YqXUbcBkwAHM0lqPOebzGGABcDWQDjygtd5lVTxCCCFOzJI7BKWUA5gG3A5cBnRSSl12TLNHgUytdUNgEjDWiliEEEKUjVVDRs2AnVrrX7TWPmARcPcxbe4G5oe+XwrcrJSyWRSPEEKIk7BqyOg84Pcir/cAfz1eG621XymVBVQHDhVtpJTqAfQItaNOnTrlDup0jv2zkj5HBulzZLC6z1bdIZT2m75RjjZorVO01tdora8JHVOuL6XUptM5/s/4JX2OjC/pc2R8nYE+n5RVCWEPUK/I67pA6vHaKKWcQBKQYVE8QgghTsKqIaMNQCOl1AXAXqAj0PmYNiuAR4D1QAfgc611iTsEIYQQFcOSOwSttR/oCawEtplv6a1KqWFKqXahZrOB6kqpnUAfoJ8VsRSRYvH5z0bS58ggfY4MlvfZZhjyS7kQQghZqSyEECJEEoIQQgjAwtIVZwul1BzgTuCg1rpxZcdTEZRS9TDLgtQCgkCK1npy5UZlLaVULLAWiMH8e71Uaz2kcqOyXqgqwEZgr9b6zsqOpyIopXYBHiAA+ENT0sOWUqoKMAtojDk1v7vWer0V14qEO4R5wG2VHUQF8wPPaa0vBa4Dni6ldEi4KQBu0lpfAVwJ3KaUuq6SY6oIvTAnbkSa1lrrK8M9GYRMBj7WWl8CXIGFP++wv0PQWq9VSjWo7DgqktZ6H7Av9L1HKbUNc2X4D5UamIVCU5ZzQi+jQl9hPWNCKVUXuAMYiTlTT4QZpVQi0ALoChAqBeSz6nphnxAiXSgZXgV8VcmhWC40fLIJaAhM01qHe5+TgRcAd2UHUsEM4BOllAHM1FqH8xTUC4E0YK5S6grMv9+9tNa5VlwsEoaMIpZSygW8Azyrtc6u7HisprUOaK2vxFwZ30wpFbbPjJRSR56LbarsWCrB9VrrppjVlJ9WSrWo7IAs5ASaAtO11lcBuVi4ZksSQphSSkVhJoM3tdbLKjueiqS1PgysIbyfHV0PtAs9YF0E3KSUeqNyQ6oYWuvU0J8HgXcxqyuHqz3AniJ3u0sxE4QlJCGEoVAZ8dnANq31K5UdT0VQSp0Tmo2BUioOaAP8WLlRWUdr3V9rXVdr3QCzNMznWuuHKjksyymlEpRS7iPfA22B7ys3KutorfcDvyulLg69dTMWPgsM+2cISqm3gVZADaXUHmCI1np25UZlueuBh4HvlFLfhN4boLX+sBJjslptYH7oOYIds1zK+5UckzjzagLvKqXA/PfrLa31x5UbkuWeAd5USkUDvwDdrLqQlK4QQggByJCREEKIEEkIQgghAEkIQgghQiQhCCGEACQhCCGECAn7aadClEYp1RIYgvlLUQAYrLVep5TKAv6HWQupO1AHaKO1HhQ67mVg
jdZ6TZFzxWOWkfi/0HEpWuv5pxFbFcxCfRG1oFBUPrlDEBFHKVUDGArco7VuBdwDeEMff6e1bg08h1knqCyGAF+EznUD8OtphlgFaH+a5xDilMkdgohEfwfeOFLfSWvtATYf0+YbzJpIZdFca/1i6FwG5r4MKKWmYJbizgYexCwy2EZrPUgp1TV07BpgDpABXADcDfQAblFKrQHu11qnnXoXhTh1khBEJKoDfAeglOoMPAX8V2vdt0ibFsD28l5AKXUtkKC1bqGUegh4kuNXnK2KWWqjE3Af5mbq9SOhFIU4u8iQkYhE+zCTAlrrt4CHgBqhz5oopVZjJokxQD7mLmxHxHJ0eOlELsJ8FgHmjmYNKb4/g63I9z9orYPAXszhIiEqhdwhiEj0IbBUKaW11lkU///gyDMEAJRSPuAqpdSRX56aAuOOOd86pdSDWus3Q4UFr8esOdM29Pk1wM9AFmbNJYAmwJbQ98cmikLAcTodFKI85A5BRJzQmPxQYLlS6nPgNcw9qEtrm45ZRnwt8CXmXs0ZxzQbCrQMjfn/B7hIa/014FVKfQl0BmZgJoA6SqkPgXNOEOJ+oJpSaqlSqlo5uynEKZPidkIIIQC5QxBCCBEiCUEIIQQgCUEIIUSIJAQhhBCAJAQhhBAhkhCEEEIAkhCEEEKE/D9qCZVE8EbhSwAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEkCAYAAAA1naazAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzsnXmcFMX1wL/Vc88ewF6c4Ua8RRFRBBQhiMYYRRkxaowHHlGCUUMIMcafGqJGUaN436hAg3jEiBg0ghwqiBciyOmBuMDuAnvO1fX7o3t3Z2Znd2eXmd3Zpb77mc9Od1dXv+quqddVr+o9IaVEoVAoFIpYtNYWQKFQKBTpiVIQCoVCoYiLUhAKhUKhiItSEAqFQqGIi1IQCoVCoYiLUhAKhUKhiMtBqyCEEL2FEFIIMby1ZTkYiHe/re2LW+j6LXatgwkhxHYhxC1JyOc2IcTmZMikaBwhxKnWb6JHQ+kOWgUBfA90BT4CEEL0sG7YqQeSqRBiuJVP7wOWsP3TFViQrtcSQmwWQtyWGnGST7LqcBMZAtzfgtdTtCD21hagtZBShoGfWuv6QggHEJIH8UpFKWWL3f+WvFYsQggB2KWUwdaSIVVIKXe3tgyKFCKlTMoHGA6sAEqtz+fA6dax3oAEhsecsxm4LWJbApOBeUA58B1wPtABeMnKdytwXsQ51Xn/GlgMVAAbgFOA7sBbVl7rgRFxzhsece3Iz/YGyvor4FPrWnuBj4FjI/KM/LxvnfMcsMQq33bAADIBB3AXsAMIWHL+OuZ6EvgdMNu6B98DU2PS5ALzrbIWAncAzwNLEnx+PYBXgD1ApXWf/xhx3A7cCmwB/Ja8D0UcnwJ8BpRhKt65QNf67ndEuS5uiXI29VrA+3GeZW/rWH/rXu0FSoB3gKMizv0tEAJGWfUkAJyV4D2+DdgGVAFfAVfHlCMTeMCS12/VpenNqMPbgduBB4Fi617eC9gi0iRSN7cDtzQxXxfwKLDPun+PAv8ANsfkPdGqU1VWvjOBjIh68D3wYET6AmAncHeCdb7J7Y113t+BrzF//98DjwEd4jz/k4G1VrrVwOCINAJ4EvP3VF0XZgCumGvdAPxg5bEYuMSSu0dEmsGYdbAM2A0sBHrF5DM5Jp/fxOYT9x4lciMTuNE2qzLMBAZYn3OxGmSapiB+Ai7F/BE+YhVokXXT+wMPWQ8zNybvLcA5wCHAq8CPmA3yuda+V6yH6YgnE2YDL4HxQBcgv56ydsH8sUwF+gCHYSqno6z7cLaVzxArbY513nPAfku2QVZ6O/BPoAiYYMk5HVN5jI65L4XAJKAf8Htr36iING8A32A2SkcAz2L+ABNVEG9Y92uQdW9GARdGHH8e2IVZQfsBJwJ/iDg+BRhj3ZOTgJXA0ojjdeoA8RvtlJSzqdcCcjAb6nut59jFer6dMevoo9YzHIhZJ4uq6wxmXTUwG4XTgL5AfgL3+DngC2CsdR8vwFRCV0Q0Ku9jNibnWPmOBCY1pQ5HNOQlwDTM3+sFmI3aZRFpEqmb26mrIBrL937MuvQr4FDrHu8nQkFY97AEs75Vl/MLYHZEmpFAEPildW8WAx9i/cYTVBBNam+s824BRljPcDTmC+nzMbIbwDIr3aGYDfgWzJ4kmMP7dwJDrXzOxlRu/xeRz3jr3k2x7uVvMdu1moYdOBxTMfyfdZ2jMF+gvgHcVppfWfncaD3HKzDrfospiE7WxU6t53hvElcQD0Rs51v7HopzrbNi8r4hIs0Qa99NEfuqfzxH1qMgejRUhjj59K7n+PB4xzF//HuBzIh9Xsy3wN/FpH0VeC/mvvwrJs0G4B/W9wFWmsgfrgNTISaqID6PfBYxx/pb+Z/fhDpRfZ+611cHiN9op6ScTb1WvPpp7bsN+DBmn8D88d9gbf/Wyn9ETLqG7nEfzEbl0Jj9twKfWd9HW/keX08eCdVhK+124I2YfW8Dc5pYN7dTV0E0lG8GZo9gUkyaNUQriO3ANTFpRlrl6xSx72+YPbL7MH9ffZpQR5vc3tSTz7nWvdJinv9xEWlOtPYNbCCfPwCbIrZXEKEQrX13Ea0gngPmxqRxYSq6c6zt5cBLMWnujcynvk9SjNRSyhLgKWCxEGKREGKaEGJgM7P7PCLf3UAY880h8loBzO5k3POotS18EWdf7HlN5QvMN5V1QohXhRBThBA/S/Dcr6WUZRHb/QEn5ptGJEsx344j+Sxmewfm2yyYbxFgvj0BIM3x7jUJygXmsMV0IcRHQoi7hRAjI44dZ/1/p76TrVkRi4UQ3wshSjErJUCvJsgAqS9noteqjyHAYCFEWfUHcyiiN6YCi2R1zHZD9/h4TEWzJibv6RH5DgZKpJTNLW8sDZW/KXWzKfn2w2zAVsakqa4vCCHyMevNzJh7sShCtmruwHxbvhFzOG5bI7LF0uT2RggxXgixTAjxoyXXS5j3qktEvpLoNmmH9b+mfgkhJll1odDK5x9E/14OJ6KuW6yK2R4CnBtzn4oAN7X15nAauN8NkbRZTFLKSZgV+L+Y4//rhBBXW4cN67+IOc0RJ6t4hrzYfZK6sgdjjte374DKLE3j9hmYQwergfOAb4QQZyVwenl92cZsizj7AnHOiS1L7DkJI6V8FrNyPoY542eREOLFRM4VQvTEtPVsxxw3Ph6zywzmD6cppLSczbhWLBrwLuYwUeRnIGbvopqwlLIqKvOG73H1dYfF5HskcHSMjMmiOfc6Xt1sSr4iYl99VKedQvS9OAaz0fsyIm1XzGGTsPW/qTSpvRFCDMUcwlmG2XM4DrjGShdZ1w2rrYjMg4h8JgCzMO0fZ2L2uG+nbpvY2L3WMO1osfXxEMyX9kTzqTfzpCGlXCelnCmlPAN4GrjKOlQ906FbdVohRAGmETldqK7UtsYSSpOPpZQzpJQjMd+qLmtqPphDGH5MhRrJSEwDZaKst/6fVL1DCGHHVNgJI6XcKaV8Vkr5G8xxyouEENmYhjYwx8bjMQTwYA6xrJBSbqTxN/HmkJRyNoEAdZ/jGsw36B1Sys0xn0Zn9DRwjz+xkvSMk+8W69gnQI4Q4vgG5CWOzM0hWXUzXr4BTANuJMOqv0gpCzGHDQfGuRebqxWvEEIDXrTkOR+4tQXWNQ0H9kgpb5FSfiSl/AZzaK+pjAQ+tdrLT6SUmzB7oZGsJ6KuW5wYs70G8wViS5z7VBKRT+z9jt2OS1KmuQoh+mMa+/6N+WC7YRpn1gJIKSuFECuAqUKIDdZ1/45ZAdOFPZjGnrFCiK8Af8QNrkEIMQxzLPgdTKPSAMwH9LSV5FvMHtOZQoh5Vj774l1QSlkhhPgXcIcQYjdm13wCplHp54kKLqXcJIT4NzDL6rXtBm4CsknwzUEI8TBmL2AjZvd0POazLJVS7hdCvAQ8
IoRwY3Zzc4BhUsoHgU3WdW6y0h2DOXaeVJJRziayDTjZ6iFVYE7EeBizYX9NCHEn5j3qgdmr/I+UMrYrX0MC9/gZ4EkhxFTMe5yBqfzypZR3A+8BHwDzhBA3Yg6FdAMOk1I+RYJ1OBGSVTfj5FsuhHgMuFMIUYh5L67ANLDuikj6F+BpIcRe4DXMt/rDgDOklFdHpDkKGCSl/MHK9yUhxKDmljsBNgL5QogrgP9hKozfNTOfK4QQvwLWYc5yGx+T5j7MZ/0x5vDaMMzZR1Bb32dgzqJ8UQjxIOZvojfmJIYHpZRbrXzmW/m8Zcl8SSJCJqsHUY7ZUM7FHA98BXPM6/qINJdjVt6VVronMBvYtEBKaQDXAT7MH+2n9STdh6nVX8dsGJ/BHIO8w8qnEPgz5iyOnVa6hvgL5nS3BzDfhC7GNKa+28QiXIZZ0RZhznTZgTncV9XAOZEIS4Z1mN3nDMwfY3VFvAx4HHPmxdeYxso+AFLKLzCn0V2N+bZyM+b0vFRwoOVsCn/DnPK4EfOH19N6vidhNsYLrWMvYQ4dNVafG7vHV2HO8PkL5n18F3OGzVYwe67ALzB/5I9Z134RyLOOJ1qHEyVZdTOWaZiN/mzMxq0j5nBLDVLK2Zjl+IWVZjXmEN4OqHlRuxW4XEr5g3XazZiG6sihlaQipXwT8+V2BuZQ10Tgj83I6nHM8j+L+ZyGEj1EiZRyIeZsyWnWtS7CnK0EVn2XUn6NqTgyMW2j6zGfmQfzXiClfBXzRWoq5kvFRcCfEhFS1NZNRXtCCGHDnJXzhpTyptaWJ1UcLOVUKACEELcCU6SUuS1xvYN2JXV7w5oRU4D5NpKFOWWuN+Y0uHbDwVJOhcLytnATtYt9R2H2VmY1dF4yUQqi/WDDXMDTH3O8dh3moq8vrTH09Q2ce7WU8qUWkDEZ1FvOVpVKkVZY9oj6nDN+K6VsbKpuOiCBUzGVRBamTWwG5gLGFkENMR0EWDN9ejeQpFBKWdpC4igUKceaJZldz+GglPLblpSnraIUhEKhUCjicjC7+1YoFApFAygFoVAoFIq4KAWhUCgUirgoBaFQKBSKuKTVNFefz/cM5pLzXbquH9lI2p6YMQo6Yk59nKbr+lupl1KhUCgODtKtB/EcMC7BtLcAuq7rx2Iud38kVUIpFArFwUha9SB0XV/m8/l6R+7z+Xz9MFcO5mM6TJuk6/oGzEUk1fOcO2BGWlIoFApFkki3HkQ8ngAm67o+GNMZV3VP4TbgYp/P9wPmUvTJrSOeQqFQtE/SWkH4fL5MTE+F830+32eYHhC7WocvBJ7Tdb0HZsCN2T6fL63Lo1AoFG2JtBpiioMG7NV1fVCcY1dg2St0XV/l8/ncmG6Pd8VJq1AoFIomktZv3Lqu7we2+Xy+CQA+n0/4fL5jrMPfYQbuwefzHYYZgKXRiF4KhUKhSIy08sXk8/nmYHovzAMKMQO2vAc8ijm05ADm6rp+u8/nOxwzMEYmpsF6qq7r77SG3AqFQtEeSSsFoVAoFIr0Ia2HmBQKhULReqSTkVp1ZRQKhaLpiFRlnE4Kgh9/bN5at7y8PPbs2ZNkadIbVeb2z8FWXlBlbirdunVLsjTRqCEmhUKhUMRFKQiFQqFQxEUpCIVCoVDEJa1sEAqFQnEw0HnQWGy7i2q2qy0J4fxcCj9Ln+Vcaa0gpJSUl5fT2FqNyspKQqFQC0nVekgpcbvdOJ3O1hZFoVAcAJHKIZH9rUVaK4jy8nJcLhcOh6O1RUkLpJSUlpZSWlpKXl5ea4ujUCjaOWltg5BSKuUQgRCC7OxsSkpK2Lx5c2uLo1Ao2jlprSAU8bHZbKxZs6a1xVAoFO0cpSDaKAeDzUWhULQu7U5BeBYupOCEE+jaowcFJ5yAZ+HCpOVdXFzMJZdcwogRIxgzZgxXXnklRUXRRqUZM2bQq1cvZs6cGbXfMAwmTZpUc+7EiRPZvn17zfHLL7+cMWPGMHbsWM4991zWrVuXNLkVCkV6Ec7PbdL+1iKtjdRNxbNwIR2mTkWrrATAvmMHHaZOBaBy/PgDzl8IwbXXXsuwYcMAuOOOO5gxYwb33XcfAPfeey+ff/45K1euZPLkybhcLq677rqa8ydMmMCYMWPQNI1nn32WqVOnous6AA888ADZ2WaI7cWLF3PTTTexePHiA5ZZoVCkH5FTWdPZvUibURDZt96KY/36BtM4P/kEEQhE7dMqK+l40014X3653vOChx/O/ttvb1SGTp061SgHgOOOO44XXngBgFmzZrFlyxZmz56N0+nk5ZdfZvLkyTz55JNMmjQJTdMYO3ZszbmDBw/mqaeeqi2fpRwA9u/fj6a1u86dQqFoY7QZBZEQMcqh0f0HgGEYvPDCCzWNfmRPAcDtdvPkk0/We/6zzz7Lz3/+86h9N998M0uXLkVKyUsvvZR0mRUKhaIptBkFkcgbfsEJJ2DfsaPO/nD37hQtWJBUeW655RYyMjK47LLLmnzuo48+yqZNm5g/f37U/nvvvReABQsWcOeddzJ79uykyKpQKBTNoV2NY5ROm4bh8UTtMzweSqdNS+p1br/9drZt28ajjz7a5KGgZ599lldffZXZs2fjiZG1mvPPP5+VK1dSXFycDHEVCoWiWbQrBVE5fjz77rmHUPfuSCEIde/OvnvuSYqBupq77rqLL774gmeeeQaXy9Wkc1988UVefPFF5syZQ6dOnWr2l5eXsyOi5/POO+/QsWPHqDQKhULR0rSZIaZEqRw/PqkKIZKNGzfy0EMP0bdvX84++2wAevbsydNPP93ouWVlZUybNo0ePXowceJEAFwuF2+++SYVFRVcffXVVFZWomkaHTt25LnnnkOIlAWKUigUikZpdwoilQwcODDqTb8pZGZm8sMPP8Q9lp+fz5tvvnkgoikUCkXSaVdDTAqFQtFWECucOPp2Zb/L/C9WpJ+XZqUgFIo0oy00HIoDQIJY7sR+aQ7Cbw4jC78wt9PsWashJoUijRAr4jccoaeLkUMDIAUYQBjzv0HdfdXb1Z+wuU9E7pNAOCZdQ/mErXOM2n1mfrF5WPvCcfbVk4+Zl7ld5bZhK8uqK5+sTisi8oiXr7lP1JQxIo2MkCkcuy+mHNX7rDyiylp9TjgiTex5DdxXIePbFkWlhv2SXIJbdzZQQ1oWpSAUijTCfklujXKoRlRqOH7d/uN/SCEJaKBpmebYhpBgw/yuAQLQpPndFrMd9ZHIyHNsMiYP67zqPBzV+cTmJWvOMbSIfEREmnj7Gsinujy2mVlx70Hss29tlIJQKNKIhhqI0J/31zY4EY1NzXZkoxXVYBHdaNY0lrENcETDZqtnX8S2rK9BFDI6j0byiWy809kvUTLRZmXGfdbS1XD0zJZGKQiFIo2QLllvw2FcX9YKEilSQWh2kTmUWFlrBpYeg9Dz6bU4VhmpFYo0IjS7COkxovZJj0FodnrFKlYcGPLkAKHni2t6DNIlze2Tk+837kBoVwqiU/ZYCnK61/l0yh7b+MkJkMp4ENXMnDmT7t27s2HDhqTIrGhbtJWGQ3HgyJMDBLf
uJNtv/k/HZ9yuFEQwNBgpo6eJSekkGDo+KflXx4P44IMPWLJkCb169WLGjBk1xyPjQSxfvpxZs2ZFnT9hwgSWLl3KkiVLOP3005lqxaqo5ssvv2Tt2rV07949KfIq2iZtoeFQHBy0GRtEpvdW7LaG40FAAAjG7Atht62jY9b59Z4VCh9OWUXrxoPw+/1Mnz6dWbNmMWHChEZlUSgUilTTZhREYjgxjAI0bRdCSKQUGEY+kPzFJ8mOB3Hvvfdy3nnn0bNnz6TLqlAoFM2hzSiIRN7wATRRSG7HkwA/4KJk/9sYsiDp8iQzHsSaNWv47LPPmD59erLFVCgUimbTrmwQAIbsTKX/AqQUVPovSIlySHY8iA8//JAtW7Zw4oknMnToUHbu3MlFF13E0qVLky67QqFQJEpKexA+n88GrAF26Lp+ViqvFUlF5Q3Ybd9QUXlD0vOujgcxe/bsZseD0HU9KtbD9ddfz/XXX1+zPXToUJ5//nkOPfTQpMmtUCgUTSXVQ0xTgK+B7BRfJwpDdmZv6StJzzdV8SAUCoUiHUmZgvD5fD2AXwB/B25M1XVaklTFg4jlo48+atY1FG2fseflMfiYADdcW0Ze+3e/pEhzUtmDeACYCsT3SgX4fL6rgKsAdF0nL+YXUVlZmULx2i6apqFpWp371d6x2+3tvsxfbXCyaasD/fUMfnshTPtDHl07t7ZULcfB8IxjSecyp0RB+Hy+s4Bduq5/4vP5Tq0vna7rTwBPWJsy1klXKBRKhXhtHsMwMAzjoHBqFsnB4citG4GA6YvpsWclTzznYEC/EOecWcGgo4P07xOia2eD9hqN9uB4xtEcSJm7deuWZGmiSVUP4mTgbJ/PdybgBrJ9Pt+Luq5fnKLrKRRtntKy2FZfYEjYuNnO3f/qULPX6zHo3zdE/z4h+vUx//fvG6JPrxCu9Io3o2jjpERB6Lr+Z+DPAFYP4malHBSK+FRUCJ59OYNHnsmM2u90SjQBvnPKucRXwd79Gpu32dm81c6WbXY++sTJwje9Nek1TdKze5h+fUMMsBRI/z4h+vUNktMxvdxIK9oGbWahnELR3qjyw4t6Bg89mcmeIhunjajivQ/cOB0STYNLLzS45re7Kciv9e467IRov0wVFYKt39rYvNVhKg9LgSxf5cIfqO2R5HQKRyiMWuXxs+5hbLYWK7KijZFyBaHr+vvA+6m+jkLRVggEYN5rXh54LIufCm0MO8HPUw+UMOS4AGPPy+P4QQFuuKaMww/LYc8eo8G8vF7JkYeFOPKwaHtdOAw7dtpqFMbmbWav45333ex5pVYjuJySPr2ih6oG9A3Rt1eIjAzV6zjYaZc9iF1aIVM6/Y5/lTxKvpG8ldTFxcVMmTKF7du343K56N27N3fffTe5ubk1aWbMmMHjjz/OlClTuPHG2tm9hmFw9dVXs2HDBlwuF3l5edx111307t0bMBfHuVyumsV3f/nLXzj11FOTJrui9QmFYOGbHu5/NIvvfrAzeFCAB2eUMPzE2l7BO68kx0Brs0HPHmF69ghz2gh/1LHivYItlsKo7nms3+hg0RI3hlHb6+jWJRTX1tE5v/0ayRXRtEsF8XDmA6x2fsRDmQ9w+/4ZjZ+QINXuvqs9ut5xxx3MmDGD++67D4h29z158mRcLleUE78JEyYwZswYNE3j2WefZerUqei6XnP8iSeeUKun2yGGAf9+2819j2SxZZuDow4PMPuxIkYN97dKQ5vTUZJzbJAhxwaB2qnk/gB8+529dqhqm50tW+3or3kpK691KZOZYdQZqurfJ0TvniGcykjermgzCuKO7Fv52tGYu28IEOAz51qkkLycMZv1jnU4G/HmeljwcP66v3XdfSvaH1LC4vfc3PtwFl9/42Bg/yBPPVjMuNFVafkG7nLCIf1DHNI/erhKSijcrbFpq6kwTOXhYOXHLl55o9ZIbrNJev0sTP8+wZreRnXPo2MHNVzVFmkzCiJRdtgiVytLdth+oE+4b9Kvk2x330CNP6YhQ4Ywbdo0OnToEO9URZojJby/3MU/H8ri86+c9OkVYtY9JfxyXGWbNAgLAV0KDLoUBBhxYrSRvKxcsHW7vY6t4/3lbgLBWi2YlxuuM1TVv0+IHt3CNNHfpaIFaTMKIpE3/F1aIad2PgkprHCNQrLfto8H9zySVFsEJNfdN8DChQvp3r07fr+fv/3tb9xyyy089NBDyRRZ0QKs/NjJPf/KYvWnLn7WPcTMO0s475eV2NvML61pZGZIjj4iyNFHRAfqCofh+x22qJlVm7fa+c87Hvbuq9UIbpekb+9ahXHs0RoFeXb69Q7j8aheR2vTrqrtw5kPYBBdqcIYSbdFVLv7fu6555rt7nvevHk17r6BmjCjLpeLSy+9tFmKR9F6rPnMwT3/ymbFRy66FIT5x617mXhuxUE7Jm+zQe+eYXr3DDPmlBgjeYlW09uo/nyxzsGbi6uN5ObLXI9uEQbyCHtHfp4ykrcU7UpBrHV+QlBEd4GDIsBa55qkXSMV7r4rKioIhUJkZ2cjpeT111/niCOOSJrMitTx5XoH9zyUxXvL3OTlhrntT/u42FeOx93akqUvOZ0MThgc4ITB0b/VKj/s25/P6rWlNUNVm7fZ+fgVLxWVtS9i2VlG7VBVnxAD+oXo1ydIrx5hHI6WLk37RkiZNt04+eOPP0btKC0tJSurXl9/Lc7GjRs57bTT6Nu3L2632QI0xd33oYceSo8ePWrKVO3u+9tvv2XSpEkYhkE4HGbAgAHccccddO4c30vbtm3b2LhxI+PGjUte4doA6eSnZ8MmO/c9nMVbSzx0zDa49vIyLvt1eVLXDqRTeVuKeGU2DNhZqLFlm6OOreOnXbVGHbtd0rtnxAryakN57xAdstOmnatDEnwxpaw/1a56EKkmVe6+e/XqxTvvvHMgoilaiC3bbdz/SBavveUhwyu56br9XHlJOdlZ6dsAtXU0Dbp3Neje1c/IYdHDVaVloqanUT0td/M2O+8udRMM1babBXnhOkNVA/qG6NpFGckbQikIhSIBvt9h4/5Hs1jwhgenU3LdFWVcfVmZ8nHUymRlSgYdFWTQUdFG8lAIvtthY/MWc0putQJ5Y5GHfftrNYLHY9Cvd6iOG5I+vUJqmBClIBSKBtlZqPGvx7OY84oXTYPLfl3O9VeWkZ/XsAsMRetit0PfXmH69gozltpeh5RQVKzVGapa+4WT1xd5kNLsdQgh+Vn3+FNzc3MOHiO5UhAKRRz2FGk8/FQmL8zNIGzAr8+rYPJVpXTrohRDW0YIyMs1yMsNcOLx0UbyyirY9m30tNzN2xysXO2kqqq219Gxg2EpjGCUraNn93C7m87czoqjUBwYJXsFjz2XyTMvZlDlF0z4VSU3XFNKzx7h1hZNkWI8bjh8YIjDB0avJDcM+PEnW/TU3K123lvmZu7CWiO5w246Pox1Q9KvT4iszOihyLYSWlYpCIUC09j51AsZPP58JmXlgl+dUckffldK/z5KMRzsaBr06BamR7cwpw6PNpLv2y/Ysr02RsfmbXa+2W
p6zQ1FGMm7FIRrhqoG9A3y1QYn32x2MO+1DMutu0bn/PTrnSoFoTioiQzWs3efxhljKrnpulIOO0SFu1U0TodsyXFHBznu6GgjeTAI334f7YJk8zY7C9/0UFqWYaYJCQjBk89rPD+nMxecU84N15allaJQCkJxUBIvWM/N15dyzJHBxk9WKBrB4cA0avcNweja/VLC7j0ax57aJWKfwO+H2XoG32xx8MrzRa0gcXza9QzgQk3w52wXY/MykpJfcXExl1xyCSNGjGDMmDFceeWVFBVFP8wZM2bQq1cvZs6cGbXfMAwmTZpUc+7EiRPZvn17zfGqqiqmTZvGySefzOjRo5k6dWpSZFZEEwjAbN3LyWd05m93deCQfiFem72H2Y8Vp5Vy2KUVcoZ9NLu1Xa0tiiKJCEFUhEAwQ8u6XZLfXFDOo/eWtJJk8WmXPYhCTfBAppN5GU4kEEjSnLRUxoP4+9//jsvlYvny5Qgh2L17d1JkVpgkEqwnnXg48wE+FCuT7kdMkT40FFo2XWgzCuLWbBfrHQ37Sg4AP9g0dtlMhSAjFMP5ud56zoLDg2Fu3++v93g1qYouCot/AAAgAElEQVQHUV5ezoIFC1izZg3Ckjk/P79ReRSNk27BehJhi20z8zJexhAGL2f8l4+dF1GsHcKDez+mS7grXYyueKSn8YwUacsRhwaiQst+VSz5c6aLT5x23tlT3tri1dBmFEQifOPQKBWClvjlJzMexPbt2+nUqRMzZ85k5cqVZGRkMHXqVE444YTUFaCd09aC9RgYfORcxXzvXP7teZ2wyCPEnwhzMV86BeDm4jxfTfqORke6hruZCiPcNeZ7V7oY3fDK+l+KWhPZwPcwknAjaZryPdJLUTLyrP94xIUSuOZLr5pD07s1wZW2IHrnzKSOdiSLNqMgEnnD3xUxtGQQfbMXFFUkVZ5kxoMIh8N8++23HHnkkfz1r39l7dq1/Pa3v2XFihVp5aywLdDWgvXs1H5koXc+C7zz+M7+LW6jL35eIszpgC3qZUczipEYGEJSJCR77Aaf20FiWDFQIhsXgZAC80+j+htR30XNGclqkKO+N6uxC0K37Gac14aRskVeaptDm1EQiVBgSGbs93NDWaBeRZEMkh0PokePHtjtds455xzAHLrKyclh69atHHPMMUmVvT3TVoL1BAjwnvu/6N65fOB6H0MYHOs/i0ODz/Ef99GERfwgEhdWVI9R1zb25haECVMuyqnQyqkQZZSLcsq1Msqx/osyKrXq+NO1zbhTOsg0MsmQmWQYGWTKTOuTQYa13yVr5YlWQQfwPcKLdOT+DG8G5RXlyb1W1HXj74+kufmLiPva2HWezHTyrU0zlWh66gagnSmIamIVxRpn8oqZingQOTk5DBs2jGXLlnHKKaewZcsW9uzZQ+/evZMmd3umrQTr+ca+kfneObzmeYViWzGdwz34edUsirSzWeXMIiwEXuMLKkURkhMxW45aj3GN96Jd1icn7lE/fnbZCvnJtpOdth/ZadvJT9q2mu2fbDvZqu2uichYTaaRZQ5bVQ9hGbXDWV3CXekW7kamzIrojzSfPHc2e8rSc+JAMjmzKhTxEisIpKmSUPEgmkCq4kEAfPvtt9x0002UlJRgt9v505/+xGmnnRY3LxUPwiQ2WM/1V5alXbCeUlHKm57Xme+dy+fOT3FIB0P9l+KW1/KRsz+7bRoFYQNfRZALKoL0DZu9hF2a4LH8DjyvhWt6wTt+3J9yeQMEapWItjNKeVR/dmmFdZRIhpFRY0CPtY1UK5ds2aFRJXKwxcA40Oec6ngQSkG0QQ52BdESwXoOBIlktfMjdO8cFrnfpEqrol/wGAYG/8J2+yjWOt3YpGR0VYgLK4Kc5g/F7crn5eWxvriophecLrNbggTZbdvFTu3HOsqjenuXtgtDRE/b9BgeU3kYXeMqkC7hrvTPHUDRnvRZKNYSHMhzVgGDFAqLb7bArX/vmLbBegq1n1jonc9871y+tW8nw8hkuP9GwlzKB66ufOUQ9AmFmb6/ivMrgnQ2Gpe7ergUGp+k0VI4cNAt3J1u4e5Qz9rCECF2a7uiFEj19522naxyrmCXrZCwiPZ15ZEeOhd0iatATOXSjU5Gp6QMZ6UT6ficQSkIRRugNliPA6fTnlbBeoIEec+9hAXeubzves8yOP+cIwNPss4xmDc9DtyG5KyqIBdWBBkaCDfYtHXKHovD/pW5YUCBZU4Iho6gZH/biTpox05XoxtdjW4NKpE92u4oBbI3cy/bglvZafuRj50fsstWSEhE+8VySlfNdF5TeXSLUCCmMsk1ctudEmkNlIJQpC2xwXp+d4XBFRftTotgPZvtm5jvncurngUU2faQH+7CuMqZ7NPG84GrI6uE4JhAmLv2VvKryiCJhkQOhgZjt21CiFpDrZROgqHjU1SS1sOO3WzQja41SiTPk8eeklobRJgwRdqeqN6HaR8xt9c4V1No+4mgiNZCTumMMqTHKpBu4W7kGLlo7dvb0AGjFIQi7agvWM/RR+awZ0/rKYcyUcZbnn+je+fwqfMT7NLOif6JZPkns9o5kIVeGx0Ng0vKA0ysCHJ4qOmyVlTegMc1r87+Sv/5CMqQZJDW8yKTjA0bBUZnCozOHBM8Nm4aA8NSIhFDWVqtXeRT5yf8ZNtZR4k4pIPO4S51lEfXiIWHuUYeNtJwAU0LoRSEIm2oDtbz9IsZ+NMkWI9E8olzDfO9c3jL/W8qtAr6BA/jl+UvssP+c951eZBCMMIfYvr+Ck6vCtG8SVQSm20DLsfbGNKLTdSORQsRILfDL81U0okhczCMHKTMwTByre1cpOxUuy1zkEYuhuwEOJJxK9IWDY18o4B8o4Cjg/HXDRkYFGvFtTaRmBlanzs/Y7FtEQERbQOwSzsF4c71KpAu4a7kGwXNViK7tEIusV/ATO0h8o2CZuWRSpSCULQ6pWWCJ1/I4Ik0CtazW9vFQu8CFnjmstWxhQwjg+FV1yC4gg9cPfg6Q6NbyOCGsgAXVAT4Wbg59hADu30tLsfbuJyLsNu2I6UgGDoKTZQiRAgpnewvvweBgdBK0EQRmlZs/hfF2O2fo4kSNG1f/VcxOmDITpbCyIlQKKaiidqWOUiZRXvrpWho5Bl55Bl5HBU8Om4aiaRYK7aUx491hrXWOb5gifsd/KIq6jybtFEQ7mxN8Y1WHtW9k3yjAHuc5jbdnTK2KwURGcYvFUE3iouLmTJlCtu3b8flctG7d2/uvvtucnNza9LMmDGDxx9/nClTpnDjjTfW7DcMg6uvvpoNGzbgcrnIy8vjrrvuonfv3nz//fdcfvnlNWn3799PWVkZX331VdLLkE6kW7CeECHed73HfO8c/ud+l7AIc4x/JL8q/xcbHCfyhteJQ0rGVoW4sKKSkf5wM94bgzjsq3A5F+FyLsamFSKlnUDoZCqqriYQOB1DdibT+2c87hep9F+IPzAhoXw1UYKIUB6aVoQQJWha9XYxmrYDu/YlmiiOsnNEIqWjgV5KDkZET6W2l5JmqxKbgUCQa+SSa+RyR
OjIuGkkkr2ipM6srOrvXzvW855rCVVatBLRpEaBURDlNyvTyETPmIMhDF7JmMfkshvSrhfRrtZBdD+iG06HRGikJDpTSUkJX3/9dZS7771790a5+169ejUzZ85k8uTJjB49usaJn2EYLFmyJMrd96JFi2rcfUdy6623Eg6H+fvf/x5Xjra+DqJOsJ6RVfzx+lKOPqLheAypWkS11baZ+d55vOpdwG7bLnLD+Zzkv5kK4eN9Vy4VmuCQYJiJFUHOrwySm8D01GgqcTrex+18C6fjXTRtH1J68AdH4Q+MIxAcjZQdo87QRCG5nX5PUclDGDIVjYZEUI6IVB6iyFIwxREKpvpYMZq2t97cDCPb6n10qqtQLCVSq1BykDKbeL2U9rBQTiLZJ/ZGG9Ujp/ta60dqXZ+AQzrxlV/Y5F6EWihnKYhb/5HN+o0Nj6WuWl3r+kJYKz0L8sxYsg25XTh8YJDb/9z0Var/+c9/eOGFF5g3bx6zZs1i3bp1PPjggzidTqqqqpg8eTInnHACkyZNqnPuF198wbXXXsuKFSui9gcCAQYPHszLL7/MUUcdFfe6bVVBBAIw7zUvDzyWxU+FNk4e6uePk/cz5NjEAvUks/EoF+Uscr/JfO9c1rg+xiZtDKs6lxzjD6xxHs4Wh50MQ/KryiATK4IcF2x4emosQuzF5fgvLufbOB3vI0QVhtERf3AM/sCZBIIjgYZddqdfYxmyeiSxvZRitIjhLyFqlY4Q8ef1S2mP6oGYQ125eNw9KCv3xPRgzKEv041I+6BQ+4lRnYfhj7g/bunm/cJVTepFqIVyzURK854V7rZRUalx5GHJjRaWTHffkbzzzjt06dKlXuXQFokN1nN8KwXrkUg+dXzCfO88/uN5nXKtnN6hAZxT8Qy7tDP4nzuDkBAcHwhxX0klv6wK0pTF2Zr4CZfzbVzOt3HYVyFEiLDRhUr/RPyBcQRDJ9K2DcZ2pMwnHM4nMeuQRFBRO+wVt5diKhS7/WvzGHvJqicApCEza+0osbYTS8FE2lqk7EC62lJmZT6IEe2YnDBG2tki2oyCSOQNv/sR3Wq+V0druuDccm64pizp0ZqS6e47knnz5jFx4sRkiNjqpEuwnj3aHl7zLGC+dy6bHZvwGB5G+i/HKa/mA1cv5no1csMGV1rTUwc0YXqqTdtqKYVFOOxrAQiF+1BRdTX+wDhC4UG088i+DSCQZCCNDAx+RiJaJS+3I0XFm2t6J5oojlEw1rZWiF18bSmYqrh5SWmzeik5cYzzuXF6Kbm0VC9lrfMTgjE2oKAIsNa5pkWunyhtRkEkSqoVAyTf3Xc1P/30E6tWreLBBx9MprgtTmywnkMHBHn6X8WcflrLBesJEeID1/vo3rm85/4vIRHi6MBJnFd+D5vtI3jd40KTklP9Ie7YV8WYqlCCZlaJ3faVaWR2vI3dvgGAYOgoyir+iD94BuHwIRzIm6tn4UKy7roL248/UtCtG6XTplE5fnyz82tTCDtS5hGWeYQT/ulW1A57VfdSahRMreHebtuIZi9CiL01Q9CxGDKjTi9F1vRO6s7+MnspTX8BWBEAR5zyBUOQTlGpU6IgfD6fG1iGqY7twAJd1/+WimtFEhnGL1XxXVPh7rsaXdcZPXo0OTnx3TWnO7HBevr2DvHIP4v55bgqmqhHm8122zYWeOey0LuAQttP5IRzObPyrwS4mP+58/nYKegZMvjj/ip8FUG6JWRwDuOwr6lRCjbb90ipEQydQGn5bfiD4zCMnyVFfs/ChXSYOhWt0jRg2nfsoMPUqQAHj5JoMl4Mw4tBj4R6KRBGiL1xjPMRvRRRjKbtwS42Wr2Uyrg5SWmzDPO1Q1yxw14y5hi428yK+ZQYqX0+nwAydF0v8/l8DmA5MEXX9Q8bOC3tvbmm0t03wPDhw7njjjsYNWpUg3mlo5E6NljPH64tTXqwnvqMtpWikkXuN1ngncdHrlVoUmOY/yy6hG/kM8cxrHPacUnJmZUhJlYEGBYIJ/DO58fpWGGtUViMpu1BSieB4HD8gTPxB8ciZW6juTSVghNOwL5jR539oa5d2fXxx7SYpm0l0s8wX01lVC9FiFi7SkmMgilBiPgvqYb0Io0OaNpPUT0ZKd0U7V3VpFlrbdJIreu6BMqsTYf1SZvpUs1l4MCB7Ijz402EzMxMfvjhhwbTLF++vFl5tyZRwXo6t1ywHonkC8dnNTGcy7RSeoZ6c375IxRpv+J/7iyqhOCIYJg791ZybmWQxnz7CcpxOt7D5VyE0/kumijDkBkEAqPxB8cRCJyGJDUvLKKsDOfKldjqqV/2nTvp2rs3Rk4ORl4eRk4O4by8mu9G9ffcXMK5uRh5ecisrLQNZdn28GAY3THo3oReyr4oQ3ysgnHYV2HTfkQIs/dQ6b8gRVOam0/KbBA+n88GfAL0B2bpuv5RnDRXAVeBObySl5cXdbyyMn637mBH0zQ0Tatzv1qST78Q3Ha3jbeXaBTkSe69I8Sk3xi43V7Am5Jr2u12RB7M015mtvYc67Wv8EgPPzcuJyt8Hf+z/YwXM6CDhN8YGpcbGsdKJ3g99Ysk9wBvIuTrwLsI/EjygAkY4mwQp+H0uHE2PCO16YRCiE8+Qbz7LtqSJYiPPkKEQmYIyji9etmxI8bVV8Pu3disj3PdOti9G7E//gQO6XRCfj4yPx/y8pAFBbXbsf8LCiCjnulDLYjdbm/Vep1cOjd8WO4EeShQBcKG23M7bm96lT3l6yB8Pl9H4FVgsq7r6xpImvZDTOlCaw4xxQbr+d0VZrAerzd19ShMmA9cS3mj06u8Jf5NUAQ5KnA8RwSnss12Kh+43BhCcJLfDMBzZlUQTwPiaNqOGvcWDvtHCGEQDnfHHxyHP3AGwdAQUvHuZNu+HdeyZbg++ADX8uVo+/cjhSB41FH4R47EP2IEth9/pMP06TU2CADD42HfPffUb4Pw+9GKirAVFaHt2YMW8b9mX3Gx+X/Pnqi8IzHc7ppeiGH1QozcXLOnErEvbH3HnfzQfek7xJQaalbMV11CWUXTp7e2ySGmSHRd3+vz+d4HxgENKQhFGrNlu437H8nitbc8ZGa0TLCe72zfssA7j1e8Oj/ZdpIjc/lFxc1ILuM9d2dWOzU6hw1+VxZgYkWAPg34Q7Jpmyz3Fm/jsH8OQCh0CBVV1+MPnEkofCTJ/p2JvXtxrVhRoxTs335rXrdbNyp/8Qv8I0YQGDECI3ZSgt1eM4spnMgsJpcLo1s3jG7d6k8TKVdFRZQS0YqKsMUoFm33bhxff41WVIQIxF+vYmRmJjbcVa1QHG15DUhqqKi8AY97KxWVN7S2KHFJ1SymfCBoKQcPMAa4OxXXUqSW2mA9HpxOmfJgPVVUstizCN07hw9dKxFScJL/dEZVPs8G70nMzQCblIypCnFhRRWj6gnXaU5H/bxGKdhtmwEIho6lrGI6/sA4wka/5AofDOJcuxbX0qW4li3D8fnnCMPAyMggMGwYZZMm4R8xgnC/fg3aBirHj6dy/PiUvU1Lr5ew10v4ZwnMvJISUVZW0/uw
VfdEYnop9u+/R/vsM1OhhOMP0hsdO8ZVJuGI72LAADRNw+jUCWzt3822ITsjtXcxZHr2mlLVg+gKPG/ZITRA13X9zUbOUaQRscF6Lr+onOuuKEtJsB6JZJ3jS+Z75/CG5zVKtf30CPXkgrL72aedxxJ3R95zCwZI+IsVrrMg7vTUEA77R+bCNccibLadSGkjGDqR0qrL8AfGYsjE3rITE1xi37LF7CEsXYpz1Sq08nKkphEcNIiy3/8e/ymnEDj22Lb79iwEMiuLcFYW4T596gsOV4thIPbtM4e2qpWINcRlq/5eVIR982bze0kJImaYuwsghTCVRrzhrji9FNmhQ7uf4dUapGoW0xdA/OgeirQmMliPYcCF51Xw+6tL6do5+YqhRBTzuvdVFnjn8rVjPS7pZlSlj47G9axyHcLzmTY8VrjOX1cEOSO7I0VlsQbZKpyOZdYahf+iaSVI6SYQPIXyyqn4g2OQMnnrSrSiIpzLl+Natgz30qXYdu4EINS7N5Xjx5u2hJNPNhusgxFNQ3bqRKhTJ+jfv/H04TBaSUmNMukYCFC2fXutMrF6LI6vvjK/743vMFDa7bUKJcJOUmNTidknMzPVDK8EaHcrqRXNo6WC9RgYrHB9gO6dwxL3YgIiwBGBY7i4bDY/2MbylsdDQAgGBcLcbYXrrDZzVMcYFmK/NR31LZyO/6GJCgwjm0BwDP7AOPzBUSRtJlVVFc7Vq03D8rJlOL/80ixHhw74Tz4Z/5Qp+EeOJNyrV3Kud7Bhs9X0Bhg4ECMvj4qGhtUCAVNRxDPKW8rEtmcPzu+/N/eVlcXNRjqdcQ3wcffl5SE9yZ7G1jZosoLw+XzCWueQlogVTuyX5CL8AumShGYXIU9OjlO4VMWDAPjvf//LP//5T6SUSCm58cYbOfPMM5Mid0PEBus550wzWE+/3slVDD/YvucVr84Czzx+tO+go9GRsyp+j43Lec/VnacytZpwnRdWBDksxh+SELtBvkaHzAU4HcsRIkjYKMDvH48/eAaB4DCSEpNASuxff11jWHZ++CFaVRXSbicweDD7//hH/CNHEjzmmINijDztcDoxunTB6NKFhKKGVFXVVSZxjPI1Q15V8f06GR5P7bBWHNtJnRleCXhZaAsuVZrTg5gJ/CHZgiQDscKJ/dIchN960/QL7JfmEHq+OClKQgjBtddeGxUPYsaMGVHxID7//HNWrlzJ5MmTcblcUV5eJ0yYEBUPYurUqei6jpSSKVOmsHDhQg499FDWr1/POeecw7hx45rs6ylR4gXrufn6Ug4dkLxgPX6qeMe9mPkZc1jpNBcBDvOfxuiqJ9hgH8ocr9mgj/CHuWV/FadXhaJcpWnadzXuLRz21Qgpsdt6UVl1OVXBMwiFBpMMR3haYaHZQ1i6FNfy5dh27QIg2L8/Fb/+Nf6RIwmcdJI5LKFoW7jdGN27Y3Tv3nhaKROb4VVYaA55FRUhgvGtMkZWVk0vJN4ML9v69WQ99RTCb7r7TleXKk1WELqut4pysN2ajVjfsKFPfOhEyOhxRVGpYb8gF9mAa2l5eJDw7Y17i+3UqVONcgA47rjjeOGFFwCYNWsWW7ZsYfbs2TidTl5++WUmT57Mk08+yaRJk9A0rcY1OMDgwYN56qmnauUUgtLSUsCMKFdQUJAS5dDcYD1NYb19Hbp3Lm94X2WftpfuoR5cWH4XFeICFrs78a5bo3vI4A9WuM4eNdNTa+Mym2sUzIh6wdBhlFfeiDfjQor2deFAp6OKykqcH35o9hKWLcOxwXS4F87JwT9iRM2ahIQaFUX7QQhkRgbhjAzCPXs2nl5KRGmpOawVxyhfPdxl/+47tLVr0YqL653hBaBVVpJ1111tS0H4fL6ndV2/wvougCd1Xb8y5ZI1g1jlELk/2WNiyYwHIYTgscce47LLLsPr9VJeXs7zzz+fVHkDAZj7qpcHH68N1vPUg8UJB+tpjH1iL294XmO+dw5fOdfhlC5OqzyXgvBkPnYdzjOZdhxScnpViF9XVDK8JlxnfXGZj6e04q/4A2dgGOb4vjczD2jGdEDDwLFuXY1CcK5ejQgEkC4XgSFD2D99OlWnnELo8MPVTBhF4giBzM4mnJ1NuG/fxtMbBmLvXmxFReSPGlVn9haALWaxcGuTSA+ipuS6rkufz5fkyeOJkcgbvqNv15rhpUikSxJaUJRUeZIZDyIUCvHwww/z7LPPMmTIEFavXs21117L+++/T8YBuj8IheCVf5vBer7fkdxgPQYGq5wrmO+dy2LPIgLCz2HBI7i09BkKbWew2J1BpSYYGAxz274qzqsMkmNI4sdldhAInkxF1TVWXOYD80lj27GjViEsX46tuBiA4GGHUX7ZZeaw0dChB63xUdEKaBoyJ4dQTg7hbt3iOmUMJ7jYsaVIREHs8fl8VwIrgZOA5La0SSQ0u8i0QVTWvgVKj0Ho+eKkXifZ8SC++uorCgsLGTJkCABDhgzB6/WyadMmBg0a1CwZq4P13Dsrm63b7Rx1eIAZf01OsJ4fbTt4xaOzwDuPH+zfk2104JcVV+GRV/E/1894PMtGhiEZb4XrPDYYRlTHZfbUjctcFjjDisvc/Kmh1c7uaoaNtmwBIFxQgH/UKPynnGIOGxWklzM0xcFJ6bRpUW7dwTSEl06b1opS1SURBXEppkO964CNwG9SKtEBIE8OEHq+OHoWU5IM1NWkIh5E165d2blzJ5s3b6Z///5s2rSJ3bt306sZUydTFazHj58l7sXM985juWspUkhO9I9kbNVDbLKdzDyvi5AQDPGHuK46XCdWXObM2LjMY624zCNoLC5zvYRCOD7/vLaXsHYtIhTCcLsJnHQSFRddhP+UUwgNHKjmuyvSjmo7Q5NcqrQCjTrrs1ZDTwDygUeBY3VdX50CWdLeWV8q40EsXLiQWbNmIazG7Oabb67XGV88Z33xgvXcfN3+Aw7Ws8G+nvneubzuWUiJrYSuoW6MrrqWoLiIxe5cfrJp5IUNJli9hUPCP8aNy+wPnBERl7l5y2/y9u2j4vXX63d2N3IkgeOPT2iKYVvgYHNcB6rMTSUdnPW9CCwFLtR1/SGfz/cPTN9KBx2pjAcxfvx4xjfz7SE2WM/MO0sOKFhPqdjPvz2voXvn8qXzc5zSyaiqs+gemsKnzqN4KtOBJiWj/CHu3FfF6YFvyHS+jcsbGZe5LxVV11hxmY+hOdNR4zm7c5KAszuFQpEUEmlC8nVdf8zn8/lSLo0iIapnyiUzWI9E8pFzFfO9c1jkeQu/qGJg8DAuL32UIu1sFnmy2O8R9AoZ/Gl/FRf619Hb/rrpCM9bHZf5aMoqpuIPnEHYGECTX2wCAdPZXbUdIcLZnf/kk+GGGyg67rhGnd0pFIrkkIiC2OXz+S4APD6f71xgZ4plUjTCzkIbDz2h8dnGfPJyw/zftH1c7CvH3YyRlZ+0neYKZ6/Od/btZBpZnFXxG7KNa1nm7sMjWTZcUvKLygAX+79klJiDx7EIW1Z1XOahlJb/nxWXuUfTLt6YszvLjUW1s7u8vDzCB9nwg0L
RmiSiIC4HrgTWAj2ASSmVSNEoUsL+UrDbJGNPreKX4yqbpBwCBHjP/V9071w+cL2PIQxO8A9jXOU/2W4/hQVeN34hODIQ5K7SdVxoPEcXx2tonuq4zCMor5zSrLjMjTq7O+UU/MOGHbzO7hSKNCIRBXGorusP+3y+AuC3QG9gQyqFUiSCIBQWzFnoZeu3dl55vvHZx9/YNzLfO4fXPK9QbCumS7gLF5VPR3Api90FLMvSyDYMLq76hstCLzDE/gyaowxDZhIInGb6PAqchqQJLieUszuFos2SiIK4DxgN3I5prH4Wcz2EohWx2SRul+SCc8u54Zr4HisBSkUp//G8wXzvHD5zfopDOhhVNY6+oSl84TiOpzPsGEIwPLCDWyt0zhf34RX7MGy5+P1n4w+OIxAcDiTYRWnI2d3xxytndwpFGyIRBeH1+XwuwKXr+hyfz3dNqoVSNIwQMOhIif6nQgry68ZpkEjWOD9G985hkftNKrVKBgQHclXpg+zTxvMfdzZveDS6GmXcFJjP5fIe+rOFsOyBPzCBkpq4zIk14A06u7voInO2kXJ2p1C0ORKd5vo68Defz+cGtqVWpObTedBYbLvrDrWE83Mp/OydA84/le6+lyxZwj//+U9CoRAdO3bk/vvvp2c9DsO6dg4zboysoxwKtZ941buA+d65bLdvI9PI5MzKC8kPX8tyd3/+lWXHLsOcGf6AKwIPMdZYAqH++INnURw4I+G4zI06uzvlFPzDhytndwpFG6fRhXItyAEvlOvWfXC9x37c8UmzBaumpKSEr7/+Osrd9969e6Pcfa9evZqZM2cyefJkRo8eXePEzzAMlixZEuXue9GiRei6zobi+ksAABZaSURBVN69exk+fDivv/46/fr145VXXmHhwoW89NJLceXYvG0zt2/6G/eNu5+ORif+536X+d45LHX9j7AIM8R/Isf7r2eH/ef8x+2kXLNziLGNy8NPc3H4ZXJCPWoWriUUl7kRZ3f+kSNbxNndwbaI6mArL6gyN5V0WCiXFmTfei+O9d80+/zc86+q91jw8EPYf/vNjeaRKnff27dvJz8/n379zMb6tNNO4/e//z3FxcXkxFkEVqrt5we+57Kci9llK6TItoeCcGcuLr8Jp/wt//V05P7sLLyyggnhOVwWfJHjAxqBwBn4A4sokV0bLatydqdQKOpVED6fr5Ou6yUtKUxbIpnuvvv27cuuXbv47LPPGDRoEK+++ioAO3bsiKsgykUZUkjWO/bQWf6FDsZYjg4JnsnoSlDYOcFYzaOBl/lVVRGuwCn4g0+wT3aqk08kDTq7O+202hgJytmdQnHQ0FAP4h8+n68TsAl4B1ip63rywo01kUTe8BsaYipa8EQyxUmqu+/s7GweffRRbrvtNvx+P6NGjaJDhw7Y6/GVIbER5iz83MZ3wg5CsMpZxDWhZ7m4ahv9qo7GH/wTEi/xAyjSuLO7iy/GP3KkcnanUBzE1KsgdF2/BsDn8w0Afg5cZQUM+ghYqOt6/Y6F2jnJdvcNMHLkSEaOHAnA7t27eeyxx+r15irpjuRIELUR9j7f/SEidC7gxF/PtW3bt9dMP411dld2zTXtztmdQqE4MBq1Qei6vgmzF/GI5dl1KNAVSDsFEc7PrXcWU7JIhbtvgF27dlFQUIBhGNx1111cfPHFeL3eenKq+0YvQiPq7ovj7A4g1L27cnanUCgapUlGal3Xw5iBg9KSZExlbYiNGzfy0EMP0bdvX84++2ygae6+p02bRo8ePZg4cSIQ7e77nnvuYfXq1QSDQUaOHMn06dMbyE0CcUb7GnF2VzZpEv4RI5SzO4VCkRBtZhZTOpBKd9/33ntvwnn9rGwHxxtfsDNYQThsI+B2kXPppdHO7o49to6zO4VCoWgKjSoIn883Ttf1t30+X3/gD8A8XdeXpV40RX3YSsKM2/4ODxy3kDuv+RMrhw3FvnkzleedZ842Us7uFApFEkikB3Ez8DYwHXgS+BcwJJVCKRohDNqL0HXrLmZdfxNSCHY20DtRKBSK5pDIFJwsn8/XEwjrur4KKE+xTIpGkEa0i42wuZpSoVAokkoiCuIfwJ3APy1fTB+mVqRapJSkkSuQtEBKSais1nur4fFQOm1aK0qkUCjaK4kMMfUBpum6Xu0oqcVaI7fbTUVFBRkZGS11ybRGSsmOrVv5TtcRmNNVS6dNo7KZsawVCoWiIRJREFuBe3w+Xwfg38ACXdeLUyuWidPpJBAIsGXLlgYXpGmahmHUdXudLlSICvZpFYTJQuJBQ5JlQKaU2EQlQli3s9yFKPaDYSA9HmR2NtIKMC2lrLkXFb/8JT179mTX0KGtWCqFQtHeSdibq8/nywEeA8YBi4EHdV1fnkRZ6nhzrWbv3r2sWLGCQCAQ97jX66WioiKJoiSHQlshS1xfsMt2KAb9ccowQwOSQcEwThnC6ViGw/4pcn8G2rwAYleI0IAB+IcOrdfnkRCCrKwsxo8fz759+1q4RK3Lwebp82ArL6gyN5VW9+bq8/nOACYCnTB7EFdZAr0OjEyVYJF07NiRX/ziF/UeT7dKtUsrZHqHl1nsHoIhpuIxqphcJrm8PEiWBJu2iQ6uK7F7NiMetiFuMaj6xbmU/WMyoUMOSegaDrWuQaFQpJhEhpiOAqbr/9/evYdZVdd7HH/v28zsuTnSmJwRfCRvUXjwHkXeSs2MjjyWXxXJUo9o4BUxT6UhWieOlYEKJuUlb9E30bDk2EVDfCxFTU8q3qkMUbEG5rLnumev88feJNACR2DNnr3n83qeedh7zV6/9f3BPHzmt9b6/Zb7RjPEzOzMaEoqXd108a36xdxaM5Le2Ewqgw6+3NbC2e0x6gKAgOq2a6kZ/l1irX3ET4nT2WC0PzCNvlGjil2+iMhG+hMQPwUuM7Na4IvAae7+I3d/MdrSSkdAwIKaR/heXSWZ+JmkgnbObH+T6W3V1Af50V9q5R+pz36ZxLhV8LsY3fcb7bNm0KenronIINWf21xvBOYATYW1mE6OtqTScm/VSsbu/AxX7HAsnbGDscxL/OnNHJe3VlMfQPK55xj2PWPHxs+SOGAVvb/4CH8f/jgtl3xf4SAig1p/RhAJd3/BzNa/f9dQMbORwK3AcCAHLHD3uVtd5SD0SEULMxre5rXk/sSCFo7qXM7V6/ZiWDAcgNRTT1F3zRwq/v235GZBrrWe1jfn0zP+iCJXLiLSP/0ZQTxoZj8AmsxsLvCbfuyTBS5y99HAOGCamX1oG+ocNJ5OZvnkTquxxpG8ltiDg3p+zaNvtXHL2g8yLIhT8dhjDJs0icYzJ5C8aCm5K6Gr69P8I/YYPTUKBxEpHf15HsSVZjYGeAB4EXitH/u8AbxReN1mZs8DuwArtq3c4lmRjPHVhrU8UbErkGDPXmfu2tGMzY6DIKDy4WXUzp1L5aOPkj2hnuyiakgHtLX/D109JxLhnWgiIpHY7DwIM/sZcIq792ywbW/gNnc/uL8HMLPdgGXAGHdv3eR7U8jfNou7H7C5eQ7vJplMks1G8zTU52I5vpJo5r
fxeqCV4cHPuCY7muOCwyEIiN13H4nZs4k//ji5UU3kfrInjH2IgH0IYrdBbHQkdUXZ58FqqPV5qPUX1Of3qiI/kTay3z63FBCfA74MfN7d15nZp4ArgVPd/YX+NF648+kh4Fvufve7fHyzE+XeTRTzIF5MxpldB7+uqgbaqQ1uZkZbmtMynyeZi1O1ZAl1c+eSWrGC7K67krn0RCpO+l9SqWfp6Dqd9o6vA1XbtaYNDba5HwNhqPV5qPUX1Of3qmgT5dx9kZm9AdxnZr8CPgYc7e7r+tOwmaWARcAd/QiHQeOlZJzv1aX4ZVUFMTKkuJrJmbVc3HYWO/TWkl68mNprryX18sv07r47a+fMITipj9r6yyCoYF3bzfT0Hl3sboiIbLPNBoSZXUn+2ZavA9OB64HpZoa7f2NLjZpZjPztsc+7+9Xbsd7IvJSMM6e2gnvTKaCDROwaDu/6EzNbL2T3jl2pvusuaq+7juRf/0rv6NE0X3893RMOo67uUqoq76an96O0tl9DLtDS2yJSHrZ0kfq3hT8fAOa9x3bHA18AnjGzpwvbvubuS95jO5F7ORlnTm0li9NJ4nQRj13Dnr1LmNl6AYeuO5XqhQupnT+f5OrV9IwdS/PMmXQddRTJ1DPsWHssifhrtHfMoKPrPCBR7O6IiGw3WzrF9NDWNlpYxG9Q37bzSiLOnLpKfp5OkqCbBNcxLLiZC1vOYNLbt7DDbQupveFCEmvW0H3QQbR85zt0H3YYxALSVQuoTc8ml9uJdW2L6M32+5q9iEjJ6M9EubLySiLO3LoKfp5OkSBLFfOJcTWTMxO4YPWd7HLjPdT88OMk1q6l+5BDWDt/Pj3jxkEsRiz2NvU1F1JZ8Tu6ej5NW+Y7BMGOxe6SiEgkhkxAvJqIM6cQDEn6aAhuIRO/gkO69uGyv/2QsfMepObmTxFvbaXryCNpPu88eg844J/7p5LLqK89j3isjdbMt+nq/gKDfJAkIrJNyj4gVhaC4Z50ihQ5RvbdzZvJGQzP7sClf/4Gn/3uCqpvPZl4Rwedxx5L2/nnkx0zZoMWeqlJX0VNej7Z7F40ZxbS1/fBovVHRGSglG1A/DkRY05dJXenU1QQ8OHsg7yaPJvOeCczXzuDs7/ZTMNtX4GeHjonTqT9nHPI7r33Rm3E439lh9pppJJP0dk1mbaOy4F0UfojIjLQyi4g/rJBMCSBj/Y8yYup/2Rl8i9MfmsiX78cRt50LQQBHSecQPu08GcxVFYspq76EiBOS9sNdPdOGOiuiIgUVdkExBOpONMb0ryajFMFHNX1KiuTZ/Fk5XIOaT6Q2Vd8gP2u+zkkk3RMmkT71Kn0jRgR0lIHdTWXkq78KT29B9KamUcuF/Y5EZHyVvIB8WQqzsXJHl5srMlviMX4SNcXWZq+m1GZXbjz2wdx3OWPQ1WazBln0H7WWeSGDw9tK5l4lvraqSTiK8l0nk+mczpl8FckIrJVSvZ/v7fiMa6qq2BhdQUQQOydO4qeT/yKb96wF+ee+xLJyhYy55xL5swzyb3vfZtpLSBdeRO11d8kFwxjXdtP6c2OH5B+iIgMViUbEFN3TLO8IrFRMKz3bFOG92XX0H7+DDKnnUbQ0LDZdmKxZuprplNZ8Ru6e46kNfN9gmBYlKWLiJSEkg2Im+9YxKzqdhaeeDLE4my4cmrlWV/nrVNPJait3WIbqeTvqa89l3ismbbMFXR2n47mNoiI5PXniXKD0h7fmMlOzReQ7tuHBLdC0EE82wVA+9Sp7xIOWWrSV9FQZwRBNWtbf0Fn9xkoHERE3lGyAbEm9zq3nQZBcg0pLqIytg/J4FY+/Oz/bXG/eHwVDXWfpyY9l64eY23L/WT7xmxxHxGRoahkTzH997dryMUy/3wfYw2VfRdxxBM1cPRLoftUpu6jruZiiPXR0j6P7p6JA1WuiEjJKdmAePTYBnqqMhtt66mCP3ymAXo3/XQnddWXk666nd7svrS2z6Mvt9tAlSoiUpJK9hTTvb3LWX3dtbSN2IWOeIy2Ebuw+rprubd3+UafSyReYFj9Z0hX3U6mcyprW+9ROIiI9EPJjiAAOo8/ns7jj9/MM10Dqipvo656FrmgjnWtd9KTPawodYqIlKKSDojNicXWUVdzMVUVS+juOZzWzByCYKdilyUiUlLKLiBSyeXU10wjHl9DW8dldHZNoYTPpImIFE3JBsSO9UeTSj6Xf5OD9xcmPwcB9OV2Y23rYrJ9+xavQBGREleyv1r3Zg8gCCr+ZXtf36jC3AaFg4jItijZgOjovIBNZz4HQYp1bYsIqCtOUSIiZaRkAyIX7ExX90SCIP8+CFJ0dk8iF+xc3MJERMpEyQYEQKbzEmD9aaZEYVQhIiLbQ0kHRC7Ymc7ukwiI09l9Irng/cUuSUSkbJTsXUzrdXReQLpqpUYPIiLbWckHRC7YmSD+ALlg05nUIiKyLUr6FJOIiERHASEiIqEUECIiEkoBISIioRQQIiISSgEhIiKhFBAiIhJKASEiIqEUECIiEkoBISIioSJZasPMbgImAGvcfUwUxxARkWhFNYK4BTgmorZFRGQARBIQ7r4MaI6ibRERGRhFXc3VzKYAUwDcncbGxq1qJ5lMbvW+pUp9Ln9Drb+gPg82RQ0Id18ALCi8Df7+961bsruxsZGt3bdUqc/lb6j1F9Tn96qpqWk7V7Mx3cUkIiKhFBAiIhIqkoAws58AfwD2NrNVZnZGFMcREZHoRHINwt1PjqJdEREZODrFJCIioRQQIiISSgEhIiKhFBAiIhJKASEiIqEUECIiEkoBISIioRQQIiISSgEhIiKhFBAiIhJKASEiIqEUECIiEkoBISIioRQQIiISSgEhIiKhFBAiIhJKASEiIqEUECIiEkoBISIioRQQIiISSgEhIiKhFBAiIhJKASEiIqEUECIiEkoBISIioRQQIiISSgEhIiKhFBAiIhJKASEiIqEUECIiEkoBISIioRQQIiISSgEhIiKhFBAiIhJKASEiIqEUECIiEkoBISIioZJRNWxmxwBzgQTwI3efHdWxRERk+4tkBGFmCWAe8GngQ8DJZvahKI4lIiLRiOoU08HAK+6+0t17gIXAcREdS0REIhDVKaZdgL9t8H4V8JFNP2RmU4ApAO5OU1PTVh9wW/YtVepz+Rtq/QX1eTCJagQRC9kWbLrB3Re4+4HufmBhn636MrMnt2X/UvxSn8v/a6j1V33e6q/IRBUQq4CRG7wfAayO6FgiIhKBqE4xPQ7saWajgNeBk4BJER1LREQiEMkIwt2zwDnAr4Dn85v8uSiOVbAgwrYHK/W5/A21/oL6PKjEguBfLg2IiIhoJrWIiIRTQIiISKjIltoYCGZ2EzABWOPuY4pdT9TMbCRwKzAcyAEL3H1ucauKlplVAcuASvI/r3e5+8ziVjUwCisSPAG87u4Til1P1MzsL0Ab0AdkC7e/lzUzawB+BIwhPxXgdHf/Q3GrekepjyBuAY4pdhEDKAtc5O6jgXHAtCGwhEk38Al3HwvsCxxjZuOKXNNAOZ/8TR5DyRHuv
u9QCIeCucD97v5BYCyD7N+7pEcQ7r7MzHYrdh0Dxd3fAN4ovG4zs+fJz1pfUdTCIuTuAdBeeJsqfJX9nRVmNgL4DPAtYHqRy5EImFk9cCjwJYDCskQ9xaxpUyUdEENZIRj3Ax4rcimRK5xqeRLYA5jn7mXfZ2AO8BWgrtiFDKAA+LWZBcAN7j5ob//cTj4AvA3cbGZjyf+Mn+/umeKW9Y5SP8U0JJlZLbAIuMDdW4tdT9Tcvc/d9yU/I/9gMyvr601mtv662pPFrmWAjXf3/cmvAj3NzA4tdkERSwL7A9e7+35ABviv4pa0MQVEiTGzFPlwuMPd7y52PQPJ3dcBSyn/607jgf8oXLRdCHzCzG4vbknRc/fVhT/XAPeQXxW6nK0CVm0wIr6LfGAMGgqIEmJmMeBG4Hl3v7rY9QwEM9upcKcHZpYGjgReKG5V0XL3r7r7CHffjfwyNQ+6++QilxUpM6sxs7r1r4GjgWeLW1W03P1N4G9mtndh0ycZZNcTS/oahJn9BDgcaDSzVcBMd7+xuFVFajzwBeAZM3u6sO1r7r6kiDVF7d+AHxeuQ8TJL9vyyyLXJNvfzsA9Zgb5/5fudPf7i1vSgDgXuMPMKoCVwGlFrmcjWmpDRERC6RSTiIiEUkCIiEgoBYSIiIRSQIiISCgFhIiIhCrp21xFNmVmhwEzyf/y0wdc5u6/N7MW4I/k13I6HWgCjnT3Swv7XQ4sdfelG7RVTX7Ji70K+y1w9x9vQ20N5BceHFITHKV0aQQhZcPMGoFZwER3PxyYCHQWvv2Mux8BXER+jaP+mAk8VGjr48Cft7HEBuD4bWxDZMBoBCHl5Fjg9vXrU7l7G/DUJp95mvyaTv3xMXe/pNBWQP65FJjZNeSXHm8FTiG/aOKR7n6pmX2psO9S4CagGRgFHAdMAY4ys6XACe7+9nvvosjAUUBIOWkCngEws0nAVOBRd5+xwWcOBV7c2gOY2UFAjbsfamaTgbPZ/Iq6O5JfGuRk4HPkH06/a7kvmyHlQ6eYpJy8QT4kcPc7gclAY+F7+5jZ78iHxmygi/xT6tar4p3TUVuyO/lrGZB/2tsebPx8itgGr1e4ew54nfzpJZGSohGElJMlwF1m5u7ewsY/3+uvQQBgZj3Afma2/pek/YGrNmnv92Z2irvfUVgocTz59XKOLnz/QOBVoIX8mlEA+wB/KrzeNDh6gcS2dFBkIGkEIWWjcE5/FrDYzB4E5pN/hnfYZ/9Bftn0ZcDD5J913bzJx2YBhxWuGTwC7O7uy4FOM3sYmAT8gHwgNJnZEmCnLZT4JjDMzO4ys2Fb2U2RAaPF+kREJJRGECIiEkoBISIioRQQIiISSgEhIiKhFBAiIhJKASEiIqEUECIiEur/AWx4OVB0+KIqAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAEkCAYAAAAvoUY9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzsnXd4FMX/x19zLe1yCZAAJqFKEZCvIip2EBXrz4J69t4FBAFpgvTeey8KqBwIothQEVCxISgoLSA1QEgjuSSXXNvfH7tJLsmFBMiRwryeJ09ud2dnPrNl3lM+OyMURUEikUgkEl1FGyCRSCSSyoEUBIlEIpEAUhAkEolEoiEFQSKRSCSAFASJRCKRaEhBkEgkEgkgBSEfIURDIYQihLipom0pC0KIIUKI/RVtR0kUtU8I8bwQwn2B0r5gaV1MCCE6aO9IXDnEdUgIMbA87JKUjhBiiRDiu9LCSUEo4ChwCfAbgBAiTnv4O1SoVdWHFUBsZU1LCHGTdr8bBsSiACCEGCiEOHQBk9yC+o4cv4BpSi4ghoo2oLKgKIoHOFnRdlRXFEVxAI7qlpY/hBAmRVGcFZV+oNDyJN+RakzAWghajetnIYRd+/tbCHGndsxv94wQYr8QYojPtiKE6CaEWCGEyBJCHBFCPCKEiBBCLNfi/U8I8bDPOXlxPymE+EYIkS2E2COEaC+EiBVCfKnFtUsIcbOf8/JsOqr9/0Hbf6iEfL4shDjmJ55lPvteEEIkCiGEtl1Ha8IlaXn4WQhxi094IYSYL4Q4IIRwaHkcJYQIOsP1rqnFs0kIEXmme6OFf0AIsV27PqeFEL8LIdr4HL9UCLFSCJGqhdkhhLhPO1ZDCLFMux8OIcReIUSvvPyVkF6hbpy8bSHEjUKIbVoafwgh2hY573YhxE4hRI5mQ3vt+j5dXmlprYIfteAHtfg3+pz/uBDiL82GQ0KISUKIMJ/jG4UQC4UQw4UQJ4CEMl7jJkKIT7RjaUKI9UKI1kXy0lYI8bUQIkMIkanF0U4I8TwwHGig2asIn3enSBx5XT13CCE2a/bsEtr76BOuuRDiCy2dTCHE50KIJn7iiTvLeK8QQmzRrt8+IYTVj41mIcRUIUSCFs92IURnn+NWIYRTCHGtz75ntTjbFI3PT/x5z8Ct2vPkEOq7EiOEuEVLL0sI8Z0QItbnvEZCiNVCiOOaXTuFEM8UiXujEGKBEGKQEOKkUN+ZJUWekauEEF8JIU5p1/YPIcRdReKpJdR3Lkuo5cVwIcT7okhXj1DLxD1a3uOFEO8KIQw+x2uIgjIzUQgxAijx3SyEoijl/gfogVRgEtBU+3sIuFk73hBQgJuKnLcfGOKzraDWSJ4DmgCzgGzgK+B5bd90IAuoVSTuA8CDQDNgDWoz9zvNjmbAJ6iFvtGfTUAbbbszUBeILiGvjbRwzbXtl4BTwHGfMEuBj7XfIcAuLf2rtTy8C+QCLbQwOmAE0E6z637gBDDUJ84hwH7td30tzlVAUBnuT13ACfTR7G8BPAm09jmeqF2vm4BLgQeAe3yO9wWu0s5/GsgEXvBnn7b9POAusu0FNgM3A5cB67X7ZtDCxGr3ewHQErgN+FO73k+fIX9nlRbq83q/Fu81Wv5q+pybBjwDNAZuAXYAS33i3wjYgTmana3LcI3roD7bs7XwzVGf5RS0Zw1ohfpsf4T6rDQFngCuR32OxqA+w3W1P3MJ16ODlre/gbu0eD4ATgORPs/lYeB7oK329wPqO2kqEk/cWcabAHwJXKHZ/od2XwdqYYSW1kbU560x8Kp2/W7zycd87Z5ZUN9hO9CtjGVS3jOwEfW9ugqIR60IbASuQ33n9wArfM5rDXQB/of6HnQD3MCtRe7/aWAy6rN1l7Y9tMg9eA71+WiG+n47gWY+YT4D9gG3avd+MZAOfFfkvTqMWo41Au4BjgDDfcKs0e5bRy2eZUCGbzwlXqcACUIN7UHpUMLxhpRdEKb4bEdr+6b7Seu+InH38Alzjbavl8++vAL/8hIEIe5MeShi90HgTe33cmCodgNaavuOAa/6PJjH0Ao9nzg2+ObVTxpvA/FFC1ztQU0AZgK6Mt6fvLw3LOH4cNTCKuws7vlU4Nui9hV5IYsW0gpwlc++6ygsriOBQ4DeJ8xdnJsglJbWTf6uiZb+60X23aKFraFtb0R9kXU+YUq7xkOAX4vsE6gFXg9teylqYev3vgIDgUNluDcdNFs6++yrq+27U9t+CbWQjvIJUwe16+3ZIvEUFYQzxfsyamWhhk+Yy7UwA33iyQEiiti9CPjUZzsE+BewAdt9j5XhGuQ9A1f67HtH29e2yHuWXEpca4H5PtsbgR1FwswBfiklnr+Bd7XfTTVbfAXQiCr432nbodo9uqtIPM8Cp7XfTbR47vA5bkItI0oVhICMISiKkiaEWAB8I4TYAGwC1iiKsvccovvbJ94kIYQHtYbmm5YTqF3SeRT0e+7ws6/oeefCD6hqPAtV3Wei1ug6avbGohb4UFADPS0K97AE4dPvLYR4BfVlagiEodZki3bxRaPWehcoitL7LOzdAXwD/COE+Bb1gV6tKEpeN1lbYIuiKFn+ThZC6FBrvo+jCmcw6sN7+CxsgILaZR4J2v86wF7U2tQfijq+k8cvZ5lGWdMqhhAiGmgATBJCTPA9pP1vglrbBfhTURSvT5jSrvE1QFshRGaRZENQCwdQ78PXReI9H/7K+6Eoyknt2ayj7WoF7FIUJdknTKIQYq927FzjbQnsVhQlzSfMP0KIdJ/zr0ErtIq8EybUWnzeeQ4hxGNaeomoLcazQQF2+myXVC7UEkLoFUXxCCFCgfeA/0MdUDehvqs/FIn7ryLbCUCnvA3tWRqKWk7URX2fg1GfL1CvE8Cv+cYqiksIsRUI13a1Qn0+PhFCKD5p6YFgLY28eLb4xOMUQvwBmCmFgA0qK4ryihBiKupFuQMYLoToqijKXNSmGxTv1zL6icpVhn0KxQtLV5HjJe0rj3GUDcBUIUQr1Jv3u7bvNsADHFUUJc8FUwfsRm3yFSUbQAjxKKqo9EMV0wzgUdQasy+nUR/mB4QQUxRFOUYZ0B70u1FfxNuBh4ExQohHFUVZlxfsDFH0AvoDPYFtqE33t4F7y5K+D94ihb2/e1LUjjPZdb5pFSXvWHeKFwCgtvTyKCSeZbjGOtTuma5+4vUtLM81v/7wN9B9pmsN6jtamg1nircs5+tQ83xNGeLOG+OLRK3MpZYSty9+nwFFUfyVC3ll03jU7tJeqN1JWcBEIKIUO4uWSUtQu3b7oPYoOICPUQWm6HklkRffo6gt0qKkUtaxglISCAiKovyjKMokRVHuBhai9gsCJGn/Y/LCCiFqc+HcEstC3g3WlyHs90BN1EJxs6IoblRBaI9a
[... remainder of base64-encoded PNG plot image data elided ...]\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "print(\"Unmanaged Memory Plots\")\n", + "system = \"summit\"\n", + "for exp_type in EXP_TYPES:\n", + " for exp in EXPERIMENTS:\n", + " if exp == \"duplicate_keys\":\n", + " continue\n", + " plotThroughput(system, exp, exp_type, \"noindex_nomanaged\")\n", + " # plotSpeedup(system, exp, exp_type, \"index_nomanaged\")\n", + " \n", + "print(\"Managed Memory Plots\")\n", + "for exp_type in EXP_TYPES:\n", + " for exp in EXPERIMENTS:\n", + " if exp == \"duplicate_keys\":\n", + " continue\n", + " plotThroughput(system, exp, exp_type, \"noindex_managed\")\n", + " # plotSpeedup(system, exp, exp_type, \"index_managed\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hash-graph-dehornetify/experiments/dgx1/duplicate_keys/duplicate_keys.sh b/hash-graph-dehornetify/experiments/dgx1/duplicate_keys/duplicate_keys.sh new file mode 100755 index 0000000..bed00b3 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/duplicate_keys/duplicate_keys.sh @@ -0,0 +1,54 @@ +keycount=26 +gpucount=8 +tablesizes=($(seq 21 1 31)) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' ../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +rm $resultsfile +echo "keycount,tablesize,gpucount,time" >> $resultsfile +echo "duplicate_keys" +echo "keycount,tablesize,gpucount,time" +echo "build tests" +echo "build tests" >> $resultsfile + +for i in "${tablesizes[@]}" + do + let kc=$((echo 2^$keycount) | bc) + # let gc=$((echo 2^$gpucount) | bc) + let gc=$gpucount + let ts=$((echo 2^$i) | bc) + + echo "tableSize: ${ts}" + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc build | grep "time") + + tokens=( $ans ) + time=${tokens[3]} + + echo "${kc},${ts},${gc},${time}" >> $resultsfile + done + +echo "intersect tests" +echo "intersect tests" >> $resultsfile + +keycount=$((echo $keycount - 1) | bc) +for i in "${tablesizes[@]}" + do + let kc=$((echo 2^$keycount) | bc) + # let gc=$((echo 2^$gpucount) | bc) + let gc=$gpucount + let ts=$((echo 2^$i) | bc) + + echo "tableSize: ${ts}" + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc intersect | grep "time") + + tokens=( $ans ) + time=${tokens[3]} + + echo "${kc},${ts},${gc},${time}" >> $resultsfile + done diff --git a/hash-graph-dehornetify/experiments/dgx1/duplicate_keys/results/dk_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx1/duplicate_keys/results/dk_index_nomanaged.txt new file mode 100644 index 0000000..086aa08 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/duplicate_keys/results/dk_index_nomanaged.txt @@ -0,0 +1,25 @@ +keycount,tablesize,gpucount,time +build tests +67108864,2097152,4, +67108864,4194304,4, +67108864,8388608,4, +67108864,16777216,4, +67108864,33554432,4, +67108864,67108864,4, +67108864,134217728,4, +67108864,268435456,4, +67108864,536870912,4, +67108864,1073741824,4, +67108864,2147483648,4, 
+intersect tests +33554432,2097152,4, +33554432,4194304,4, +33554432,8388608,4, +33554432,16777216,4, +33554432,33554432,4, +33554432,67108864,4, +33554432,134217728,4, +33554432,268435456,4, +33554432,536870912,4, +33554432,1073741824,4, +33554432,2147483648,4, diff --git a/hash-graph-dehornetify/experiments/dgx1/duplicate_keys/results/dk_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx1/duplicate_keys/results/dk_noindex_nomanaged.txt new file mode 100644 index 0000000..086aa08 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/duplicate_keys/results/dk_noindex_nomanaged.txt @@ -0,0 +1,25 @@ +keycount,tablesize,gpucount,time +build tests +67108864,2097152,4, +67108864,4194304,4, +67108864,8388608,4, +67108864,16777216,4, +67108864,33554432,4, +67108864,67108864,4, +67108864,134217728,4, +67108864,268435456,4, +67108864,536870912,4, +67108864,1073741824,4, +67108864,2147483648,4, +intersect tests +33554432,2097152,4, +33554432,4194304,4, +33554432,8388608,4, +33554432,16777216,4, +33554432,33554432,4, +33554432,67108864,4, +33554432,134217728,4, +33554432,268435456,4, +33554432,536870912,4, +33554432,1073741824,4, +33554432,2147483648,4, diff --git a/hash-graph-dehornetify/experiments/dgx1/experiments.lsf b/hash-graph-dehornetify/experiments/dgx1/experiments.lsf new file mode 100755 index 0000000..39c88e9 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/experiments.lsf @@ -0,0 +1,91 @@ +#!/bin/bash +#BSUB -P BIF115 +#BSUB -W 2:00 +#BSUB -nnodes 1 +#BSUB -alloc_flags gpumps +#BSUB -J snmg-hg-experiments +#BSUB -o snmg-hg-experiments.%J +#BSUB -e snmg-hg-experiments.%J + +buildpath="../../build" +includepath="../../include" + +declare -a modes=("noindex_nomanaged" "index_nomanaged" "noindex_managed" "index_managed") +# declare -a exps=("strong_scaling" "weak_scaling" "duplicate_keys") +declare -a exps=("strong_scaling" "weak_scaling") + +# 64-bit section +sed -i 's/^#define B32/\/\/&/' $includepath/MultiHashGraph.cuh +keysize=8 +echo "64-bit keys" + +for mode in "${modes[@]}" + do + bytes=8 + if [ "$mode" == "noindex_nomanaged" ]; then + sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + elif [ "$mode" == "index_nomanaged" ]; then + sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + bytes=16 + elif [ "$mode" == "noindex_managed" ]; then + sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh + elif [ "$mode" == "index_managed" ]; then + sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh + sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + bytes=16 + fi + + make -C $buildpath multi-hash + + echo $mode + + for exp in "${exps[@]}" + do + echo $exp + ./$exp/$exp.sh $bytes $keysize > ./$exp/results/$exp\_$mode.txt + head -n -$(cat ./$exp/results/$exp\_$mode.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode.txt > ./$exp/results/build/$exp\_$mode.txt + tail -n +$(cat ./$exp/results/$exp\_$mode.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode.txt > ./$exp/results/intersect/$exp\_$mode.txt + done + done + +# 32-bit section +sed -i 's/^\/\/.*#define B32/#define B32/' $includepath/MultiHashGraph.cuh +keysize=4 +echo "32-bit keys" + +for mode 
in "${modes[@]}" + do + bytes=8 + if [ "$mode" == "noindex_nomanaged" ]; then + sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + elif [ "$mode" == "index_nomanaged" ]; then + sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + bytes=12 + continue + elif [ "$mode" == "noindex_managed" ]; then + sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh + elif [ "$mode" == "index_managed" ]; then + sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh + sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + bytes=12 + continue + fi + + make -C $buildpath multi-hash + + echo $mode + + for exp in "${exps[@]}" + do + echo $exp + ./$exp/$exp.sh $bytes $keysize > ./$exp/results/$exp\_$mode\_32.txt + head -n -$(cat ./$exp/results/$exp\_$mode\_32.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode\_32.txt > ./$exp/results/build/$exp\_$mode\_32.txt + tail -n +$(cat ./$exp/results/$exp\_$mode\_32.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode\_32.txt > ./$exp/results/intersect/$exp\_$mode\_32.txt + done + done diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1-nov14.tar b/hash-graph-dehornetify/experiments/dgx1/results/dgx1-nov14.tar new file mode 100644 index 0000000..890c1b7 Binary files /dev/null and b/hash-graph-dehornetify/experiments/dgx1/results/dgx1-nov14.tar differ diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/duplicate_keys.sh b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/duplicate_keys.sh new file mode 100755 index 0000000..bed00b3 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/duplicate_keys.sh @@ -0,0 +1,54 @@ +keycount=26 +gpucount=8 +tablesizes=($(seq 21 1 31)) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' ../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +rm $resultsfile +echo "keycount,tablesize,gpucount,time" >> $resultsfile +echo "duplicate_keys" +echo "keycount,tablesize,gpucount,time" +echo "build tests" +echo "build tests" >> $resultsfile + +for i in "${tablesizes[@]}" + do + let kc=$((echo 2^$keycount) | bc) + # let gc=$((echo 2^$gpucount) | bc) + let gc=$gpucount + let ts=$((echo 2^$i) | bc) + + echo "tableSize: ${ts}" + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc build | grep "time") + + tokens=( $ans ) + time=${tokens[3]} + + echo "${kc},${ts},${gc},${time}" >> $resultsfile + done + +echo "intersect tests" +echo "intersect tests" >> $resultsfile + +keycount=$((echo $keycount - 1) | bc) +for i in "${tablesizes[@]}" + do + let kc=$((echo 2^$keycount) | bc) + # let gc=$((echo 2^$gpucount) | bc) + let gc=$gpucount + let ts=$((echo 2^$i) | bc) + + echo "tableSize: ${ts}" + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc intersect | grep "time") + + tokens=( $ans ) + time=${tokens[3]} + + echo "${kc},${ts},${gc},${time}" >> $resultsfile + done diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/results/dk_index_managed.txt 
b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/results/dk_index_managed.txt new file mode 100644 index 0000000..1544137 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/results/dk_index_managed.txt @@ -0,0 +1,25 @@ +keycount,tablesize,gpucount,time +build tests +67108864,2097152,8,0.0580301 +67108864,4194304,8,0.0705024 +67108864,8388608,8,0.0683704 +67108864,16777216,8,0.0721684 +67108864,33554432,8,0.0624497 +67108864,67108864,8,0.0654193 +67108864,134217728,8,0.0740516 +67108864,268435456,8,0.0812042 +67108864,536870912,8,0.0943094 +67108864,1073741824,8,0.0976015 +67108864,2147483648,8,0.112628 +intersect tests +33554432,2097152,8,0.172413 +33554432,4194304,8,0.178891 +33554432,8388608,8,0.127246 +33554432,16777216,8,0.163775 +33554432,33554432,8,0.115248 +33554432,67108864,8,0.0961229 +33554432,134217728,8,0.159902 +33554432,268435456,8,0.175639 +33554432,536870912,8,0.182176 +33554432,1073741824,8,0.239101 +33554432,2147483648,8,0.24948 diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/results/dk_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/results/dk_index_nomanaged.txt new file mode 100644 index 0000000..a8f1d2b --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/results/dk_index_nomanaged.txt @@ -0,0 +1,25 @@ +keycount,tablesize,gpucount,time +build tests +67108864,2097152,8,0.0781353 +67108864,4194304,8,0.0749967 +67108864,8388608,8,0.0652605 +67108864,16777216,8,0.0730378 +67108864,33554432,8,0.0591237 +67108864,67108864,8,0.0637317 +67108864,134217728,8,0.0773632 +67108864,268435456,8,0.0704553 +67108864,536870912,8,0.089729 +67108864,1073741824,8,0.0857754 +67108864,2147483648,8,0.120108 +intersect tests +33554432,2097152,8,0.18877 +33554432,4194304,8,0.156562 +33554432,8388608,8,0.182072 +33554432,16777216,8,0.157746 +33554432,33554432,8,0.102901 +33554432,67108864,8,0.114157 +33554432,134217728,8,0.152416 +33554432,268435456,8,0.173071 +33554432,536870912,8,0.185147 +33554432,1073741824,8,0.236218 +33554432,2147483648,8,0.281177 diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/results/dk_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/results/dk_noindex_managed.txt new file mode 100644 index 0000000..1810c56 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/results/dk_noindex_managed.txt @@ -0,0 +1,25 @@ +keycount,tablesize,gpucount,time +build tests +67108864,2097152,8,0.0680182 +67108864,4194304,8,0.0728883 +67108864,8388608,8,0.0799447 +67108864,16777216,8,0.0706949 +67108864,33554432,8,0.0590356 +67108864,67108864,8,0.0641802 +67108864,134217728,8,0.0763955 +67108864,268435456,8,0.0754412 +67108864,536870912,8,0.0798249 +67108864,1073741824,8,0.0973578 +67108864,2147483648,8,0.108602 +intersect tests +33554432,2097152,8,0.204929 +33554432,4194304,8,0.16904 +33554432,8388608,8,0.135283 +33554432,16777216,8,0.176119 +33554432,33554432,8,0.0996557 +33554432,67108864,8,0.110754 +33554432,134217728,8,0.165837 +33554432,268435456,8,0.169425 +33554432,536870912,8,0.189284 +33554432,1073741824,8,0.217991 +33554432,2147483648,8,0.288689 diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/results/dk_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/results/dk_noindex_nomanaged.txt new file mode 100644 index 0000000..b9692c2 --- 
/dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/duplicate_keys/results/dk_noindex_nomanaged.txt @@ -0,0 +1,25 @@ +keycount,tablesize,gpucount,time +build tests +67108864,2097152,8,0.0622152 +67108864,4194304,8,0.0706519 +67108864,8388608,8,0.0726661 +67108864,16777216,8,0.0720128 +67108864,33554432,8,0.0585257 +67108864,67108864,8,0.0593684 +67108864,134217728,8,0.0690524 +67108864,268435456,8,0.078762 +67108864,536870912,8,0.0826112 +67108864,1073741824,8,0.091691 +67108864,2147483648,8,0.110717 +intersect tests +33554432,2097152,8,0.205296 +33554432,4194304,8,0.145175 +33554432,8388608,8,0.152027 +33554432,16777216,8,0.17575 +33554432,33554432,8,0.11355 +33554432,67108864,8,0.122662 +33554432,134217728,8,0.15679 +33554432,268435456,8,0.173647 +33554432,536870912,8,0.191007 +33554432,1073741824,8,0.221443 +33554432,2147483648,8,0.258794 diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/experiments.sh b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/experiments.sh new file mode 100755 index 0000000..da01e57 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/experiments.sh @@ -0,0 +1,63 @@ +includepath="../../include" +sspath="./strong_scaling" +wspath="./weak_scaling" +dkpath="./duplicate_keys" + +echo "no index, no managed memory" + +sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh +sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + +echo "strong scaling" +$sspath/strong_scaling.sh $sspath/results/ss_noindex_nomanaged.txt + +echo "weak scaling" +$wspath/weak_scaling.sh $wspath/results/ws_noindex_nomanaged.txt + +echo "duplicate keys" +$dkpath/duplicate_keys.sh $dkpath/results/dk_noindex_nomanaged.txt + + +echo "index, no managed memory" + +sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh +sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + +echo "strong scaling" +$sspath/strong_scaling.sh $sspath/results/ss_index_nomanaged.txt + +echo "weak scaling" +$wspath/weak_scaling.sh $wspath/results/ws_index_nomanaged.txt + +echo "duplicate keys" +$dkpath/duplicate_keys.sh $dkpath/results/dk_index_nomanaged.txt + + +echo "no index, managed memory" + +sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh +sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + +echo "strong scaling" +$sspath/strong_scaling.sh $sspath/results/ss_noindex_managed.txt + +echo "weak scaling" +$wspath/weak_scaling.sh $wspath/results/ws_noindex_managed.txt + +echo "duplicate keys" +$dkpath/duplicate_keys.sh $dkpath/results/dk_noindex_managed.txt + + +echo "index, managed memory" + +sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh +sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + +echo "strong scaling" +$sspath/strong_scaling.sh $sspath/results/ss_index_managed.txt + +echo "weak scaling" +$wspath/weak_scaling.sh $wspath/results/ws_index_managed.txt + +echo "duplicate keys" +$dkpath/duplicate_keys.sh $dkpath/results/dk_index_managed.txt diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/results/ss_index_managed.txt b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/results/ss_index_managed.txt new file mode 100644 index 0000000..19f04ef --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/results/ss_index_managed.txt @@ -0,0 +1,83 @@ 
+keycount,gpucount,time +build tests +16777216,1,0.0325028 +16777216,2,0.0279849 +16777216,4,0.0228086 +16777216,8,0.0594862 +33554432,1,0.0726999 +33554432,2,0.0536484 +33554432,4,0.0344586 +33554432,8,0.0378921 +67108864,1,0.132583 +67108864,2,0.102978 +67108864,4,0.0703488 +67108864,8,0.0644721 +134217728,1,0.293839 +134217728,2,0.197102 +134217728,4,0.135312 +134217728,8,0.115765 +268435456,1, +268435456,2,0.415772 +268435456,4,0.232936 +268435456,8,0.195699 +536870912,1, +536870912,2, +536870912,4,0.528768 +536870912,8,0.34557 +1073741824,1, +1073741824,2, +1073741824,4, +1073741824,8,0.776124 +2147483648,1, +2147483648,2, +2147483648,4, +2147483648,8, +4294967296,1, +4294967296,2, +4294967296,4, +4294967296,8, +8589934592,1, +8589934592,2, +8589934592,4, +8589934592,8, +intersect tests +8388608,1,0.0380641 +8388608,2,0.0350464 +8388608,4,0.0411105 +8388608,8,0.108064 +16777216,1,0.0715561 +16777216,2,0.0682189 +16777216,4,0.0680817 +16777216,8,0.109553 +33554432,1,0.165783 +33554432,2,0.116574 +33554432,4,0.0992727 +33554432,8,0.100065 +67108864,1,0.290011 +67108864,2,0.21082 +67108864,4,0.160978 +67108864,8,0.149008 +134217728,1, +134217728,2,0.426286 +134217728,4,0.268797 +134217728,8,0.27525 +268435456,1, +268435456,2, +268435456,4,0.517581 +268435456,8,0.47248 +536870912,1, +536870912,2, +536870912,4, +536870912,8,0.867117 +1073741824,1, +1073741824,2, +1073741824,4, +1073741824,8, +2147483648,1, +2147483648,2, +2147483648,4, +2147483648,8, +4294967296,1, +4294967296,2, +4294967296,4, +4294967296,8, diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/results/ss_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/results/ss_index_nomanaged.txt new file mode 100644 index 0000000..7b950e4 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/results/ss_index_nomanaged.txt @@ -0,0 +1,83 @@ +keycount,gpucount,time +build tests +16777216,1,0.0354109 +16777216,2,0.0267448 +16777216,4,0.0273582 +16777216,8,0.033154 +33554432,1,0.0731556 +33554432,2,0.0537969 +33554432,4,0.0383631 +33554432,8,0.0424847 +67108864,1,0.132904 +67108864,2,0.102855 +67108864,4,0.0725115 +67108864,8,0.0617421 +134217728,1,0.296422 +134217728,2,0.203568 +134217728,4,0.127621 +134217728,8,0.108602 +268435456,1, +268435456,2,0.415724 +268435456,4,0.2497 +268435456,8,0.195514 +536870912,1, +536870912,2, +536870912,4,0.544872 +536870912,8,0.34428 +1073741824,1, +1073741824,2, +1073741824,4, +1073741824,8,0.820133 +2147483648,1, +2147483648,2, +2147483648,4, +2147483648,8, +4294967296,1, +4294967296,2, +4294967296,4, +4294967296,8, +8589934592,1, +8589934592,2, +8589934592,4, +8589934592,8, +intersect tests +8388608,1,0.0414372 +8388608,2,0.0401961 +8388608,4,0.0743526 +8388608,8,0.10152 +16777216,1,0.0716493 +16777216,2,0.0679014 +16777216,4,0.0536822 +16777216,8,0.143913 +33554432,1,0.17775 +33554432,2,0.115764 +33554432,4,0.105716 +33554432,8,0.0909435 +67108864,1,0.285494 +67108864,2,0.219197 +67108864,4,0.152773 +67108864,8,0.172042 +134217728,1, +134217728,2,0.429687 +134217728,4,0.302091 +134217728,8,0.255397 +268435456,1, +268435456,2, +268435456,4,0.530875 +268435456,8,0.452492 +536870912,1, +536870912,2, +536870912,4, +536870912,8,0.827657 +1073741824,1, +1073741824,2, +1073741824,4, +1073741824,8, +2147483648,1, +2147483648,2, +2147483648,4, +2147483648,8, +4294967296,1, +4294967296,2, +4294967296,4, +4294967296,8, diff --git 
a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/results/ss_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/results/ss_noindex_managed.txt new file mode 100644 index 0000000..2e2a914 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/results/ss_noindex_managed.txt @@ -0,0 +1,83 @@ +keycount,gpucount,time +build tests +16777216,1,0.0321884 +16777216,2,0.0257464 +16777216,4,0.0247398 +16777216,8,0.0442378 +33554432,1,0.0767375 +33554432,2,0.0526592 +33554432,4,0.0391946 +33554432,8,0.0424151 +67108864,1,0.133328 +67108864,2,0.104533 +67108864,4,0.0697836 +67108864,8,0.0632678 +134217728,1,0.294505 +134217728,2,0.196317 +134217728,4,0.133293 +134217728,8,0.101436 +268435456,1, +268435456,2,0.416702 +268435456,4,0.247043 +268435456,8,0.209354 +536870912,1, +536870912,2, +536870912,4,0.537729 +536870912,8,0.380677 +1073741824,1, +1073741824,2, +1073741824,4, +1073741824,8,0.796484 +2147483648,1, +2147483648,2, +2147483648,4, +2147483648,8, +4294967296,1, +4294967296,2, +4294967296,4, +4294967296,8, +8589934592,1, +8589934592,2, +8589934592,4, +8589934592,8, +intersect tests +8388608,1,0.0379945 +8388608,2,0.0364431 +8388608,4,0.0413204 +8388608,8,0.107868 +16777216,1,0.0716636 +16777216,2,0.0643635 +16777216,4,0.0598927 +16777216,8,0.142664 +33554432,1,0.154671 +33554432,2,0.124279 +33554432,4,0.0820214 +33554432,8,0.11417 +67108864,1,0.288989 +67108864,2,0.206034 +67108864,4,0.155674 +67108864,8,0.147816 +134217728,1, +134217728,2,0.432848 +134217728,4,0.288113 +134217728,8,0.297398 +268435456,1, +268435456,2, +268435456,4,0.554682 +268435456,8,0.465064 +536870912,1, +536870912,2, +536870912,4, +536870912,8,0.858583 +1073741824,1, +1073741824,2, +1073741824,4, +1073741824,8, +2147483648,1, +2147483648,2, +2147483648,4, +2147483648,8, +4294967296,1, +4294967296,2, +4294967296,4, +4294967296,8, diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/results/ss_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/results/ss_noindex_nomanaged.txt new file mode 100644 index 0000000..67740d3 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/results/ss_noindex_nomanaged.txt @@ -0,0 +1,83 @@ +keycount,gpucount,time +build tests +16777216,1,0.0372132 +16777216,2,0.0286996 +16777216,4,0.0229724 +16777216,8,0.0430479 +33554432,1,0.0724408 +33554432,2,0.0543549 +33554432,4,0.0379433 +33554432,8,0.042924 +67108864,1,0.135927 +67108864,2,0.102988 +67108864,4,0.0720292 +67108864,8,0.064937 +134217728,1,0.294759 +134217728,2,0.194393 +134217728,4,0.137312 +134217728,8,0.0996321 +268435456,1, +268435456,2,0.416627 +268435456,4,0.253356 +268435456,8,0.215705 +536870912,1, +536870912,2, +536870912,4,0.529392 +536870912,8,0.38232 +1073741824,1, +1073741824,2, +1073741824,4, +1073741824,8,0.921484 +2147483648,1, +2147483648,2, +2147483648,4, +2147483648,8, +4294967296,1, +4294967296,2, +4294967296,4, +4294967296,8, +8589934592,1, +8589934592,2, +8589934592,4, +8589934592,8, +intersect tests +8388608,1,0.043349 +8388608,2,0.0406374 +8388608,4,0.0670669 +8388608,8,0.0788367 +16777216,1,0.0787538 +16777216,2,0.0566938 +16777216,4,0.0605194 +16777216,8,0.109079 +33554432,1,0.146623 +33554432,2,0.123966 +33554432,4,0.0895754 +33554432,8,0.108794 +67108864,1,0.308463 +67108864,2,0.215558 +67108864,4,0.173583 +67108864,8,0.14286 +134217728,1, +134217728,2,0.419283 +134217728,4,0.29244 +134217728,8,0.296266 +268435456,1, 
+268435456,2, +268435456,4,0.548758 +268435456,8,0.476861 +536870912,1, +536870912,2, +536870912,4, +536870912,8,0.879738 +1073741824,1, +1073741824,2, +1073741824,4, +1073741824,8, +2147483648,1, +2147483648,2, +2147483648,4, +2147483648,8, +4294967296,1, +4294967296,2, +4294967296,4, +4294967296,8, diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/strong_scaling.sh b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/strong_scaling.sh new file mode 100755 index 0000000..49ec8af --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/strong_scaling/strong_scaling.sh @@ -0,0 +1,60 @@ +keycounts=($(seq 24 1 33)) +# gpucounts=($(seq 0 1 4)) +# gpucounts=(1 2 4 6) +gpucounts=(1 2 4 8) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' ../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +rm $resultsfile +echo "keycount,gpucount,time" >> $resultsfile +echo "strong_scaling" +echo "keycount,gpucount,time" +echo "build tests" +echo "build tests" >> $resultsfile + +for i in "${keycounts[@]}" + do + let kc=$((echo 2^$i) | bc) + # echo "keycount: ${kc}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + echo "gpucount: ${gc}" + + ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc build | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + echo "${kc},${gc},${time}" >> $resultsfile + done + done + +echo "intersect tests" +echo "intersect tests" >> $resultsfile + +for i in "${keycounts[@]}" + do + let kc=$((echo 2^$i) | bc) + kc=$((kc / 2)) + echo "keycount: ${kc}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + echo "gpucount: ${gc}" + + ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc intersect | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + echo "${kc},${gc},${time}" >> $resultsfile + done + done + diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/results/ws_index_managed.txt b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/results/ws_index_managed.txt new file mode 100644 index 0000000..188d601 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/results/ws_index_managed.txt @@ -0,0 +1,83 @@ +keycount,gpucount,time +build tests +16777216,1,0.0331346 +33554432,2,0.0527944 +67108864,4,0.065323 +134217728,8,0.114937 +33554432,1,0.0641638 +67108864,2,0.0999332 +134217728,4,0.128951 +268435456,8,0.226372 +67108864,1,0.13308 +134217728,2,0.2081 +268435456,4,0.276078 +536870912,8,0.379625 +134217728,1,0.293651 +268435456,2,0.416441 +536870912,4,0.519183 +1073741824,8,0.904787 +268435456,1, +536870912,2, +1073741824,4, +2147483648,8, +536870912,1, +1073741824,2, +2147483648,4, +4294967296,8, +1073741824,1, +2147483648,2, +4294967296,4, +8589934592,8, +2147483648,1, +4294967296,2, +8589934592,4, +17179869184,8, +4294967296,1, +8589934592,2, +17179869184,4, +34359738368,8, +8589934592,1, +17179869184,2, +34359738368,4, +68719476736,8, +intersect tests +8388608,1,0.0468941 +16777216,2,0.0632852 +33554432,4,0.0951091 +67108864,8,0.170156 +16777216,1,0.0853412 +33554432,2,0.116057 +67108864,4,0.168933 +134217728,8,0.274745 +33554432,1,0.146932 +67108864,2,0.20876 +134217728,4,0.272589 +268435456,8,0.463303 +67108864,1,0.286874 +134217728,2,0.427265 +268435456,4,0.570002 +536870912,8,0.803026 +134217728,1, +268435456,2, +536870912,4, +1073741824,8, +268435456,1, 
+536870912,2, +1073741824,4, +2147483648,8, +536870912,1, +1073741824,2, +2147483648,4, +4294967296,8, +1073741824,1, +2147483648,2, +4294967296,4, +8589934592,8, +2147483648,1, +4294967296,2, +8589934592,4, +17179869184,8, +4294967296,1, +8589934592,2, +17179869184,4, +34359738368,8, diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/results/ws_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/results/ws_index_nomanaged.txt new file mode 100644 index 0000000..3613860 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/results/ws_index_nomanaged.txt @@ -0,0 +1,83 @@ +keycount,gpucount,time +build tests +16777216,1,0.033577 +33554432,2,0.0505057 +67108864,4,0.0712387 +134217728,8,0.10672 +33554432,1,0.0642273 +67108864,2,0.0989092 +134217728,4,0.134486 +268435456,8,0.207112 +67108864,1,0.132879 +134217728,2,0.199899 +268435456,4,0.260228 +536870912,8,0.402193 +134217728,1,0.293427 +268435456,2,0.419867 +536870912,4,0.532494 +1073741824,8,0.784293 +268435456,1, +536870912,2, +1073741824,4, +2147483648,8, +536870912,1, +1073741824,2, +2147483648,4, +4294967296,8, +1073741824,1, +2147483648,2, +4294967296,4, +8589934592,8, +2147483648,1, +4294967296,2, +8589934592,4, +17179869184,8, +4294967296,1, +8589934592,2, +17179869184,4, +34359738368,8, +8589934592,1, +17179869184,2, +34359738368,4, +68719476736,8, +intersect tests +8388608,1,0.0408279 +16777216,2,0.0647905 +33554432,4,0.0988385 +67108864,8,0.164739 +16777216,1,0.0843796 +33554432,2,0.133035 +67108864,4,0.150459 +134217728,8,0.292163 +33554432,1,0.147315 +67108864,2,0.215324 +134217728,4,0.288144 +268435456,8,0.426444 +67108864,1,0.292106 +134217728,2,0.424314 +268435456,4,0.557476 +536870912,8,0.825283 +134217728,1, +268435456,2, +536870912,4, +1073741824,8, +268435456,1, +536870912,2, +1073741824,4, +2147483648,8, +536870912,1, +1073741824,2, +2147483648,4, +4294967296,8, +1073741824,1, +2147483648,2, +4294967296,4, +8589934592,8, +2147483648,1, +4294967296,2, +8589934592,4, +17179869184,8, +4294967296,1, +8589934592,2, +17179869184,4, +34359738368,8, diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/results/ws_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/results/ws_noindex_managed.txt new file mode 100644 index 0000000..bfd6ead --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/results/ws_noindex_managed.txt @@ -0,0 +1,83 @@ +keycount,gpucount,time +build tests +16777216,1,0.0323 +33554432,2,0.0512072 +67108864,4,0.0759613 +134217728,8,0.119804 +33554432,1,0.0651776 +67108864,2,0.101349 +134217728,4,0.136969 +268435456,8,0.186157 +67108864,1,0.133221 +134217728,2,0.201105 +268435456,4,0.278444 +536870912,8,0.371993 +134217728,1,0.301606 +268435456,2,0.427477 +536870912,4,0.526256 +1073741824,8,0.742206 +268435456,1, +536870912,2, +1073741824,4, +2147483648,8, +536870912,1, +1073741824,2, +2147483648,4, +4294967296,8, +1073741824,1, +2147483648,2, +4294967296,4, +8589934592,8, +2147483648,1, +4294967296,2, +8589934592,4, +17179869184,8, +4294967296,1, +8589934592,2, +17179869184,4, +34359738368,8, +8589934592,1, +17179869184,2, +34359738368,4, +68719476736,8, +intersect tests +8388608,1,0.0473549 +16777216,2,0.0600207 +33554432,4,0.102821 +67108864,8,0.159159 +16777216,1,0.0845179 +33554432,2,0.116434 +67108864,4,0.166212 +134217728,8,0.274013 +33554432,1,0.141697 +67108864,2,0.21287 +134217728,4,0.285071 +268435456,8,0.431558 
+67108864,1,0.291182 +134217728,2,0.442546 +268435456,4,0.546869 +536870912,8,0.813265 +134217728,1, +268435456,2, +536870912,4, +1073741824,8, +268435456,1, +536870912,2, +1073741824,4, +2147483648,8, +536870912,1, +1073741824,2, +2147483648,4, +4294967296,8, +1073741824,1, +2147483648,2, +4294967296,4, +8589934592,8, +2147483648,1, +4294967296,2, +8589934592,4, +17179869184,8, +4294967296,1, +8589934592,2, +17179869184,4, +34359738368,8, diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/results/ws_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/results/ws_noindex_nomanaged.txt new file mode 100644 index 0000000..5329f31 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/results/ws_noindex_nomanaged.txt @@ -0,0 +1,83 @@ +keycount,gpucount,time +build tests +16777216,1,0.0340644 +33554432,2,0.0488161 +67108864,4,0.0646902 +134217728,8,0.110567 +33554432,1,0.0655421 +67108864,2,0.10459 +134217728,4,0.119148 +268435456,8,0.200432 +67108864,1,0.138668 +134217728,2,0.195339 +268435456,4,0.250116 +536870912,8,0.372453 +134217728,1,0.292901 +268435456,2,0.428749 +536870912,4,0.523375 +1073741824,8,0.854255 +268435456,1, +536870912,2, +1073741824,4, +2147483648,8, +536870912,1, +1073741824,2, +2147483648,4, +4294967296,8, +1073741824,1, +2147483648,2, +4294967296,4, +8589934592,8, +2147483648,1, +4294967296,2, +8589934592,4, +17179869184,8, +4294967296,1, +8589934592,2, +17179869184,4, +34359738368,8, +8589934592,1, +17179869184,2, +34359738368,4, +68719476736,8, +intersect tests +8388608,1,0.0396247 +16777216,2,0.0620933 +33554432,4,0.0950456 +67108864,8,0.150268 +16777216,1,0.0850309 +33554432,2,0.124054 +67108864,4,0.164013 +134217728,8,0.28403 +33554432,1,0.147728 +67108864,2,0.220272 +134217728,4,0.289937 +268435456,8,0.435595 +67108864,1,0.288566 +134217728,2,0.442978 +268435456,4,0.549197 +536870912,8,0.864007 +134217728,1, +268435456,2, +536870912,4, +1073741824,8, +268435456,1, +536870912,2, +1073741824,4, +2147483648,8, +536870912,1, +1073741824,2, +2147483648,4, +4294967296,8, +1073741824,1, +2147483648,2, +4294967296,4, +8589934592,8, +2147483648,1, +4294967296,2, +8589934592,4, +17179869184,8, +4294967296,1, +8589934592,2, +17179869184,4, +34359738368,8, diff --git a/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/weak_scaling.sh b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/weak_scaling.sh new file mode 100755 index 0000000..ea9bdd5 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/results/dgx1/weak_scaling/weak_scaling.sh @@ -0,0 +1,64 @@ +keycounts=($(seq 24 1 33)) +# gpucounts=($(seq 0 1 4)) +# gpucounts=(1 2 4 6) +gpucounts=(1 2 4 8) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' ../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +rm $resultsfile +echo "keycount,gpucount,time" >> $resultsfile +echo "weak_scaling" +echo "keycount,gpucount,time" +echo "build tests" +echo "build tests" >> $resultsfile + +for i in "${keycounts[@]}" + do + let kcdev=$((echo 2^$i) | bc) + # echo "keycount / dev: ${kcdev}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + echo "gpucount: ${gc}" + + let kc=$(($kcdev * $gc)) + + ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc build | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + echo "${kc},${gc},${time}" >> $resultsfile + done + done + +echo 
"intersect tests" +echo "intersect tests" >> $resultsfile + +for i in "${keycounts[@]}" + do + let kcdev=$((echo 2^$i) | bc) + kcdev=$((kcdev / 2)) + echo "keycount / dev : ${kcdev}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + echo "gpucount: ${gc}" + + let kc=$(($kcdev * $gc)) + + ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc intersect | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + echo "${kc},${gc},${time}" >> $resultsfile + done + done + diff --git a/maxflow/galois/lib/CMakeLists.txt b/hash-graph-dehornetify/experiments/dgx1/strong_scaling/results/build/strong_scaling_noindex_nomanaged.txt similarity index 100% rename from maxflow/galois/lib/CMakeLists.txt rename to hash-graph-dehornetify/experiments/dgx1/strong_scaling/results/build/strong_scaling_noindex_nomanaged.txt diff --git a/hash-graph-dehornetify/experiments/dgx1/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx1/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/dgx1/strong_scaling/results/strong_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx1/strong_scaling/results/strong_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..4bae0ae --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/strong_scaling/results/strong_scaling_noindex_nomanaged.txt @@ -0,0 +1,15 @@ +strong_scaling +keycount,gpucount,time +build tests +16777215,1,0.0954333 +16777215,2,0.0928285 +16777215,4,0.130237 +16777215,8, +33554431,1,0.147303 +33554431,2,0.12886 +33554431,4,0.158875 +33554431,8, +67108863,1,0.26835 +67108863,2,0.202707 +67108863,4,0.210046 +67108863,8, diff --git a/hash-graph-dehornetify/experiments/dgx1/strong_scaling/strong_scaling.sh b/hash-graph-dehornetify/experiments/dgx1/strong_scaling/strong_scaling.sh new file mode 100755 index 0000000..eb1f751 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/strong_scaling/strong_scaling.sh @@ -0,0 +1,79 @@ +keycounts=($(seq 24 1 33)) +# gpucounts=($(seq 0 1 4)) +gpucounts=(1 2 4 8) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' ../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +# rm $resultsfile +# echo "keycount,gpucount,time" >> $resultsfile +echo "strong_scaling" +echo "keycount,gpucount,time" +echo "build tests" +# echo "build tests" >> $resultsfile + +# $1 is sizeof(keyval) +for i in "${keycounts[@]}" + do + let kc=$((echo 2^$i) | bc) + # echo "keycount: ${kc}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc build | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + +echo "intersect tests" +# echo "intersect tests" >> $resultsfile +for i in "${keycounts[@]}" + do + let kc=$((echo 2^$i) | bc) + kc=$((kc / 2)) + # echo "keycount: ${kc}" + for j in "${gpucounts[@]}" + do + # let 
gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs * 2 + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc intersect | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + diff --git a/hash-graph-dehornetify/experiments/dgx1/weak_scaling/weak_scaling.sh b/hash-graph-dehornetify/experiments/dgx1/weak_scaling/weak_scaling.sh new file mode 100755 index 0000000..8828903 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx1/weak_scaling/weak_scaling.sh @@ -0,0 +1,84 @@ +keycounts=($(seq 24 1 33)) +# gpucounts=($(seq 0 1 4)) +gpucounts=(1 2 4 8) +# gpucounts=(1 2 4 8) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' ../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +# rm $resultsfile +# echo "keycount,gpucount,time" >> $resultsfile +echo "weak_scaling" +echo "keycount,gpucount,time" +echo "build tests" +# echo "build tests" >> $resultsfile + +for i in "${keycounts[@]}" + do + let kcdev=$((echo 2^$i) | bc) + # echo "keycount / dev: ${kcdev}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + let kc=$(($kcdev * $gc)) + + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc build | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + +echo "intersect tests" +# echo "intersect tests" >> $resultsfile + +for i in "${keycounts[@]}" + do + let kcdev=$((echo 2^$i) | bc) + kcdev=$((kcdev / 2)) + # echo "keycount / dev : ${kcdev}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + let kc=$(($kcdev * $gc)) + + # # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs * 2 + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc intersect | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + diff --git a/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/duplicate_keys.sh b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/duplicate_keys.sh new file mode 100755 index 0000000..18f0a12 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/duplicate_keys.sh @@ -0,0 +1,73 @@ +keycount=30 +gpucount=16 +tablesizes=($(seq 23 1 33)) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' 
../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +# rm $resultsfile +# echo "keycount,tablesize,gpucount,time" >> $resultsfile +echo "duplicate_keys" +echo "keycount,tablesize,gpucount,time" +echo "build tests" +# echo "build tests" >> $resultsfile + +for i in "${tablesizes[@]}" + do + let kc=$((echo 2^$keycount) | bc) + # let gc=$((echo 2^$gpucount) | bc) + let gc=$gpucount + let ts=$((echo 2^$i) | bc) + + # echo "tableSize: ${ts}" + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * 8) + (2 * $kc * $1) + (2 * $ts * 8)) + ($kc * 8) + ($kc * 8) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc build | grep "time") + + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${ts},${gc},${time}" >> $resultsfile + echo "${kc},${ts},${gc},${time}" + done + +echo "intersect tests" +# echo "intersect tests" >> $resultsfile + +keycount=$((echo $keycount - 1) | bc) +for i in "${tablesizes[@]}" + do + let kc=$((echo 2^$keycount) | bc) + # let gc=$((echo 2^$gpucount) | bc) + let gc=$gpucount + let ts=$((echo 2^$i) | bc) + + # echo "tableSize: ${ts}" + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * 8) + (2 * $kc * $1) + (2 * $ts * 8)) + ($kc * 8) + ($kc * 8) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs * 2 + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc intersect | grep "time") + + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${ts},${gc},${time}" >> $resultsfile + echo "${kc},${ts},${gc},${time}" + done diff --git a/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/build/duplicate_keys_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/build/duplicate_keys_index_managed.txt new file mode 100644 index 0000000..89df96e --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/build/duplicate_keys_index_managed.txt @@ -0,0 +1,13 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/build/duplicate_keys_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/build/duplicate_keys_index_nomanaged.txt new file mode 100644 index 0000000..89df96e --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/build/duplicate_keys_index_nomanaged.txt @@ -0,0 +1,13 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/build/duplicate_keys_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/build/duplicate_keys_noindex_managed.txt new file mode 100644 index 0000000..e5b11b5 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/build/duplicate_keys_noindex_managed.txt @@ 
-0,0 +1,13 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,2097152,16,9.19325 +8589934592,4194304,16,8.5119 +8589934592,8388608,16,8.71239 +8589934592,16777216,16,8.39451 +8589934592,33554432,16,9.13138 +8589934592,67108864,16,9.23199 +8589934592,134217728,16,9.25678 +8589934592,268435456,16,8.79075 +8589934592,536870912,16,9.32799 +8589934592,1073741824,16,9.25154 diff --git a/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/build/duplicate_keys_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/build/duplicate_keys_noindex_nomanaged.txt new file mode 100644 index 0000000..d35f431 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/build/duplicate_keys_noindex_nomanaged.txt @@ -0,0 +1,13 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,2097152,16,5.8532 +8589934592,4194304,16,6.08746 +8589934592,8388608,16,6.37243 +8589934592,16777216,16,5.88324 +8589934592,33554432,16,5.86087 +8589934592,67108864,16,6.38923 +8589934592,134217728,16,5.51001 +8589934592,268435456,16,6.24494 +8589934592,536870912,16,6.85461 +8589934592,1073741824,16,6.20287 diff --git a/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/duplicate_keys_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/duplicate_keys_index_managed.txt new file mode 100644 index 0000000..3af4c36 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/duplicate_keys_index_managed.txt @@ -0,0 +1,30 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +intersect tests +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/duplicate_keys_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/duplicate_keys_index_nomanaged.txt new file mode 100644 index 0000000..3af4c36 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/duplicate_keys_index_nomanaged.txt @@ -0,0 +1,30 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +intersect tests +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/duplicate_keys_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/duplicate_keys_noindex_managed.txt new file mode 100644 index 0000000..6a619ae --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/duplicate_keys_noindex_managed.txt @@ -0,0 +1,30 @@ +duplicate_keys 
+keycount,tablesize,gpucount,time +build tests +8589934592,2097152,16,9.19325 +8589934592,4194304,16,8.5119 +8589934592,8388608,16,8.71239 +8589934592,16777216,16,8.39451 +8589934592,33554432,16,9.13138 +8589934592,67108864,16,9.23199 +8589934592,134217728,16,9.25678 +8589934592,268435456,16,8.79075 +8589934592,536870912,16,9.32799 +8589934592,1073741824,16,9.25154 +8589934592,2147483648,16,11.9001 +8589934592,16,oom +8589934592,16,oom +intersect tests +4294967296,2097152,16,0 +4294967296,4194304,16,0 +4294967296,8388608,16,0 +4294967296,16777216,16,0 +4294967296,33554432,16,0 +4294967296,67108864,16,0 +4294967296,134217728,16,0 +4294967296,268435456,16,0 +4294967296,536870912,16,52.1995 +4294967296,1073741824,16,33.2493 +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/duplicate_keys_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/duplicate_keys_noindex_nomanaged.txt new file mode 100644 index 0000000..685e7db --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/duplicate_keys_noindex_nomanaged.txt @@ -0,0 +1,30 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,2097152,16,5.8532 +8589934592,4194304,16,6.08746 +8589934592,8388608,16,6.37243 +8589934592,16777216,16,5.88324 +8589934592,33554432,16,5.86087 +8589934592,67108864,16,6.38923 +8589934592,134217728,16,5.51001 +8589934592,268435456,16,6.24494 +8589934592,536870912,16,6.85461 +8589934592,1073741824,16,6.20287 +8589934592,2147483648,16,6.70001 +8589934592,16,oom +8589934592,16,oom +intersect tests +4294967296,2097152,16,0 +4294967296,4194304,16,0 +4294967296,8388608,16,0 +4294967296,16777216,16,0 +4294967296,33554432,16,0 +4294967296,67108864,16,0 +4294967296,134217728,16,0 +4294967296,268435456,16,0 +4294967296,536870912,16,0 +4294967296,1073741824,16,0 +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/intersect/duplicate_keys_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/intersect/duplicate_keys_index_managed.txt new file mode 100644 index 0000000..17eb3cf --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/intersect/duplicate_keys_index_managed.txt @@ -0,0 +1,14 @@ +intersect tests +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/intersect/duplicate_keys_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/intersect/duplicate_keys_index_nomanaged.txt new file mode 100644 index 0000000..17eb3cf --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/intersect/duplicate_keys_index_nomanaged.txt @@ -0,0 +1,14 @@ +intersect tests +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/intersect/duplicate_keys_noindex_managed.txt 
b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/intersect/duplicate_keys_noindex_managed.txt new file mode 100644 index 0000000..2e52e97 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/intersect/duplicate_keys_noindex_managed.txt @@ -0,0 +1,14 @@ +intersect tests +4294967296,2097152,16,0 +4294967296,4194304,16,0 +4294967296,8388608,16,0 +4294967296,16777216,16,0 +4294967296,33554432,16,0 +4294967296,67108864,16,0 +4294967296,134217728,16,0 +4294967296,268435456,16,0 +4294967296,536870912,16,52.1995 +4294967296,1073741824,16,33.2493 +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/intersect/duplicate_keys_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/intersect/duplicate_keys_noindex_nomanaged.txt new file mode 100644 index 0000000..e111f25 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/duplicate_keys/results/intersect/duplicate_keys_noindex_nomanaged.txt @@ -0,0 +1,14 @@ +intersect tests +4294967296,2097152,16,0 +4294967296,4194304,16,0 +4294967296,8388608,16,0 +4294967296,16777216,16,0 +4294967296,33554432,16,0 +4294967296,67108864,16,0 +4294967296,134217728,16,0 +4294967296,268435456,16,0 +4294967296,536870912,16,0 +4294967296,1073741824,16,0 +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/experiments.lsf b/hash-graph-dehornetify/experiments/dgx2/experiments.lsf new file mode 100755 index 0000000..4b6973b --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/experiments.lsf @@ -0,0 +1,125 @@ +#!/bin/bash +#BSUB -P BIF115 +#BSUB -W 2:00 +#BSUB -nnodes 1 +#BSUB -alloc_flags gpumps +#BSUB -J snmg-hg-experiments +#BSUB -o snmg-hg-experiments.%J +#BSUB -e snmg-hg-experiments.%J + +buildpath="../../build" +includepath="../../include" + +declare -a modes=("noindex_nomanaged" "index_nomanaged" "noindex_managed" "index_managed") +declare -a exps=("strong_scaling" "weak_scaling" "duplicate_keys") + +# 64-bit section +sed -i 's/^#define B32/\/\/&/' $includepath/MultiHashGraph.cuh +keysize=8 +echo "64-bit keys" + +sed -i 's/^#define HOST_PROFILE/\/\/&/' $includepath/MultiHashGraph.cuh + +for mode in "${modes[@]}" + do + bytes=8 + if [ "$mode" == "noindex_nomanaged" ]; then + sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + elif [ "$mode" == "index_nomanaged" ]; then + sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + bytes=16 + elif [ "$mode" == "noindex_managed" ]; then + sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh + elif [ "$mode" == "index_managed" ]; then + sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh + sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + bytes=16 + fi + + make -C $buildpath multi-hash + + echo $mode + + for exp in "${exps[@]}" + do + echo $exp + ./$exp/$exp.sh $bytes $keysize > ./$exp/results/$exp\_$mode.txt + head -n -$(cat ./$exp/results/$exp\_$mode.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode.txt > ./$exp/results/build/$exp\_$mode.txt + tail -n +$(cat ./$exp/results/$exp\_$mode.txt | grep -n 
intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode.txt > ./$exp/results/intersect/$exp\_$mode.txt + done + done + +sed -i 's/^\/\/.*#define HOST_PROFILE/#define HOST_PROFILE/' $includepath/MultiHashGraph.cuh +for mode in "${modes[@]}" + do + bytes=8 + if [ "$mode" == "noindex_nomanaged" ]; then + sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + elif [ "$mode" == "index_nomanaged" ]; then + sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + bytes=16 + elif [ "$mode" == "noindex_managed" ]; then + sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh + elif [ "$mode" == "index_managed" ]; then + sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh + sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + bytes=16 + fi + + make -C $buildpath multi-hash + + echo $mode + + for exp in "${exps[@]}" + do + echo $exp + ./$exp/$exp.sh $bytes $keysize > ./$exp/results/$exp\_$mode.prof + head -n -$(cat ./$exp/results/$exp\_$mode.prof | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode.prof > ./$exp/results/build/$exp\_$mode.prof + tail -n +$(cat ./$exp/results/$exp\_$mode.prof | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode.prof > ./$exp/results/intersect/$exp\_$mode.prof + done + done + +# # 32-bit section +# sed -i 's/^\/\/.*#define B32/#define B32/' $includepath/MultiHashGraph.cuh +# keysize=4 +# echo "32-bit keys" +# +# for mode in "${modes[@]}" +# do +# bytes=8 +# if [ "$mode" == "noindex_nomanaged" ]; then +# sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh +# sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh +# elif [ "$mode" == "index_nomanaged" ]; then +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh +# sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh +# bytes=12 +# continue +# elif [ "$mode" == "noindex_managed" ]; then +# sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh +# sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh +# elif [ "$mode" == "index_managed" ]; then +# sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh +# bytes=12 +# continue +# fi +# +# make -C $buildpath multi-hash +# +# echo $mode +# +# for exp in "${exps[@]}" +# do +# echo $exp +# ./$exp/$exp.sh $bytes $keysize > ./$exp/results/$exp\_$mode\_32.txt +# head -n -$(cat ./$exp/results/$exp\_$mode\_32.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode\_32.txt > ./$exp/results/build/$exp\_$mode\_32.txt +# tail -n +$(cat ./$exp/results/$exp\_$mode\_32.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode\_32.txt > ./$exp/results/intersect/$exp\_$mode\_32.txt +# done +# done diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/.nfs00000000248718c600000023 b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/.nfs00000000248718c600000023 new file mode 100644 index 0000000..66cffd5 --- /dev/null +++ 
b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/.nfs00000000248718c600000023 @@ -0,0 +1,47 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0122644 +16777216,2,0.0178678 +16777216,4,0.0226222 +16777216,8,0.026281 +16777216,16,0.0431165 +33554432,1,0.0240906 +33554432,2,0.0334356 +33554432,4,0.037845 +33554432,8,0.0413512 +33554432,16,0.0547164 +67108864,1,0.043605 +67108864,2,0.0638628 +67108864,4,0.0743997 +67108864,8,0.0713103 +67108864,16,0.0901489 +134217728,1,0.0934072 +134217728,2,0.121122 +134217728,4,0.140348 +134217728,8,0.128722 +134217728,16,0.141252 +268435456,1,0.170955 +268435456,2,0.242831 +268435456,4,0.254967 +268435456,8,0.268106 +268435456,16,0.266513 +536870912,1,oom +536870912,2,0.477132 +536870912,4,0.535152 +536870912,8,0.483546 +536870912,16,0.498846 +1073741824,1,oom +1073741824,2,oom +1073741824,4,1.03539 +1073741824,8,0.955675 +1073741824,16,0.931251 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8, +2147483648,16, +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/.nfs0000000025156d8f00000003 b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/.nfs0000000025156d8f00000003 new file mode 100644 index 0000000..ce67a5b --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/.nfs0000000025156d8f00000003 @@ -0,0 +1,19 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0126556 +16777216,2,0.0155249 +16777216,4,0.0179722 +16777216,8,0.0239145 +16777216,16,0.0330906 +33554432,1,0.0303903 +33554432,2,0.0287724 +33554432,4,0.0306668 +33554432,8,0.0326011 +33554432,16,0.0452659 +67108864,1,0.0735078 +67108864,2,0.0620175 +67108864,4,0.0575386 +67108864,8,0.0574781 +67108864,16,0.0634112 +134217728,1,0.190365 diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_index_managed.txt new file mode 100644 index 0000000..18cb585 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_index_managed.txt @@ -0,0 +1,50 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0197448 +16777216,2,0.0349307 +16777216,4,0.0390062 +16777216,8,0.0544696 +16777216,16,0.133357 +33554432,1,0.0442962 +33554432,2,0.0665201 +33554432,4,0.0704225 +33554432,8,0.0863109 +33554432,16,0.134142 +67108864,1,0.102748 +67108864,2,0.142723 +67108864,4,0.131049 +67108864,8,0.148587 +67108864,16,0.210538 +134217728,1,0.267932 +134217728,2,0.314535 +134217728,4,0.264044 +134217728,8,0.261124 +134217728,16,0.329721 +268435456,1,0.602868 +268435456,2,0.70451 +268435456,4,0.560778 +268435456,8,0.500924 +268435456,16,0.564247 +536870912,1,oom +536870912,2,1.54855 +536870912,4,1.18566 +536870912,8,1.00559 +536870912,16,1.04638 +1073741824,1,oom +1073741824,2,oom +1073741824,4,2.5596 +1073741824,8,2.02187 +1073741824,16,2.00142 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,4.5 +2147483648,16,3.97151 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,8.23494 +8589934592,1,oom +8589934592,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_index_nomanaged.txt new file mode 100644 index 
0000000..7e8df7e --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_index_nomanaged.txt @@ -0,0 +1,50 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0213166 +16777216,2,0.0291174 +16777216,4,0.0346163 +16777216,8,0.0470825 +16777216,16,0.0679752 +33554432,1,0.0465981 +33554432,2,0.05555 +33554432,4,0.0598907 +33554432,8,0.0656824 +33554432,16,0.0933458 +67108864,1,0.105692 +67108864,2,0.110837 +67108864,4,0.110091 +67108864,8,0.109054 +67108864,16,0.123016 +134217728,1,0.267389 +134217728,2,0.241594 +134217728,4,0.224476 +134217728,8,0.200649 +134217728,16,0.20066 +268435456,1,0.612958 +268435456,2,0.530566 +268435456,4,0.454569 +268435456,8,0.391276 +268435456,16,0.373754 +536870912,1,oom +536870912,2,1.15302 +536870912,4,0.839668 +536870912,8,0.786043 +536870912,16,0.67591 +1073741824,1,oom +1073741824,2,oom +1073741824,4,2.25959 +1073741824,8,1.66527 +1073741824,16,1.30773 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,3.71888 +2147483648,16,2.94296 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,5.4394 +8589934592,1,oom +8589934592,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_noindex_managed.txt new file mode 100644 index 0000000..7780332 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_noindex_managed.txt @@ -0,0 +1,50 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0115487 +16777216,2,0.0196608 +16777216,4,0.0210924 +16777216,8,0.030422 +16777216,16,0.103077 +33554432,1,0.0288348 +33554432,2,0.037631 +33554432,4,0.0365906 +33554432,8,0.0452895 +33554432,16,0.108776 +67108864,1,0.0707564 +67108864,2,0.0838492 +67108864,4,0.0706273 +67108864,8,0.0744571 +67108864,16,0.121832 +134217728,1,0.186573 +134217728,2,0.193385 +134217728,4,0.143653 +134217728,8,0.135376 +134217728,16,0.184122 +268435456,1,0.433906 +268435456,2,0.43317 +268435456,4,0.310134 +268435456,8,0.275083 +268435456,16,0.313358 +536870912,1,oom +536870912,2,0.782982 +536870912,4,0.684746 +536870912,8,0.541164 +536870912,16,0.556485 +1073741824,1,oom +1073741824,2,oom +1073741824,4,1.48842 +1073741824,8,1.05888 +1073741824,16,1.02886 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,2.25346 +2147483648,16,2.07309 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,4.45807 +8589934592,1,oom +8589934592,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_noindex_managed_32.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_noindex_managed_32.txt new file mode 100644 index 0000000..7987eaf --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_noindex_managed_32.txt @@ -0,0 +1,50 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.00904909 +16777216,2,0.012671 +16777216,4,0.0147302 +16777216,8,0.0272118 +16777216,16,0.0798966 +33554432,1,0.0215378 +33554432,2,0.0238264 +33554432,4,0.0237189 +33554432,8,0.0307313 +33554432,16,0.102224 +67108864,1,0.061697 +67108864,2,0.0517632 +67108864,4,0.0421304 +67108864,8,0.0484874 +67108864,16,0.111248 +134217728,1,0.154036 +134217728,2,0.130718 +134217728,4,0.0806298 +134217728,8,0.0848169 
+134217728,16,0.126216 +268435456,1,0.400805 +268435456,2,0.236055 +268435456,4,0.186377 +268435456,8,0.146209 +268435456,16,0.194962 +536870912,1,0.94888 +536870912,2,0.762158 +536870912,4,0.435002 +536870912,8,0.311005 +536870912,16,0.318592 +1073741824,1,oom +1073741824,2,1.29818 +1073741824,4,1.0278 +1073741824,8,0.664478 +1073741824,16,0.564344 +2147483648,1,oom +2147483648,2,oom +2147483648,4,2.29752 +2147483648,8,1.47168 +2147483648,16,1.11792 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,4.0154 +4294967296,16, +8589934592,1,oom +8589934592,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..ba8de1f --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_noindex_nomanaged.txt @@ -0,0 +1,50 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0126341 +16777216,2,0.0156662 +16777216,4,0.0180244 +16777216,8,0.0218276 +16777216,16,0.0320809 +33554432,1,0.0303186 +33554432,2,0.0292536 +33554432,4,0.0320338 +33554432,8,0.0314122 +33554432,16,0.0497111 +67108864,1,0.0733604 +67108864,2,0.0617933 +67108864,4,0.0593879 +67108864,8,0.0592865 +67108864,16,0.0660326 +134217728,1,0.188676 +134217728,2,0.137769 +134217728,4,0.116816 +134217728,8,0.100187 +134217728,16,0.0940442 +268435456,1,0.443171 +268435456,2,0.313036 +268435456,4,0.251006 +268435456,8,0.197405 +268435456,16,0.185187 +536870912,1,oom +536870912,2,0.688961 +536870912,4,0.500017 +536870912,8,0.389059 +536870912,16,0.342239 +1073741824,1,oom +1073741824,2,oom +1073741824,4,1.45416 +1073741824,8,0.823694 +1073741824,16,0.700535 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,2.19697 +2147483648,16,1.58418 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,3.42857 +8589934592,1,oom +8589934592,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_noindex_nomanaged_32.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_noindex_nomanaged_32.txt new file mode 100644 index 0000000..eac30ee --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/build/strong_scaling_noindex_nomanaged_32.txt @@ -0,0 +1,50 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.00994202 +16777216,2,0.010537 +16777216,4,0.0119603 +16777216,8,0.0174531 +16777216,16,0.0289485 +33554432,1,0.0227154 +33554432,2,0.0188928 +33554432,4,0.0200919 +33554432,8,0.0220262 +33554432,16,0.0342313 +67108864,1,0.0638249 +67108864,2,0.0383857 +67108864,4,0.0346081 +67108864,8,0.0349901 +67108864,16,0.0443116 +134217728,1,0.156629 +134217728,2,0.0922092 +134217728,4,0.0677499 +134217728,8,0.0611287 +134217728,16,0.0643891 +268435456,1,0.408063 +268435456,2,0.221356 +268435456,4,0.145043 +268435456,8,0.106491 +268435456,16,0.100313 +536870912,1,0.958387 +536870912,2,0.519897 +536870912,4,0.344002 +536870912,8,0.210592 +536870912,16,0.193616 +1073741824,1,oom +1073741824,2,1.20162 +1073741824,4,0.966464 +1073741824,8,0.486605 +1073741824,16,0.360796 +2147483648,1,oom +2147483648,2,oom +2147483648,4,1.74458 +2147483648,8,1.10394 +2147483648,16,0.74587 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8, +4294967296,16, +8589934592,1,oom +8589934592,2,oom diff --git 
a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_index_managed.txt new file mode 100644 index 0000000..985dc2c --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_index_managed.txt @@ -0,0 +1,51 @@ +intersect tests +8388608,1,0.022142 +8388608,2,0.0409108 +8388608,4,0.0521513 +8388608,8,0.083199 +8388608,16,0.26762 +16777216,1,0.0470559 +16777216,2,0.0745165 +16777216,4,0.0828488 +16777216,8,0.113052 +16777216,16,0.266994 +33554432,1,0.101058 +33554432,2,0.141326 +33554432,4,0.146054 +33554432,8,0.179982 +33554432,16,0.293961 +67108864,1,0.246503 +67108864,2,0.296507 +67108864,4,0.277441 +67108864,8,0.299949 +67108864,16,0.43827 +134217728,1,0.576289 +134217728,2,0.663182 +134217728,4,0.550988 +134217728,8,0.548909 +134217728,16,0.679544 +268435456,1,oom +268435456,2,1.27134 +268435456,4,1.16239 +268435456,8,1.0495 +268435456,16,1.14873 +536870912,1,oom +536870912,2,oom +536870912,4,2.48764 +536870912,8,2.11424 +536870912,16,2.17216 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,8,4.28807 +1073741824,16,4.13363 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,16,8.21929 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_index_nomanaged.txt new file mode 100644 index 0000000..c3c1939 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_index_nomanaged.txt @@ -0,0 +1,51 @@ +intersect tests +8388608,1,0.0236769 +8388608,2,0.035756 +8388608,4,0.0538829 +8388608,8,0.0784179 +8388608,16,0.135621 +16777216,1,0.0471972 +16777216,2,0.0630804 +16777216,4,0.0761754 +16777216,8,0.102217 +16777216,16,0.170254 +33554432,1,0.106002 +33554432,2,0.121607 +33554432,4,0.1329 +33554432,8,0.143133 +33554432,16,0.204185 +67108864,1,0.239015 +67108864,2,0.237604 +67108864,4,0.247171 +67108864,8,0.238154 +67108864,16,0.282421 +134217728,1,0.584335 +134217728,2,0.515976 +134217728,4,0.459921 +134217728,8,0.421534 +134217728,16,0.445658 +268435456,1,oom +268435456,2,1.12042 +268435456,4,0.956948 +268435456,8,0.7864 +268435456,16,0.78305 +536870912,1,oom +536870912,2,oom +536870912,4,1.89665 +536870912,8,1.66623 +536870912,16,1.45069 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,8,3.48334 +1073741824,16,2.65142 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,16,5.82697 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_noindex_managed.txt new file mode 100644 index 0000000..e9c6de0 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_noindex_managed.txt @@ -0,0 +1,51 @@ +intersect tests +8388608,1,0.0140288 +8388608,2,0.0242319 +8388608,4,0.0294031 +8388608,8,0.0629043 +8388608,16,0.173129 +16777216,1,0.0288102 +16777216,2,0.0424858 +16777216,4,0.0481782 +16777216,8,0.065279 +16777216,16,0.225125 
+33554432,1,0.0698296 +33554432,2,0.0862034 +33554432,4,0.0809093 +33554432,8,0.100521 +33554432,16,0.232558 +67108864,1,0.171688 +67108864,2,0.188598 +67108864,4,0.154456 +67108864,8,0.164301 +67108864,16,0.25348 +134217728,1,0.434301 +134217728,2,0.41991 +134217728,4,0.312751 +134217728,8,0.284499 +134217728,16,0.399834 +268435456,1,oom +268435456,2,0.840113 +268435456,4,0.670008 +268435456,8,0.579037 +268435456,16,0.655371 +536870912,1,oom +536870912,2,oom +536870912,4,1.4732 +536870912,8,1.16068 +536870912,16,1.14526 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,8,2.45844 +1073741824,16,2.21272 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,16,4.51092 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_noindex_managed_32.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_noindex_managed_32.txt new file mode 100644 index 0000000..ed418e8 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_noindex_managed_32.txt @@ -0,0 +1,51 @@ +intersect tests +8388608,1,0.0115425 +8388608,2,0.0173824 +8388608,4,0.0216146 +8388608,8,0.0551066 +8388608,16,0.137882 +16777216,1,0.0239258 +16777216,2,0.0291185 +16777216,4,0.0323441 +16777216,8,0.0618291 +16777216,16,0.178019 +33554432,1,0.0571494 +33554432,2,0.0551414 +33554432,4,0.0517263 +33554432,8,0.0670833 +33554432,16,0.216941 +67108864,1,0.140325 +67108864,2,0.1154 +67108864,4,0.0982508 +67108864,8,0.112464 +67108864,16,0.238025 +134217728,1,0.378028 +134217728,2,0.282535 +134217728,4,0.192029 +134217728,8,0.180579 +134217728,16,0.276268 +268435456,1,0.898554 +268435456,2,0.690758 +268435456,4,0.432286 +268435456,8,0.327116 +268435456,16,0.414152 +536870912,1,oom +536870912,2,1.64112 +536870912,4,0.980567 +536870912,8,0.665205 +536870912,16,0.676533 +1073741824,1,oom +1073741824,2,oom +1073741824,4,2.17646 +1073741824,8,1.44065 +1073741824,16,1.28635 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,3.22378 +2147483648,16,2.5334 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16, diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..2f18866 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged.txt @@ -0,0 +1,51 @@ +intersect tests +8388608,1,0.0150569 +8388608,2,0.0198308 +8388608,4,0.0298383 +8388608,8,0.046889 +8388608,16,0.0800154 +16777216,1,0.0293458 +16777216,2,0.0349512 +16777216,4,0.0417331 +16777216,8,0.0603259 +16777216,16,0.096172 +33554432,1,0.0699863 +33554432,2,0.0649656 +33554432,4,0.0680243 +33554432,8,0.081877 +33554432,16,0.120511 +67108864,1,0.167941 +67108864,2,0.139217 +67108864,4,0.133331 +67108864,8,0.124817 +67108864,16,0.152677 +134217728,1,0.42197 +134217728,2,0.304572 +134217728,4,0.256246 +134217728,8,0.228204 +134217728,16,0.238367 +268435456,1,oom +268435456,2,0.668326 +268435456,4,0.508968 +268435456,8,0.424179 +268435456,16,0.420598 +536870912,1,oom +536870912,2,oom +536870912,4,1.14895 +536870912,8,0.859954 +536870912,16,0.731401 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom 
+1073741824,8,1.85308 +1073741824,16,1.42397 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,16,3.14241 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged_32.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged_32.txt new file mode 100644 index 0000000..326263b --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged_32.txt @@ -0,0 +1,51 @@ +intersect tests +8388608,1,0.0133827 +8388608,2,0.0152812 +8388608,4,0.0231188 +8388608,8,0.0380242 +8388608,16,0.0761569 +16777216,1,0.0243804 +16777216,2,0.0248084 +16777216,4,0.0302725 +16777216,8,0.0474737 +16777216,16,0.0819866 +33554432,1,0.0549949 +33554432,2,0.042882 +33554432,4,0.0467732 +33554432,8,0.062166 +33554432,16,0.0947354 +67108864,1,0.143825 +67108864,2,0.087937 +67108864,4,0.0806799 +67108864,8,0.0827228 +67108864,16,0.128418 +134217728,1,0.371895 +134217728,2,0.204634 +134217728,4,0.153496 +134217728,8,0.132919 +134217728,16,0.166689 +268435456,1,0.873903 +268435456,2,0.486309 +268435456,4,0.322768 +268435456,8,0.257408 +268435456,16,0.245693 +536870912,1,oom +536870912,2,1.13359 +536870912,4,0.734622 +536870912,8,0.506797 +536870912,16,0.426156 +1073741824,1,oom +1073741824,2,oom +1073741824,4,1.82216 +1073741824,8,1.01878 +1073741824,16,0.86528 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,2.95334 +2147483648,16,1.62837 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16, diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_index_managed.txt new file mode 100644 index 0000000..84f0626 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_index_managed.txt @@ -0,0 +1,104 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0197448 +16777216,2,0.0349307 +16777216,4,0.0390062 +16777216,8,0.0544696 +16777216,16,0.133357 +33554432,1,0.0442962 +33554432,2,0.0665201 +33554432,4,0.0704225 +33554432,8,0.0863109 +33554432,16,0.134142 +67108864,1,0.102748 +67108864,2,0.142723 +67108864,4,0.131049 +67108864,8,0.148587 +67108864,16,0.210538 +134217728,1,0.267932 +134217728,2,0.314535 +134217728,4,0.264044 +134217728,8,0.261124 +134217728,16,0.329721 +268435456,1,0.602868 +268435456,2,0.70451 +268435456,4,0.560778 +268435456,8,0.500924 +268435456,16,0.564247 +536870912,1,oom +536870912,2,1.54855 +536870912,4,1.18566 +536870912,8,1.00559 +536870912,16,1.04638 +1073741824,1,oom +1073741824,2,oom +1073741824,4,2.5596 +1073741824,8,2.02187 +1073741824,16,2.00142 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,4.5 +2147483648,16,3.97151 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,8.23494 +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom +intersect tests +8388608,1,0.022142 +8388608,2,0.0409108 +8388608,4,0.0521513 +8388608,8,0.083199 +8388608,16,0.26762 +16777216,1,0.0470559 +16777216,2,0.0745165 +16777216,4,0.0828488 +16777216,8,0.113052 +16777216,16,0.266994 +33554432,1,0.101058 +33554432,2,0.141326 +33554432,4,0.146054 +33554432,8,0.179982 +33554432,16,0.293961 
+67108864,1,0.246503 +67108864,2,0.296507 +67108864,4,0.277441 +67108864,8,0.299949 +67108864,16,0.43827 +134217728,1,0.576289 +134217728,2,0.663182 +134217728,4,0.550988 +134217728,8,0.548909 +134217728,16,0.679544 +268435456,1,oom +268435456,2,1.27134 +268435456,4,1.16239 +268435456,8,1.0495 +268435456,16,1.14873 +536870912,1,oom +536870912,2,oom +536870912,4,2.48764 +536870912,8,2.11424 +536870912,16,2.17216 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,8,4.28807 +1073741824,16,4.13363 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,16,8.21929 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_index_nomanaged.txt new file mode 100644 index 0000000..da22222 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_index_nomanaged.txt @@ -0,0 +1,104 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0213166 +16777216,2,0.0291174 +16777216,4,0.0346163 +16777216,8,0.0470825 +16777216,16,0.0679752 +33554432,1,0.0465981 +33554432,2,0.05555 +33554432,4,0.0598907 +33554432,8,0.0656824 +33554432,16,0.0933458 +67108864,1,0.105692 +67108864,2,0.110837 +67108864,4,0.110091 +67108864,8,0.109054 +67108864,16,0.123016 +134217728,1,0.267389 +134217728,2,0.241594 +134217728,4,0.224476 +134217728,8,0.200649 +134217728,16,0.20066 +268435456,1,0.612958 +268435456,2,0.530566 +268435456,4,0.454569 +268435456,8,0.391276 +268435456,16,0.373754 +536870912,1,oom +536870912,2,1.15302 +536870912,4,0.839668 +536870912,8,0.786043 +536870912,16,0.67591 +1073741824,1,oom +1073741824,2,oom +1073741824,4,2.25959 +1073741824,8,1.66527 +1073741824,16,1.30773 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,3.71888 +2147483648,16,2.94296 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,5.4394 +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom +intersect tests +8388608,1,0.0236769 +8388608,2,0.035756 +8388608,4,0.0538829 +8388608,8,0.0784179 +8388608,16,0.135621 +16777216,1,0.0471972 +16777216,2,0.0630804 +16777216,4,0.0761754 +16777216,8,0.102217 +16777216,16,0.170254 +33554432,1,0.106002 +33554432,2,0.121607 +33554432,4,0.1329 +33554432,8,0.143133 +33554432,16,0.204185 +67108864,1,0.239015 +67108864,2,0.237604 +67108864,4,0.247171 +67108864,8,0.238154 +67108864,16,0.282421 +134217728,1,0.584335 +134217728,2,0.515976 +134217728,4,0.459921 +134217728,8,0.421534 +134217728,16,0.445658 +268435456,1,oom +268435456,2,1.12042 +268435456,4,0.956948 +268435456,8,0.7864 +268435456,16,0.78305 +536870912,1,oom +536870912,2,oom +536870912,4,1.89665 +536870912,8,1.66623 +536870912,16,1.45069 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,8,3.48334 +1073741824,16,2.65142 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,16,5.82697 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_index_nomanaged_32.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_index_nomanaged_32.txt new file mode 100644 index 0000000..2df922d --- /dev/null +++ 
b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_index_nomanaged_32.txt @@ -0,0 +1,12 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0184412 +16777216,2, +16777216,4, +16777216,8, +16777216,16, +33554432,1,0.0323328 +33554432,2,0.0601539 +33554432,4,0.0674591 +33554432,8, diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_noindex_managed.txt new file mode 100644 index 0000000..21d03ec --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_noindex_managed.txt @@ -0,0 +1,104 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0115487 +16777216,2,0.0196608 +16777216,4,0.0210924 +16777216,8,0.030422 +16777216,16,0.103077 +33554432,1,0.0288348 +33554432,2,0.037631 +33554432,4,0.0365906 +33554432,8,0.0452895 +33554432,16,0.108776 +67108864,1,0.0707564 +67108864,2,0.0838492 +67108864,4,0.0706273 +67108864,8,0.0744571 +67108864,16,0.121832 +134217728,1,0.186573 +134217728,2,0.193385 +134217728,4,0.143653 +134217728,8,0.135376 +134217728,16,0.184122 +268435456,1,0.433906 +268435456,2,0.43317 +268435456,4,0.310134 +268435456,8,0.275083 +268435456,16,0.313358 +536870912,1,oom +536870912,2,0.782982 +536870912,4,0.684746 +536870912,8,0.541164 +536870912,16,0.556485 +1073741824,1,oom +1073741824,2,oom +1073741824,4,1.48842 +1073741824,8,1.05888 +1073741824,16,1.02886 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,2.25346 +2147483648,16,2.07309 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,4.45807 +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom +intersect tests +8388608,1,0.0140288 +8388608,2,0.0242319 +8388608,4,0.0294031 +8388608,8,0.0629043 +8388608,16,0.173129 +16777216,1,0.0288102 +16777216,2,0.0424858 +16777216,4,0.0481782 +16777216,8,0.065279 +16777216,16,0.225125 +33554432,1,0.0698296 +33554432,2,0.0862034 +33554432,4,0.0809093 +33554432,8,0.100521 +33554432,16,0.232558 +67108864,1,0.171688 +67108864,2,0.188598 +67108864,4,0.154456 +67108864,8,0.164301 +67108864,16,0.25348 +134217728,1,0.434301 +134217728,2,0.41991 +134217728,4,0.312751 +134217728,8,0.284499 +134217728,16,0.399834 +268435456,1,oom +268435456,2,0.840113 +268435456,4,0.670008 +268435456,8,0.579037 +268435456,16,0.655371 +536870912,1,oom +536870912,2,oom +536870912,4,1.4732 +536870912,8,1.16068 +536870912,16,1.14526 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,8,2.45844 +1073741824,16,2.21272 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,16,4.51092 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_noindex_managed_32.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_noindex_managed_32.txt new file mode 100644 index 0000000..ef73d06 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_noindex_managed_32.txt @@ -0,0 +1,104 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.00904909 +16777216,2,0.012671 +16777216,4,0.0147302 +16777216,8,0.0272118 +16777216,16,0.0798966 +33554432,1,0.0215378 +33554432,2,0.0238264 +33554432,4,0.0237189 +33554432,8,0.0307313 +33554432,16,0.102224 +67108864,1,0.061697 
+67108864,2,0.0517632 +67108864,4,0.0421304 +67108864,8,0.0484874 +67108864,16,0.111248 +134217728,1,0.154036 +134217728,2,0.130718 +134217728,4,0.0806298 +134217728,8,0.0848169 +134217728,16,0.126216 +268435456,1,0.400805 +268435456,2,0.236055 +268435456,4,0.186377 +268435456,8,0.146209 +268435456,16,0.194962 +536870912,1,0.94888 +536870912,2,0.762158 +536870912,4,0.435002 +536870912,8,0.311005 +536870912,16,0.318592 +1073741824,1,oom +1073741824,2,1.29818 +1073741824,4,1.0278 +1073741824,8,0.664478 +1073741824,16,0.564344 +2147483648,1,oom +2147483648,2,oom +2147483648,4,2.29752 +2147483648,8,1.47168 +2147483648,16,1.11792 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,4.0154 +4294967296,16, +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16, +intersect tests +8388608,1,0.0115425 +8388608,2,0.0173824 +8388608,4,0.0216146 +8388608,8,0.0551066 +8388608,16,0.137882 +16777216,1,0.0239258 +16777216,2,0.0291185 +16777216,4,0.0323441 +16777216,8,0.0618291 +16777216,16,0.178019 +33554432,1,0.0571494 +33554432,2,0.0551414 +33554432,4,0.0517263 +33554432,8,0.0670833 +33554432,16,0.216941 +67108864,1,0.140325 +67108864,2,0.1154 +67108864,4,0.0982508 +67108864,8,0.112464 +67108864,16,0.238025 +134217728,1,0.378028 +134217728,2,0.282535 +134217728,4,0.192029 +134217728,8,0.180579 +134217728,16,0.276268 +268435456,1,0.898554 +268435456,2,0.690758 +268435456,4,0.432286 +268435456,8,0.327116 +268435456,16,0.414152 +536870912,1,oom +536870912,2,1.64112 +536870912,4,0.980567 +536870912,8,0.665205 +536870912,16,0.676533 +1073741824,1,oom +1073741824,2,oom +1073741824,4,2.17646 +1073741824,8,1.44065 +1073741824,16,1.28635 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,3.22378 +2147483648,16,2.5334 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16, diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..e4820ed --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_noindex_nomanaged.txt @@ -0,0 +1,104 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0126341 +16777216,2,0.0156662 +16777216,4,0.0180244 +16777216,8,0.0218276 +16777216,16,0.0320809 +33554432,1,0.0303186 +33554432,2,0.0292536 +33554432,4,0.0320338 +33554432,8,0.0314122 +33554432,16,0.0497111 +67108864,1,0.0733604 +67108864,2,0.0617933 +67108864,4,0.0593879 +67108864,8,0.0592865 +67108864,16,0.0660326 +134217728,1,0.188676 +134217728,2,0.137769 +134217728,4,0.116816 +134217728,8,0.100187 +134217728,16,0.0940442 +268435456,1,0.443171 +268435456,2,0.313036 +268435456,4,0.251006 +268435456,8,0.197405 +268435456,16,0.185187 +536870912,1,oom +536870912,2,0.688961 +536870912,4,0.500017 +536870912,8,0.389059 +536870912,16,0.342239 +1073741824,1,oom +1073741824,2,oom +1073741824,4,1.45416 +1073741824,8,0.823694 +1073741824,16,0.700535 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,2.19697 +2147483648,16,1.58418 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,3.42857 +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom +intersect tests +8388608,1,0.0150569 +8388608,2,0.0198308 +8388608,4,0.0298383 +8388608,8,0.046889 +8388608,16,0.0800154 +16777216,1,0.0293458 +16777216,2,0.0349512 
+16777216,4,0.0417331 +16777216,8,0.0603259 +16777216,16,0.096172 +33554432,1,0.0699863 +33554432,2,0.0649656 +33554432,4,0.0680243 +33554432,8,0.081877 +33554432,16,0.120511 +67108864,1,0.167941 +67108864,2,0.139217 +67108864,4,0.133331 +67108864,8,0.124817 +67108864,16,0.152677 +134217728,1,0.42197 +134217728,2,0.304572 +134217728,4,0.256246 +134217728,8,0.228204 +134217728,16,0.238367 +268435456,1,oom +268435456,2,0.668326 +268435456,4,0.508968 +268435456,8,0.424179 +268435456,16,0.420598 +536870912,1,oom +536870912,2,oom +536870912,4,1.14895 +536870912,8,0.859954 +536870912,16,0.731401 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,8,1.85308 +1073741824,16,1.42397 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,16,3.14241 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_noindex_nomanaged_32.txt b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_noindex_nomanaged_32.txt new file mode 100644 index 0000000..be3d8b5 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/results/strong_scaling_noindex_nomanaged_32.txt @@ -0,0 +1,104 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.00994202 +16777216,2,0.010537 +16777216,4,0.0119603 +16777216,8,0.0174531 +16777216,16,0.0289485 +33554432,1,0.0227154 +33554432,2,0.0188928 +33554432,4,0.0200919 +33554432,8,0.0220262 +33554432,16,0.0342313 +67108864,1,0.0638249 +67108864,2,0.0383857 +67108864,4,0.0346081 +67108864,8,0.0349901 +67108864,16,0.0443116 +134217728,1,0.156629 +134217728,2,0.0922092 +134217728,4,0.0677499 +134217728,8,0.0611287 +134217728,16,0.0643891 +268435456,1,0.408063 +268435456,2,0.221356 +268435456,4,0.145043 +268435456,8,0.106491 +268435456,16,0.100313 +536870912,1,0.958387 +536870912,2,0.519897 +536870912,4,0.344002 +536870912,8,0.210592 +536870912,16,0.193616 +1073741824,1,oom +1073741824,2,1.20162 +1073741824,4,0.966464 +1073741824,8,0.486605 +1073741824,16,0.360796 +2147483648,1,oom +2147483648,2,oom +2147483648,4,1.74458 +2147483648,8,1.10394 +2147483648,16,0.74587 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8, +4294967296,16, +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16, +intersect tests +8388608,1,0.0133827 +8388608,2,0.0152812 +8388608,4,0.0231188 +8388608,8,0.0380242 +8388608,16,0.0761569 +16777216,1,0.0243804 +16777216,2,0.0248084 +16777216,4,0.0302725 +16777216,8,0.0474737 +16777216,16,0.0819866 +33554432,1,0.0549949 +33554432,2,0.042882 +33554432,4,0.0467732 +33554432,8,0.062166 +33554432,16,0.0947354 +67108864,1,0.143825 +67108864,2,0.087937 +67108864,4,0.0806799 +67108864,8,0.0827228 +67108864,16,0.128418 +134217728,1,0.371895 +134217728,2,0.204634 +134217728,4,0.153496 +134217728,8,0.132919 +134217728,16,0.166689 +268435456,1,0.873903 +268435456,2,0.486309 +268435456,4,0.322768 +268435456,8,0.257408 +268435456,16,0.245693 +536870912,1,oom +536870912,2,1.13359 +536870912,4,0.734622 +536870912,8,0.506797 +536870912,16,0.426156 +1073741824,1,oom +1073741824,2,oom +1073741824,4,1.82216 +1073741824,8,1.01878 +1073741824,16,0.86528 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,2.95334 +2147483648,16,1.62837 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16, diff --git a/hash-graph-dehornetify/experiments/dgx2/strong_scaling/strong_scaling.sh 
b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/strong_scaling.sh new file mode 100755 index 0000000..0f790c8 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/strong_scaling/strong_scaling.sh @@ -0,0 +1,92 @@ +keycounts=($(seq 24 1 33)) +# gpucounts=($(seq 0 1 4)) +gpucounts=(1 2 4 8 16) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' ../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +# rm $resultsfile +# echo "keycount,gpucount,time" >> $resultsfile +echo "strong_scaling" +echo "keycount,gpucount,time" +echo "build tests" +# echo "build tests" >> $resultsfile + +# $1 is sizeof(keyval) +# $2 is 32-bit vs 64-bit +for i in "${keycounts[@]}" + do + let kc=$((echo 2^$i) | bc) + # echo "keycount: ${kc}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + let ts=$kc + if [ $2 -eq 4 ] ; then + ts=$(($kc < (2**($2 * 8 - 1)) ? $kc : 2**($2 * 8 - 1))) + fi + + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + # ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc build | grep "time") + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc build | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + +echo "intersect tests" +# echo "intersect tests" >> $resultsfile +for i in "${keycounts[@]}" + do + let kc=$((echo 2^$i) | bc) + kc=$((kc / 2)) + # echo "keycount: ${kc}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + let ts=$kc + if [ $2 -eq 4 ] ; then + ts=$(($kc < (2**($2 * 8 - 1)) ? 
$kc : 2**($2 * 8 - 1))) + fi + + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs * 2 + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + # ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc intersect | grep "time") + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc intersect | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_index_managed.txt new file mode 100644 index 0000000..bbf744c --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_index_managed.txt @@ -0,0 +1,50 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0185682 +33554432,2,0.0669501 +67108864,4,0.132988 +134217728,8,0.269438 +268435456,16,0.559408 +33554432,1,0.0443382 +67108864,2,0.140349 +134217728,4,0.26889 +268435456,8,0.51517 +536870912,16,1.04561 +67108864,1,0.102418 +134217728,2,0.26093 +268435456,4,0.557798 +536870912,8,0.986896 +1073741824,16,2.00255 +134217728,1,0.253509 +268435456,2,0.701043 +536870912,4,1.19875 +1073741824,8,2.11936 +2147483648,16,3.96369 +268435456,1,0.6015 +536870912,2,1.26104 +1073741824,4,2.45516 +2147483648,8,4.4356 +4294967296,16,7.98802 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_index_nomanaged.txt new file mode 100644 index 0000000..15e7b15 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_index_nomanaged.txt @@ -0,0 +1,50 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0202752 +33554432,2,0.0547246 +67108864,4,0.111913 +134217728,8,0.202385 +268435456,16,0.35841 +33554432,1,0.0469064 +67108864,2,0.111024 +134217728,4,0.225743 +268435456,8,0.385973 +536870912,16,0.645602 +67108864,1,0.110916 +134217728,2,0.242035 +268435456,4,0.416214 +536870912,8,0.755983 +1073741824,16,1.29628 +134217728,1,0.258133 +268435456,2,0.530756 +536870912,4,0.858458 +1073741824,8,1.85418 +2147483648,16,2.66905 +268435456,1,0.613959 +536870912,2,1.16148 +1073741824,4,2.31747 +2147483648,8,3.57564 +4294967296,16,6.19864 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom diff --git 
a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_noindex_managed.txt new file mode 100644 index 0000000..d36b319 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_noindex_managed.txt @@ -0,0 +1,50 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0109097 +33554432,2,0.0376719 +67108864,4,0.068353 +134217728,8,0.135574 +268435456,16,0.310204 +33554432,1,0.0289137 +67108864,2,0.0835625 +134217728,4,0.145621 +268435456,8,0.265196 +536870912,16,0.550008 +67108864,1,0.0710492 +134217728,2,0.189354 +268435456,4,0.312841 +536870912,8,0.524655 +1073741824,16,1.05892 +134217728,1,0.177973 +268435456,2,0.342451 +536870912,4,0.671905 +1073741824,8,1.08255 +2147483648,16,2.06928 +268435456,1,0.433854 +536870912,2,0.79433 +1073741824,4,1.49553 +2147483648,8,2.21621 +4294967296,16,4.18388 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_noindex_managed_32.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_noindex_managed_32.txt new file mode 100644 index 0000000..7c68b9a --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_noindex_managed_32.txt @@ -0,0 +1,50 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.00832512 +33554432,2,0.0229919 +67108864,4,0.041599 +134217728,8,0.0808253 +268435456,16,0.19252 +33554432,1,0.0201656 +67108864,2,0.0441057 +134217728,4,0.0781742 +268435456,8,0.146914 +536870912,16,0.31458 +67108864,1,0.0619172 +134217728,2,0.100494 +268435456,4,0.188636 +536870912,8,0.323401 +1073741824,16,0.582574 +134217728,1,0.154259 +268435456,2,0.311089 +536870912,4,0.428251 +1073741824,8,0.639237 +2147483648,16,1.15877 +268435456,1,0.386635 +536870912,2,0.752762 +1073741824,4,0.970545 +2147483648,8,1.28242 +4294967296,16, +536870912,1,0.949373 +1073741824,2,1.81308 +2147483648,4,2.43234 +4294967296,8,3.93738 +8589934592,16, +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..11fff73 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_noindex_nomanaged.txt @@ -0,0 +1,50 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0119286 +33554432,2,0.0289505 +67108864,4,0.0570644 +134217728,8,0.0997847 +268435456,16,0.179529 +33554432,1,0.0303411 +67108864,2,0.062039 +134217728,4,0.113605 +268435456,8,0.203377 +536870912,16,0.347992 +67108864,1,0.0732068 +134217728,2,0.136252 
+268435456,4,0.21679 +536870912,8,0.401846 +1073741824,16,0.631664 +134217728,1,0.182089 +268435456,2,0.310047 +536870912,4,0.431878 +1073741824,8,0.804061 +2147483648,16,1.43775 +268435456,1,0.444469 +536870912,2,0.686124 +1073741824,4,1.35632 +2147483648,8,2.06201 +4294967296,16,3.40746 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_noindex_nomanaged_32.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_noindex_nomanaged_32.txt new file mode 100644 index 0000000..074a6a7 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/build/weak_scaling_noindex_nomanaged_32.txt @@ -0,0 +1,50 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0100372 +33554432,2,0.0184156 +67108864,4,0.0352676 +134217728,8,0.0593203 +268435456,16,0.0990177 +33554432,1,0.02287 +67108864,2,0.0389396 +134217728,4,0.063148 +268435456,8,0.103315 +536870912,16,0.193931 +67108864,1,0.064426 +134217728,2,0.0930171 +268435456,4,0.147283 +536870912,8,0.214569 +1073741824,16,0.406287 +134217728,1,0.156894 +268435456,2,0.218598 +536870912,4,0.327473 +1073741824,8,0.45267 +2147483648,16,0.703426 +268435456,1,0.396662 +536870912,2,0.513813 +1073741824,4,0.990692 +2147483648,8,1.48297 +4294967296,16, +536870912,1,0.958219 +1073741824,2,1.20614 +2147483648,4,1.4342 +4294967296,8, +8589934592,16, +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_index_managed.txt new file mode 100644 index 0000000..8310ccc --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_index_managed.txt @@ -0,0 +1,51 @@ +intersect tests +8388608,1,0.0219832 +16777216,2,0.0727163 +33554432,4,0.147128 +67108864,8,0.299238 +134217728,16,0.673503 +16777216,1,0.0553472 +33554432,2,0.146389 +67108864,4,0.284975 +134217728,8,0.548181 +268435456,16,1.15068 +33554432,1,0.0994273 +67108864,2,0.294668 +134217728,4,0.544358 +268435456,8,1.06538 +536870912,16,2.16724 +67108864,1,0.238842 +134217728,2,0.662405 +268435456,4,1.17096 +536870912,8,2.1106 +1073741824,16,4.14013 +134217728,1,0.576579 +268435456,2,1.38175 +536870912,4,2.49457 +1073741824,8,4.48523 +2147483648,16,8.24802 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git 
a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_index_nomanaged.txt new file mode 100644 index 0000000..a8c8ef4 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_index_nomanaged.txt @@ -0,0 +1,51 @@ +intersect tests +8388608,1,0.0262687 +16777216,2,0.0606628 +33554432,4,0.132055 +67108864,8,0.233272 +134217728,16,0.42504 +16777216,1,0.0499712 +33554432,2,0.11939 +67108864,4,0.229404 +134217728,8,0.422885 +268435456,16,0.738818 +33554432,1,0.100362 +67108864,2,0.241072 +134217728,4,0.480041 +268435456,8,0.769428 +536870912,16,1.38403 +67108864,1,0.23884 +134217728,2,0.51668 +268435456,4,0.978991 +536870912,8,1.55289 +1073741824,16,2.82411 +134217728,1,0.574527 +268435456,2,1.12843 +536870912,4,1.98151 +1073741824,8,3.44955 +2147483648,16,6.04607 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_noindex_managed.txt new file mode 100644 index 0000000..a8cabf0 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_noindex_managed.txt @@ -0,0 +1,51 @@ +intersect tests +8388608,1,0.0140145 +16777216,2,0.0440474 +33554432,4,0.080041 +67108864,8,0.162275 +134217728,16,0.393552 +16777216,1,0.0297114 +33554432,2,0.0843284 +67108864,4,0.158408 +134217728,8,0.29462 +268435456,16,0.66044 +33554432,1,0.0702464 +67108864,2,0.181828 +134217728,4,0.310323 +268435456,8,0.560461 +536870912,16,1.16433 +67108864,1,0.171015 +134217728,2,0.413277 +268435456,4,0.655204 +536870912,8,1.17638 +1073741824,16,2.30058 +134217728,1,0.420885 +268435456,2,0.952573 +536870912,4,1.50264 +1073741824,8,2.46576 +2147483648,16,4.42639 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_noindex_managed_32.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_noindex_managed_32.txt new file mode 100644 index 0000000..8edd37e --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_noindex_managed_32.txt @@ -0,0 +1,51 @@ +intersect tests +8388608,1,0.0117361 +16777216,2,0.0280863 +33554432,4,0.0505272 +67108864,8,0.10901 +134217728,16,0.270091 +16777216,1,0.0231188 +33554432,2,0.0541307 +67108864,4,0.09762 +134217728,8,0.180542 +268435456,16,0.420773 +33554432,1,0.0552274 +67108864,2,0.117297 +134217728,4,0.193082 
+268435456,8,0.336505 +536870912,16,0.679866 +67108864,1,0.14256 +134217728,2,0.280616 +268435456,4,0.415934 +536870912,8,0.673036 +1073741824,16,1.26227 +134217728,1,0.364345 +268435456,2,0.65565 +536870912,4,0.937378 +1073741824,8,1.41996 +2147483648,16,2.51885 +268435456,1,0.900495 +536870912,2,1.6444 +1073741824,4,2.19446 +2147483648,8,3.28565 +4294967296,16, +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..29b1779 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged.txt @@ -0,0 +1,51 @@ +intersect tests +8388608,1,0.0153027 +16777216,2,0.035116 +33554432,4,0.0679076 +67108864,8,0.128524 +134217728,16,0.229472 +16777216,1,0.0306094 +33554432,2,0.0660408 +67108864,4,0.124288 +134217728,8,0.236119 +268435456,16,0.403732 +33554432,1,0.0666563 +67108864,2,0.13492 +134217728,4,0.258693 +268435456,8,0.431547 +536870912,16,0.728695 +67108864,1,0.171244 +134217728,2,0.303256 +268435456,4,0.502834 +536870912,8,0.827554 +1073741824,16,1.41484 +134217728,1,0.412305 +268435456,2,0.667201 +536870912,4,1.11412 +1073741824,8,1.91398 +2147483648,16,3.24511 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged_32.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged_32.txt new file mode 100644 index 0000000..ec49ae9 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged_32.txt @@ -0,0 +1,51 @@ +intersect tests +8388608,1,0.0166717 +16777216,2,0.0248187 +33554432,4,0.0454615 +67108864,8,0.085718 +134217728,16,0.163085 +16777216,1,0.0244019 +33554432,2,0.0434657 +67108864,4,0.0799785 +134217728,8,0.13587 +268435456,16,0.255811 +33554432,1,0.0539709 +67108864,2,0.0888187 +134217728,4,0.157425 +268435456,8,0.24063 +536870912,16,0.419382 +67108864,1,0.137261 +134217728,2,0.204082 +268435456,4,0.31848 +536870912,8,0.51637 +1073741824,16,0.868741 +134217728,1,0.355326 +268435456,2,0.480255 +536870912,4,0.724696 +1073741824,8,1.02292 +2147483648,16,1.62888 +268435456,1,0.874898 +536870912,2,1.12277 +1073741824,4,2.11837 +2147483648,8,2.81142 +4294967296,16, +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom 
+4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_index_managed.txt new file mode 100644 index 0000000..3420e91 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_index_managed.txt @@ -0,0 +1,104 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0185682 +33554432,2,0.0669501 +67108864,4,0.132988 +134217728,8,0.269438 +268435456,16,0.559408 +33554432,1,0.0443382 +67108864,2,0.140349 +134217728,4,0.26889 +268435456,8,0.51517 +536870912,16,1.04561 +67108864,1,0.102418 +134217728,2,0.26093 +268435456,4,0.557798 +536870912,8,0.986896 +1073741824,16,2.00255 +134217728,1,0.253509 +268435456,2,0.701043 +536870912,4,1.19875 +1073741824,8,2.11936 +2147483648,16,3.96369 +268435456,1,0.6015 +536870912,2,1.26104 +1073741824,4,2.45516 +2147483648,8,4.4356 +4294967296,16,7.98802 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +intersect tests +8388608,1,0.0219832 +16777216,2,0.0727163 +33554432,4,0.147128 +67108864,8,0.299238 +134217728,16,0.673503 +16777216,1,0.0553472 +33554432,2,0.146389 +67108864,4,0.284975 +134217728,8,0.548181 +268435456,16,1.15068 +33554432,1,0.0994273 +67108864,2,0.294668 +134217728,4,0.544358 +268435456,8,1.06538 +536870912,16,2.16724 +67108864,1,0.238842 +134217728,2,0.662405 +268435456,4,1.17096 +536870912,8,2.1106 +1073741824,16,4.14013 +134217728,1,0.576579 +268435456,2,1.38175 +536870912,4,2.49457 +1073741824,8,4.48523 +2147483648,16,8.24802 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_index_nomanaged.txt new file mode 100644 index 0000000..c760549 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_index_nomanaged.txt @@ -0,0 +1,104 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0202752 +33554432,2,0.0547246 +67108864,4,0.111913 +134217728,8,0.202385 +268435456,16,0.35841 +33554432,1,0.0469064 +67108864,2,0.111024 +134217728,4,0.225743 +268435456,8,0.385973 +536870912,16,0.645602 +67108864,1,0.110916 +134217728,2,0.242035 +268435456,4,0.416214 +536870912,8,0.755983 +1073741824,16,1.29628 +134217728,1,0.258133 +268435456,2,0.530756 +536870912,4,0.858458 +1073741824,8,1.85418 +2147483648,16,2.66905 +268435456,1,0.613959 +536870912,2,1.16148 +1073741824,4,2.31747 +2147483648,8,3.57564 +4294967296,16,6.19864 +536870912,1,oom 
+1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +intersect tests +8388608,1,0.0262687 +16777216,2,0.0606628 +33554432,4,0.132055 +67108864,8,0.233272 +134217728,16,0.42504 +16777216,1,0.0499712 +33554432,2,0.11939 +67108864,4,0.229404 +134217728,8,0.422885 +268435456,16,0.738818 +33554432,1,0.100362 +67108864,2,0.241072 +134217728,4,0.480041 +268435456,8,0.769428 +536870912,16,1.38403 +67108864,1,0.23884 +134217728,2,0.51668 +268435456,4,0.978991 +536870912,8,1.55289 +1073741824,16,2.82411 +134217728,1,0.574527 +268435456,2,1.12843 +536870912,4,1.98151 +1073741824,8,3.44955 +2147483648,16,6.04607 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_noindex_managed.txt new file mode 100644 index 0000000..68b7656 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_noindex_managed.txt @@ -0,0 +1,104 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0109097 +33554432,2,0.0376719 +67108864,4,0.068353 +134217728,8,0.135574 +268435456,16,0.310204 +33554432,1,0.0289137 +67108864,2,0.0835625 +134217728,4,0.145621 +268435456,8,0.265196 +536870912,16,0.550008 +67108864,1,0.0710492 +134217728,2,0.189354 +268435456,4,0.312841 +536870912,8,0.524655 +1073741824,16,1.05892 +134217728,1,0.177973 +268435456,2,0.342451 +536870912,4,0.671905 +1073741824,8,1.08255 +2147483648,16,2.06928 +268435456,1,0.433854 +536870912,2,0.79433 +1073741824,4,1.49553 +2147483648,8,2.21621 +4294967296,16,4.18388 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +intersect tests +8388608,1,0.0140145 +16777216,2,0.0440474 +33554432,4,0.080041 +67108864,8,0.162275 +134217728,16,0.393552 +16777216,1,0.0297114 +33554432,2,0.0843284 +67108864,4,0.158408 +134217728,8,0.29462 +268435456,16,0.66044 +33554432,1,0.0702464 +67108864,2,0.181828 +134217728,4,0.310323 +268435456,8,0.560461 +536870912,16,1.16433 +67108864,1,0.171015 +134217728,2,0.413277 +268435456,4,0.655204 +536870912,8,1.17638 +1073741824,16,2.30058 +134217728,1,0.420885 +268435456,2,0.952573 +536870912,4,1.50264 +1073741824,8,2.46576 +2147483648,16,4.42639 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom 
+4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_noindex_managed_32.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_noindex_managed_32.txt new file mode 100644 index 0000000..40627aa --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_noindex_managed_32.txt @@ -0,0 +1,104 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.00832512 +33554432,2,0.0229919 +67108864,4,0.041599 +134217728,8,0.0808253 +268435456,16,0.19252 +33554432,1,0.0201656 +67108864,2,0.0441057 +134217728,4,0.0781742 +268435456,8,0.146914 +536870912,16,0.31458 +67108864,1,0.0619172 +134217728,2,0.100494 +268435456,4,0.188636 +536870912,8,0.323401 +1073741824,16,0.582574 +134217728,1,0.154259 +268435456,2,0.311089 +536870912,4,0.428251 +1073741824,8,0.639237 +2147483648,16,1.15877 +268435456,1,0.386635 +536870912,2,0.752762 +1073741824,4,0.970545 +2147483648,8,1.28242 +4294967296,16, +536870912,1,0.949373 +1073741824,2,1.81308 +2147483648,4,2.43234 +4294967296,8,3.93738 +8589934592,16, +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +intersect tests +8388608,1,0.0117361 +16777216,2,0.0280863 +33554432,4,0.0505272 +67108864,8,0.10901 +134217728,16,0.270091 +16777216,1,0.0231188 +33554432,2,0.0541307 +67108864,4,0.09762 +134217728,8,0.180542 +268435456,16,0.420773 +33554432,1,0.0552274 +67108864,2,0.117297 +134217728,4,0.193082 +268435456,8,0.336505 +536870912,16,0.679866 +67108864,1,0.14256 +134217728,2,0.280616 +268435456,4,0.415934 +536870912,8,0.673036 +1073741824,16,1.26227 +134217728,1,0.364345 +268435456,2,0.65565 +536870912,4,0.937378 +1073741824,8,1.41996 +2147483648,16,2.51885 +268435456,1,0.900495 +536870912,2,1.6444 +1073741824,4,2.19446 +2147483648,8,3.28565 +4294967296,16, +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..f4e9d14 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_noindex_nomanaged.txt @@ -0,0 +1,104 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0119286 +33554432,2,0.0289505 +67108864,4,0.0570644 +134217728,8,0.0997847 +268435456,16,0.179529 +33554432,1,0.0303411 +67108864,2,0.062039 +134217728,4,0.113605 +268435456,8,0.203377 +536870912,16,0.347992 
+67108864,1,0.0732068 +134217728,2,0.136252 +268435456,4,0.21679 +536870912,8,0.401846 +1073741824,16,0.631664 +134217728,1,0.182089 +268435456,2,0.310047 +536870912,4,0.431878 +1073741824,8,0.804061 +2147483648,16,1.43775 +268435456,1,0.444469 +536870912,2,0.686124 +1073741824,4,1.35632 +2147483648,8,2.06201 +4294967296,16,3.40746 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +intersect tests +8388608,1,0.0153027 +16777216,2,0.035116 +33554432,4,0.0679076 +67108864,8,0.128524 +134217728,16,0.229472 +16777216,1,0.0306094 +33554432,2,0.0660408 +67108864,4,0.124288 +134217728,8,0.236119 +268435456,16,0.403732 +33554432,1,0.0666563 +67108864,2,0.13492 +134217728,4,0.258693 +268435456,8,0.431547 +536870912,16,0.728695 +67108864,1,0.171244 +134217728,2,0.303256 +268435456,4,0.502834 +536870912,8,0.827554 +1073741824,16,1.41484 +134217728,1,0.412305 +268435456,2,0.667201 +536870912,4,1.11412 +1073741824,8,1.91398 +2147483648,16,3.24511 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_noindex_nomanaged_32.txt b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_noindex_nomanaged_32.txt new file mode 100644 index 0000000..dbcb2c4 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/results/weak_scaling_noindex_nomanaged_32.txt @@ -0,0 +1,104 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0100372 +33554432,2,0.0184156 +67108864,4,0.0352676 +134217728,8,0.0593203 +268435456,16,0.0990177 +33554432,1,0.02287 +67108864,2,0.0389396 +134217728,4,0.063148 +268435456,8,0.103315 +536870912,16,0.193931 +67108864,1,0.064426 +134217728,2,0.0930171 +268435456,4,0.147283 +536870912,8,0.214569 +1073741824,16,0.406287 +134217728,1,0.156894 +268435456,2,0.218598 +536870912,4,0.327473 +1073741824,8,0.45267 +2147483648,16,0.703426 +268435456,1,0.396662 +536870912,2,0.513813 +1073741824,4,0.990692 +2147483648,8,1.48297 +4294967296,16, +536870912,1,0.958219 +1073741824,2,1.20614 +2147483648,4,1.4342 +4294967296,8, +8589934592,16, +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +intersect tests +8388608,1,0.0166717 +16777216,2,0.0248187 +33554432,4,0.0454615 +67108864,8,0.085718 +134217728,16,0.163085 +16777216,1,0.0244019 +33554432,2,0.0434657 +67108864,4,0.0799785 +134217728,8,0.13587 +268435456,16,0.255811 +33554432,1,0.0539709 
+67108864,2,0.0888187 +134217728,4,0.157425 +268435456,8,0.24063 +536870912,16,0.419382 +67108864,1,0.137261 +134217728,2,0.204082 +268435456,4,0.31848 +536870912,8,0.51637 +1073741824,16,0.868741 +134217728,1,0.355326 +268435456,2,0.480255 +536870912,4,0.724696 +1073741824,8,1.02292 +2147483648,16,1.62888 +268435456,1,0.874898 +536870912,2,1.12277 +1073741824,4,2.11837 +2147483648,8,2.81142 +4294967296,16, +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2/weak_scaling/weak_scaling.sh b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/weak_scaling.sh new file mode 100755 index 0000000..95c40b8 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2/weak_scaling/weak_scaling.sh @@ -0,0 +1,94 @@ +keycounts=($(seq 24 1 33)) +# gpucounts=($(seq 0 1 4)) +gpucounts=(1 2 4 8 16) +# gpucounts=(1 2 4 8) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' ../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +# rm $resultsfile +# echo "keycount,gpucount,time" >> $resultsfile +echo "weak_scaling" +echo "keycount,gpucount,time" +echo "build tests" +# echo "build tests" >> $resultsfile + +for i in "${keycounts[@]}" + do + let kcdev=$((echo 2^$i) | bc) + # echo "keycount / dev: ${kcdev}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + let kc=$(($kcdev * $gc)) + let ts=$kc + if [ $2 -eq 4 ] ; then + ts=$(($kc < (2**($2 * 8 - 1)) ? $kc : 2**($2 * 8 - 1))) + fi + + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + # ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc build | grep "time") + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc build | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + +echo "intersect tests" +# echo "intersect tests" >> $resultsfile + +for i in "${keycounts[@]}" + do + let kcdev=$((echo 2^$i) | bc) + kcdev=$((kcdev / 2)) + # echo "keycount / dev : ${kcdev}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + let kc=$(($kcdev * $gc)) + let ts=$kc + if [ $2 -eq 4 ] ; then + ts=$(($kc < (2**($2 * 8 - 1)) ? 
$kc : 2**($2 * 8 - 1))) + fi + + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs * 2 + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + # ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc intersect | grep "time") + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc intersect | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + diff --git a/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/duplicate_keys.sh b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/duplicate_keys.sh new file mode 100755 index 0000000..015ad18 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/duplicate_keys.sh @@ -0,0 +1,73 @@ +keycount=33 +gpucount=16 +tablesizes=($(seq 21 1 33)) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' ../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +# rm $resultsfile +# echo "keycount,tablesize,gpucount,time" >> $resultsfile +echo "duplicate_keys" +echo "keycount,tablesize,gpucount,time" +echo "build tests" +# echo "build tests" >> $resultsfile + +for i in "${tablesizes[@]}" + do + let kc=$((echo 2^$keycount) | bc) + # let gc=$((echo 2^$gpucount) | bc) + let gc=$gpucount + let ts=$((echo 2^$i) | bc) + + # echo "tableSize: ${ts}" + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * 8) + (2 * $kc * $1) + (2 * $ts * 8)) + ($kc * 8) + ($kc * 8) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc build | grep "time") + + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${ts},${gc},${time}" >> $resultsfile + echo "${kc},${ts},${gc},${time}" + done + +echo "intersect tests" +# echo "intersect tests" >> $resultsfile + +keycount=$((echo $keycount - 1) | bc) +for i in "${tablesizes[@]}" + do + let kc=$((echo 2^$keycount) | bc) + # let gc=$((echo 2^$gpucount) | bc) + let gc=$gpucount + let ts=$((echo 2^$i) | bc) + + # echo "tableSize: ${ts}" + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * 8) + (2 * $kc * $1) + (2 * $ts * 8)) + ($kc * 8) + ($kc * 8) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs * 2 + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc intersect | grep "time") + + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${ts},${gc},${time}" >> $resultsfile + echo "${kc},${ts},${gc},${time}" + done diff --git a/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/build/duplicate_keys_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/build/duplicate_keys_index_managed.txt new file mode 100644 index 0000000..89df96e --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/build/duplicate_keys_index_managed.txt @@ -0,0 +1,13 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,16,oom +8589934592,16,oom 
+8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/build/duplicate_keys_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/build/duplicate_keys_index_nomanaged.txt new file mode 100644 index 0000000..89df96e --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/build/duplicate_keys_index_nomanaged.txt @@ -0,0 +1,13 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/build/duplicate_keys_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/build/duplicate_keys_noindex_managed.txt new file mode 100644 index 0000000..e5b11b5 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/build/duplicate_keys_noindex_managed.txt @@ -0,0 +1,13 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,2097152,16,9.19325 +8589934592,4194304,16,8.5119 +8589934592,8388608,16,8.71239 +8589934592,16777216,16,8.39451 +8589934592,33554432,16,9.13138 +8589934592,67108864,16,9.23199 +8589934592,134217728,16,9.25678 +8589934592,268435456,16,8.79075 +8589934592,536870912,16,9.32799 +8589934592,1073741824,16,9.25154 diff --git a/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/build/duplicate_keys_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/build/duplicate_keys_noindex_nomanaged.txt new file mode 100644 index 0000000..d35f431 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/build/duplicate_keys_noindex_nomanaged.txt @@ -0,0 +1,13 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,2097152,16,5.8532 +8589934592,4194304,16,6.08746 +8589934592,8388608,16,6.37243 +8589934592,16777216,16,5.88324 +8589934592,33554432,16,5.86087 +8589934592,67108864,16,6.38923 +8589934592,134217728,16,5.51001 +8589934592,268435456,16,6.24494 +8589934592,536870912,16,6.85461 +8589934592,1073741824,16,6.20287 diff --git a/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/duplicate_keys_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/duplicate_keys_index_managed.txt new file mode 100644 index 0000000..3af4c36 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/duplicate_keys_index_managed.txt @@ -0,0 +1,30 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +intersect tests +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/duplicate_keys_index_nomanaged.txt 
b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/duplicate_keys_index_nomanaged.txt new file mode 100644 index 0000000..3af4c36 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/duplicate_keys_index_nomanaged.txt @@ -0,0 +1,30 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +8589934592,16,oom +intersect tests +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/duplicate_keys_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/duplicate_keys_noindex_managed.txt new file mode 100644 index 0000000..6a619ae --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/duplicate_keys_noindex_managed.txt @@ -0,0 +1,30 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,2097152,16,9.19325 +8589934592,4194304,16,8.5119 +8589934592,8388608,16,8.71239 +8589934592,16777216,16,8.39451 +8589934592,33554432,16,9.13138 +8589934592,67108864,16,9.23199 +8589934592,134217728,16,9.25678 +8589934592,268435456,16,8.79075 +8589934592,536870912,16,9.32799 +8589934592,1073741824,16,9.25154 +8589934592,2147483648,16,11.9001 +8589934592,16,oom +8589934592,16,oom +intersect tests +4294967296,2097152,16,0 +4294967296,4194304,16,0 +4294967296,8388608,16,0 +4294967296,16777216,16,0 +4294967296,33554432,16,0 +4294967296,67108864,16,0 +4294967296,134217728,16,0 +4294967296,268435456,16,0 +4294967296,536870912,16,52.1995 +4294967296,1073741824,16,33.2493 +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/duplicate_keys_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/duplicate_keys_noindex_nomanaged.txt new file mode 100644 index 0000000..685e7db --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/duplicate_keys_noindex_nomanaged.txt @@ -0,0 +1,30 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +8589934592,2097152,16,5.8532 +8589934592,4194304,16,6.08746 +8589934592,8388608,16,6.37243 +8589934592,16777216,16,5.88324 +8589934592,33554432,16,5.86087 +8589934592,67108864,16,6.38923 +8589934592,134217728,16,5.51001 +8589934592,268435456,16,6.24494 +8589934592,536870912,16,6.85461 +8589934592,1073741824,16,6.20287 +8589934592,2147483648,16,6.70001 +8589934592,16,oom +8589934592,16,oom +intersect tests +4294967296,2097152,16,0 +4294967296,4194304,16,0 +4294967296,8388608,16,0 +4294967296,16777216,16,0 +4294967296,33554432,16,0 +4294967296,67108864,16,0 +4294967296,134217728,16,0 +4294967296,268435456,16,0 +4294967296,536870912,16,0 +4294967296,1073741824,16,0 +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/intersect/duplicate_keys_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/intersect/duplicate_keys_index_managed.txt new file mode 100644 index 0000000..17eb3cf --- 
/dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/intersect/duplicate_keys_index_managed.txt @@ -0,0 +1,14 @@ +intersect tests +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/intersect/duplicate_keys_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/intersect/duplicate_keys_index_nomanaged.txt new file mode 100644 index 0000000..17eb3cf --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/intersect/duplicate_keys_index_nomanaged.txt @@ -0,0 +1,14 @@ +intersect tests +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/intersect/duplicate_keys_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/intersect/duplicate_keys_noindex_managed.txt new file mode 100644 index 0000000..2e52e97 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/intersect/duplicate_keys_noindex_managed.txt @@ -0,0 +1,14 @@ +intersect tests +4294967296,2097152,16,0 +4294967296,4194304,16,0 +4294967296,8388608,16,0 +4294967296,16777216,16,0 +4294967296,33554432,16,0 +4294967296,67108864,16,0 +4294967296,134217728,16,0 +4294967296,268435456,16,0 +4294967296,536870912,16,52.1995 +4294967296,1073741824,16,33.2493 +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/intersect/duplicate_keys_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/intersect/duplicate_keys_noindex_nomanaged.txt new file mode 100644 index 0000000..e111f25 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/duplicate_keys/results/intersect/duplicate_keys_noindex_nomanaged.txt @@ -0,0 +1,14 @@ +intersect tests +4294967296,2097152,16,0 +4294967296,4194304,16,0 +4294967296,8388608,16,0 +4294967296,16777216,16,0 +4294967296,33554432,16,0 +4294967296,67108864,16,0 +4294967296,134217728,16,0 +4294967296,268435456,16,0 +4294967296,536870912,16,0 +4294967296,1073741824,16,0 +4294967296,16,oom +4294967296,16,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/experiments.lsf b/hash-graph-dehornetify/experiments/dgx2_old/experiments.lsf new file mode 100755 index 0000000..39c88e9 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/experiments.lsf @@ -0,0 +1,91 @@ +#!/bin/bash +#BSUB -P BIF115 +#BSUB -W 2:00 +#BSUB -nnodes 1 +#BSUB -alloc_flags gpumps +#BSUB -J snmg-hg-experiments +#BSUB -o snmg-hg-experiments.%J +#BSUB -e snmg-hg-experiments.%J + +buildpath="../../build" +includepath="../../include" + +declare -a modes=("noindex_nomanaged" "index_nomanaged" "noindex_managed" "index_managed") +# declare -a exps=("strong_scaling" "weak_scaling" "duplicate_keys") +declare -a exps=("strong_scaling" "weak_scaling") + +# 64-bit section +sed -i 's/^#define B32/\/\/&/' $includepath/MultiHashGraph.cuh +keysize=8 +echo "64-bit keys" + +for mode in "${modes[@]}" + do + bytes=8 + if [ 
"$mode" == "noindex_nomanaged" ]; then + sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + elif [ "$mode" == "index_nomanaged" ]; then + sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + bytes=16 + elif [ "$mode" == "noindex_managed" ]; then + sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh + elif [ "$mode" == "index_managed" ]; then + sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh + sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + bytes=16 + fi + + make -C $buildpath multi-hash + + echo $mode + + for exp in "${exps[@]}" + do + echo $exp + ./$exp/$exp.sh $bytes $keysize > ./$exp/results/$exp\_$mode.txt + head -n -$(cat ./$exp/results/$exp\_$mode.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode.txt > ./$exp/results/build/$exp\_$mode.txt + tail -n +$(cat ./$exp/results/$exp\_$mode.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode.txt > ./$exp/results/intersect/$exp\_$mode.txt + done + done + +# 32-bit section +sed -i 's/^\/\/.*#define B32/#define B32/' $includepath/MultiHashGraph.cuh +keysize=4 +echo "32-bit keys" + +for mode in "${modes[@]}" + do + bytes=8 + if [ "$mode" == "noindex_nomanaged" ]; then + sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + elif [ "$mode" == "index_nomanaged" ]; then + sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + bytes=12 + continue + elif [ "$mode" == "noindex_managed" ]; then + sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh + elif [ "$mode" == "index_managed" ]; then + sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh + sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + bytes=12 + continue + fi + + make -C $buildpath multi-hash + + echo $mode + + for exp in "${exps[@]}" + do + echo $exp + ./$exp/$exp.sh $bytes $keysize > ./$exp/results/$exp\_$mode\_32.txt + head -n -$(cat ./$exp/results/$exp\_$mode\_32.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode\_32.txt > ./$exp/results/build/$exp\_$mode\_32.txt + tail -n +$(cat ./$exp/results/$exp\_$mode\_32.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode\_32.txt > ./$exp/results/intersect/$exp\_$mode\_32.txt + done + done diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/build/strong_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/build/strong_scaling_index_managed.txt new file mode 100644 index 0000000..58dccc5 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/build/strong_scaling_index_managed.txt @@ -0,0 +1,55 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0296028 +16777216,2,0.0211814 +16777216,4,0.0228475 +16777216,8,0.0461783 +16777216,16,0.124136 +33554432,1,0.0534559 +33554432,2,0.0364984 
+33554432,4,0.0404316 +33554432,8,0.0484895 +33554432,16,0.128258 +67108864,1,0.10477 +67108864,2,0.0773919 +67108864,4,0.0601795 +67108864,8,0.0641454 +67108864,16,0.139177 +134217728,1,0.193333 +134217728,2,0.109293 +134217728,4,0.106492 +134217728,8,0.124455 +134217728,16,0.161923 +268435456,1,0.403223 +268435456,2,0.243539 +268435456,4,0.182845 +268435456,8,0.154625 +268435456,16,0.182925 +536870912,1,oom +536870912,2,0.480817 +536870912,4,0.357916 +536870912,8,0.245329 +536870912,16,0.24292 +1073741824,1,oom +1073741824,2,oom +1073741824,4,0.766616 +1073741824,8,0.476475 +1073741824,16,0.420252 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,0.92134 +2147483648,16,0.73098 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,1.34331 +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom +17179869184,1,oom +17179869184,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/build/strong_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/build/strong_scaling_index_nomanaged.txt new file mode 100644 index 0000000..5ccba5d --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/build/strong_scaling_index_nomanaged.txt @@ -0,0 +1,55 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.022741 +16777216,2,0.018942 +16777216,4,0.0277197 +16777216,8,0.0688957 +16777216,16,0.234838 +33554432,1,0.0383621 +33554432,2,0.0319109 +33554432,4,0.0385516 +33554432,8,0.073684 +33554432,16,0.238519 +67108864,1,0.0706621 +67108864,2,0.050602 +67108864,4,0.0551864 +67108864,8,0.087724 +67108864,16,0.245663 +134217728,1,0.136801 +134217728,2,0.082772 +134217728,4,0.0808161 +134217728,8,0.115148 +134217728,16,0.271013 +268435456,1,0.268511 +268435456,2,0.152663 +268435456,4,0.113039 +268435456,8,0.145236 +268435456,16,0.302733 +536870912,1,oom +536870912,2,0.307621 +536870912,4,0.220303 +536870912,8,0.211846 +536870912,16,0.358412 +1073741824,1,oom +1073741824,2,oom +1073741824,4,0.384056 +1073741824,8,0.337852 +1073741824,16,0.457368 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,0.639807 +2147483648,16,0.668983 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,1.09689 +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom +17179869184,1,oom +17179869184,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/build/strong_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/build/strong_scaling_noindex_managed.txt new file mode 100644 index 0000000..493d259 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/build/strong_scaling_noindex_managed.txt @@ -0,0 +1,55 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0233011 +16777216,2,0.0149637 +16777216,4,0.0101581 +16777216,8,0.00941056 +16777216,16,0.0120607 +33554432,1,0.0420454 +33554432,2,0.0267848 +33554432,4,0.0214405 +33554432,8,0.0144456 +33554432,16,0.0155832 +67108864,1,0.0737126 +67108864,2,0.0497254 +67108864,4,0.0394107 +67108864,8,0.0287048 +67108864,16,0.022187 +134217728,1,0.149059 +134217728,2,0.0848794 +134217728,4,0.0676147 +134217728,8,0.0455834 +134217728,16,0.0316498 +268435456,1,0.296057 +268435456,2,0.150069 +268435456,4,0.129866 +268435456,8,0.111344 +268435456,16,0.0522158 +536870912,1,oom 
+536870912,2,0.344782 +536870912,4,0.233312 +536870912,8,0.152255 +536870912,16,0.147939 +1073741824,1,oom +1073741824,2,oom +1073741824,4,0.474309 +1073741824,8,0.292828 +1073741824,16,0.145578 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,0.823648 +2147483648,16,0.304572 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,0.673159 +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom +17179869184,1,oom +17179869184,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/build/strong_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/build/strong_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..562d1ad --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/build/strong_scaling_noindex_nomanaged.txt @@ -0,0 +1,55 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0182999 +16777216,2,0.0128737 +16777216,4,0.0151777 +16777216,8,0.0353987 +16777216,16,0.124003 +33554432,1,0.0316549 +33554432,2,0.0220211 +33554432,4,0.0249856 +33554432,8,0.0412436 +33554432,16,0.123635 +67108864,1,0.0582021 +67108864,2,0.0384553 +67108864,4,0.0406108 +67108864,8,0.0496108 +67108864,16,0.13178 +134217728,1,0.099458 +134217728,2,0.0639355 +134217728,4,0.0570204 +134217728,8,0.0629524 +134217728,16,0.149638 +268435456,1,0.207813 +268435456,2,0.114447 +268435456,4,0.080724 +268435456,8,0.0887398 +268435456,16,0.159819 +536870912,1,oom +536870912,2,0.219101 +536870912,4,0.155303 +536870912,8,0.124144 +536870912,16,0.189925 +1073741824,1,oom +1073741824,2,oom +1073741824,4,0.325184 +1073741824,8,0.195276 +1073741824,16,0.265833 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,0.384909 +2147483648,16,0.393063 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,0.66076 +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom +17179869184,1,oom +17179869184,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/intersect/strong_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/intersect/strong_scaling_index_managed.txt new file mode 100644 index 0000000..58bc206 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/intersect/strong_scaling_index_managed.txt @@ -0,0 +1,56 @@ +intersect tests +8388608,1,0.0332902 +8388608,2,0.0282337 +8388608,4,0.0465828 +8388608,8,0.110171 +8388608,16,0.362483 +16777216,1,0.0634706 +16777216,2,0.0444355 +16777216,4,0.054229 +16777216,8,0.114781 +16777216,16,0.359557 +33554432,1,0.109586 +33554432,2,0.0822753 +33554432,4,0.0876564 +33554432,8,0.12997 +33554432,16,0.371552 +67108864,1,0.225124 +67108864,2,0.135093 +67108864,4,0.137595 +67108864,8,0.175028 +67108864,16,0.383305 +134217728,1,0.438625 +134217728,2,0.263342 +134217728,4,0.205079 +134217728,8,0.251693 +134217728,16,0.440024 +268435456,1,oom +268435456,2,0.552923 +268435456,4,0.396929 +268435456,8,0.311404 +268435456,16,0.504994 +536870912,1,oom +536870912,2,oom +536870912,4,0.715746 +536870912,8,0.514472 +536870912,16,0.630289 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,8,1.19107 +1073741824,16,1.09204 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,16,1.5707 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom 
+4294967296,16,oom +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/intersect/strong_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/intersect/strong_scaling_index_nomanaged.txt new file mode 100644 index 0000000..d22fab9 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/intersect/strong_scaling_index_nomanaged.txt @@ -0,0 +1,56 @@ +intersect tests +8388608,1,0.025303 +8388608,2,0.0289085 +8388608,4,0.0618701 +8388608,8,0.187703 +8388608,16,0.679554 +16777216,1,0.0487721 +16777216,2,0.0417792 +16777216,4,0.0687923 +16777216,8,0.196258 +16777216,16,0.691112 +33554432,1,0.0771205 +33554432,2,0.07074 +33554432,4,0.0946575 +33554432,8,0.205549 +33554432,16,0.696418 +67108864,1,0.153574 +67108864,2,0.101545 +67108864,4,0.125143 +67108864,8,0.235686 +67108864,16,0.716024 +134217728,1,0.294188 +134217728,2,0.186253 +134217728,4,0.191461 +134217728,8,0.280536 +134217728,16,0.754705 +268435456,1,oom +268435456,2,0.326551 +268435456,4,0.289156 +268435456,8,0.372831 +268435456,16,0.857161 +536870912,1,oom +536870912,2,oom +536870912,4,0.516215 +536870912,8,0.543917 +536870912,16,0.981005 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,8,0.933815 +1073741824,16,1.27896 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,16,1.86628 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,oom +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/intersect/strong_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/intersect/strong_scaling_noindex_managed.txt new file mode 100644 index 0000000..55ed5c0 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/intersect/strong_scaling_noindex_managed.txt @@ -0,0 +1,56 @@ +intersect tests +8388608,1,0.02368 +8388608,2,0.0164137 +8388608,4,0.0231926 +8388608,8,0.044161 +8388608,16,0.140991 +16777216,1,0.0500808 +16777216,2,0.0302336 +16777216,4,0.0331008 +16777216,8,0.0515144 +16777216,16,0.137426 +33554432,1,0.0921139 +33554432,2,0.0563671 +33554432,4,0.0545987 +33554432,8,0.056833 +33554432,16,0.162279 +67108864,1,0.168991 +67108864,2,0.104736 +67108864,4,0.0845251 +67108864,8,0.0784486 +67108864,16,0.15598 +134217728,1,0.328196 +134217728,2,0.185333 +134217728,4,0.161569 +134217728,8,0.127795 +134217728,16,0.190407 +268435456,1,oom +268435456,2,0.37553 +268435456,4,0.25179 +268435456,8,0.224412 +268435456,16,0.273172 +536870912,1,oom +536870912,2,oom +536870912,4,0.52832 +536870912,8,0.342413 +536870912,16,0.313677 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,8,0.781138 +1073741824,16,0.480032 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,16,0.805078 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,oom +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..4a6aedc --- /dev/null +++ 
b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged.txt @@ -0,0 +1,56 @@ +intersect tests +8388608,1,0.0178647 +8388608,2,0.0199434 +8388608,4,0.0411331 +8388608,8,0.123337 +8388608,16,0.462122 +16777216,1,0.0373729 +16777216,2,0.0279429 +16777216,4,0.0458639 +16777216,8,0.129113 +16777216,16,0.466076 +33554432,1,0.0620022 +33554432,2,0.0516823 +33554432,4,0.0580301 +33554432,8,0.136749 +33554432,16,0.474614 +67108864,1,0.118382 +67108864,2,0.0812513 +67108864,4,0.0839414 +67108864,8,0.154767 +67108864,16,0.475404 +134217728,1,0.213736 +134217728,2,0.126583 +134217728,4,0.135884 +134217728,8,0.192721 +134217728,16,0.510652 +268435456,1,oom +268435456,2,0.235599 +268435456,4,0.220126 +268435456,8,0.245657 +268435456,16,0.565218 +536870912,1,oom +536870912,2,oom +536870912,4,0.347546 +536870912,8,0.419769 +536870912,16,0.666904 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,8,0.636389 +1073741824,16,0.833957 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,16,1.22513 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,oom +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_index_managed.txt new file mode 100644 index 0000000..640ef59 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_index_managed.txt @@ -0,0 +1,104 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,16777216,1 +16777216,16777216,2 +16777216,16777216,4 +16777216,16777216,8 +16777216,16777216,16 +33554432,33554432,1 +33554432,33554432,2 +33554432,33554432,4 +33554432,33554432,8 +33554432,33554432,16 +67108864,67108864,1 +67108864,67108864,2 +67108864,67108864,4 +67108864,67108864,8 +67108864,67108864,16 +134217728,134217728,1 +134217728,134217728,2 +134217728,134217728,4 +134217728,134217728,8 +134217728,134217728,16 +268435456,268435456,1 +268435456,268435456,2 +268435456,268435456,4 +268435456,268435456,8 +268435456,268435456,16 +536870912,1,oom +536870912,536870912,2 +536870912,536870912,4 +536870912,536870912,8 +536870912,536870912,16 +1073741824,1,oom +1073741824,2,oom +1073741824,1073741824,4 +1073741824,1073741824,8 +1073741824,1073741824,16 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,2147483648,8 +2147483648,2147483648,16 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,4294967296,16 +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom +intersect tests +8388608,8388608,1 +8388608,8388608,2 +8388608,8388608,4 +8388608,8388608,8 +8388608,8388608,16 +16777216,16777216,1 +16777216,16777216,2 +16777216,16777216,4 +16777216,16777216,8 +16777216,16777216,16 +33554432,33554432,1 +33554432,33554432,2 +33554432,33554432,4 +33554432,33554432,8 +33554432,33554432,16 +67108864,67108864,1 +67108864,67108864,2 +67108864,67108864,4 +67108864,67108864,8 +67108864,67108864,16 +134217728,134217728,1 +134217728,134217728,2 +134217728,134217728,4 +134217728,134217728,8 +134217728,134217728,16 +268435456,1,oom +268435456,268435456,2 +268435456,268435456,4 +268435456,268435456,8 +268435456,268435456,16 +536870912,1,oom +536870912,2,oom +536870912,536870912,4 +536870912,536870912,8 +536870912,536870912,16 
+1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,1073741824,8 +1073741824,1073741824,16 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,2147483648,16 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_index_nomanaged.txt new file mode 100644 index 0000000..640ef59 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_index_nomanaged.txt @@ -0,0 +1,104 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,16777216,1 +16777216,16777216,2 +16777216,16777216,4 +16777216,16777216,8 +16777216,16777216,16 +33554432,33554432,1 +33554432,33554432,2 +33554432,33554432,4 +33554432,33554432,8 +33554432,33554432,16 +67108864,67108864,1 +67108864,67108864,2 +67108864,67108864,4 +67108864,67108864,8 +67108864,67108864,16 +134217728,134217728,1 +134217728,134217728,2 +134217728,134217728,4 +134217728,134217728,8 +134217728,134217728,16 +268435456,268435456,1 +268435456,268435456,2 +268435456,268435456,4 +268435456,268435456,8 +268435456,268435456,16 +536870912,1,oom +536870912,536870912,2 +536870912,536870912,4 +536870912,536870912,8 +536870912,536870912,16 +1073741824,1,oom +1073741824,2,oom +1073741824,1073741824,4 +1073741824,1073741824,8 +1073741824,1073741824,16 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,2147483648,8 +2147483648,2147483648,16 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,4294967296,16 +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom +intersect tests +8388608,8388608,1 +8388608,8388608,2 +8388608,8388608,4 +8388608,8388608,8 +8388608,8388608,16 +16777216,16777216,1 +16777216,16777216,2 +16777216,16777216,4 +16777216,16777216,8 +16777216,16777216,16 +33554432,33554432,1 +33554432,33554432,2 +33554432,33554432,4 +33554432,33554432,8 +33554432,33554432,16 +67108864,67108864,1 +67108864,67108864,2 +67108864,67108864,4 +67108864,67108864,8 +67108864,67108864,16 +134217728,134217728,1 +134217728,134217728,2 +134217728,134217728,4 +134217728,134217728,8 +134217728,134217728,16 +268435456,1,oom +268435456,268435456,2 +268435456,268435456,4 +268435456,268435456,8 +268435456,268435456,16 +536870912,1,oom +536870912,2,oom +536870912,536870912,4 +536870912,536870912,8 +536870912,536870912,16 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,1073741824,8 +1073741824,1073741824,16 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,2147483648,16 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_noindex_managed.txt new file mode 100644 index 0000000..640ef59 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_noindex_managed.txt @@ -0,0 +1,104 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,16777216,1 +16777216,16777216,2 +16777216,16777216,4 +16777216,16777216,8 +16777216,16777216,16 +33554432,33554432,1 +33554432,33554432,2 +33554432,33554432,4 +33554432,33554432,8 +33554432,33554432,16 
+67108864,67108864,1 +67108864,67108864,2 +67108864,67108864,4 +67108864,67108864,8 +67108864,67108864,16 +134217728,134217728,1 +134217728,134217728,2 +134217728,134217728,4 +134217728,134217728,8 +134217728,134217728,16 +268435456,268435456,1 +268435456,268435456,2 +268435456,268435456,4 +268435456,268435456,8 +268435456,268435456,16 +536870912,1,oom +536870912,536870912,2 +536870912,536870912,4 +536870912,536870912,8 +536870912,536870912,16 +1073741824,1,oom +1073741824,2,oom +1073741824,1073741824,4 +1073741824,1073741824,8 +1073741824,1073741824,16 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,2147483648,8 +2147483648,2147483648,16 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,4294967296,16 +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,16,oom +intersect tests +8388608,8388608,1 +8388608,8388608,2 +8388608,8388608,4 +8388608,8388608,8 +8388608,8388608,16 +16777216,16777216,1 +16777216,16777216,2 +16777216,16777216,4 +16777216,16777216,8 +16777216,16777216,16 +33554432,33554432,1 +33554432,33554432,2 +33554432,33554432,4 +33554432,33554432,8 +33554432,33554432,16 +67108864,67108864,1 +67108864,67108864,2 +67108864,67108864,4 +67108864,67108864,8 +67108864,67108864,16 +134217728,134217728,1 +134217728,134217728,2 +134217728,134217728,4 +134217728,134217728,8 +134217728,134217728,16 +268435456,1,oom +268435456,268435456,2 +268435456,268435456,4 +268435456,268435456,8 +268435456,268435456,16 +536870912,1,oom +536870912,2,oom +536870912,536870912,4 +536870912,536870912,8 +536870912,536870912,16 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,1073741824,8 +1073741824,1073741824,16 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,8,oom +2147483648,2147483648,16 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_noindex_managed_32.txt b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_noindex_managed_32.txt new file mode 100644 index 0000000..7bf4792 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_noindex_managed_32.txt @@ -0,0 +1,104 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,16777216,1 +16777216,16777216,2 +16777216,16777216,4 +16777216,16777216,8 +16777216,16777216,16 +33554432,33554432,1 +33554432,33554432,2 +33554432,33554432,4 +33554432,33554432,8 +33554432,33554432,16 +67108864,67108864,1 +67108864,67108864,2 +67108864,67108864,4 +67108864,67108864,8 +67108864,67108864,16 +134217728,134217728,1 +134217728,134217728,2 +134217728,134217728,4 +134217728,134217728,8 +134217728,134217728,16 +268435456,268435456,1 +268435456,268435456,2 +268435456,268435456,4 +268435456,268435456,8 +268435456,268435456,16 +536870912,536870912,1 +536870912,536870912,2 +536870912,536870912,4 +536870912,536870912,8 +536870912,536870912,16 +1073741824,1,oom +1073741824,1073741824,2 +1073741824,1073741824,4 +1073741824,1073741824,8 +1073741824,1073741824,16 +2147483648,1,oom +2147483648,2,oom +2147483648,2147483648,4 +2147483648,2147483648,8 +2147483648,2147483648,16 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,2147483648,8 +4294967296,2147483648,16 +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,2147483648,16 +intersect tests +8388608,8388608,1 +8388608,8388608,2 +8388608,8388608,4 
+8388608,8388608,8 +8388608,8388608,16 +16777216,16777216,1 +16777216,16777216,2 +16777216,16777216,4 +16777216,16777216,8 +16777216,16777216,16 +33554432,33554432,1 +33554432,33554432,2 +33554432,33554432,4 +33554432,33554432,8 +33554432,33554432,16 +67108864,67108864,1 +67108864,67108864,2 +67108864,67108864,4 +67108864,67108864,8 +67108864,67108864,16 +134217728,134217728,1 +134217728,134217728,2 +134217728,134217728,4 +134217728,134217728,8 +134217728,134217728,16 +268435456,268435456,1 +268435456,268435456,2 +268435456,268435456,4 +268435456,268435456,8 +268435456,268435456,16 +536870912,1,oom +536870912,536870912,2 +536870912,536870912,4 +536870912,536870912,8 +536870912,536870912,16 +1073741824,1,oom +1073741824,2,oom +1073741824,1073741824,4 +1073741824,1073741824,8 +1073741824,1073741824,16 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,2147483648,8 +2147483648,2147483648,16 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,2147483648,16 diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..19cc702 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_noindex_nomanaged.txt @@ -0,0 +1,6 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0869712 +16777216,2,0.0935405 +16777216,4,0.146539 diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_noindex_nomanaged_32.txt b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_noindex_nomanaged_32.txt new file mode 100644 index 0000000..7bf4792 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/results/strong_scaling_noindex_nomanaged_32.txt @@ -0,0 +1,104 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,16777216,1 +16777216,16777216,2 +16777216,16777216,4 +16777216,16777216,8 +16777216,16777216,16 +33554432,33554432,1 +33554432,33554432,2 +33554432,33554432,4 +33554432,33554432,8 +33554432,33554432,16 +67108864,67108864,1 +67108864,67108864,2 +67108864,67108864,4 +67108864,67108864,8 +67108864,67108864,16 +134217728,134217728,1 +134217728,134217728,2 +134217728,134217728,4 +134217728,134217728,8 +134217728,134217728,16 +268435456,268435456,1 +268435456,268435456,2 +268435456,268435456,4 +268435456,268435456,8 +268435456,268435456,16 +536870912,536870912,1 +536870912,536870912,2 +536870912,536870912,4 +536870912,536870912,8 +536870912,536870912,16 +1073741824,1,oom +1073741824,1073741824,2 +1073741824,1073741824,4 +1073741824,1073741824,8 +1073741824,1073741824,16 +2147483648,1,oom +2147483648,2,oom +2147483648,2147483648,4 +2147483648,2147483648,8 +2147483648,2147483648,16 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,2147483648,8 +4294967296,2147483648,16 +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,8,oom +8589934592,2147483648,16 +intersect tests +8388608,8388608,1 +8388608,8388608,2 +8388608,8388608,4 +8388608,8388608,8 +8388608,8388608,16 +16777216,16777216,1 +16777216,16777216,2 +16777216,16777216,4 +16777216,16777216,8 +16777216,16777216,16 +33554432,33554432,1 +33554432,33554432,2 +33554432,33554432,4 +33554432,33554432,8 +33554432,33554432,16 +67108864,67108864,1 +67108864,67108864,2 +67108864,67108864,4 +67108864,67108864,8 +67108864,67108864,16 
+134217728,134217728,1 +134217728,134217728,2 +134217728,134217728,4 +134217728,134217728,8 +134217728,134217728,16 +268435456,268435456,1 +268435456,268435456,2 +268435456,268435456,4 +268435456,268435456,8 +268435456,268435456,16 +536870912,1,oom +536870912,536870912,2 +536870912,536870912,4 +536870912,536870912,8 +536870912,536870912,16 +1073741824,1,oom +1073741824,2,oom +1073741824,1073741824,4 +1073741824,1073741824,8 +1073741824,1073741824,16 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,2147483648,8 +2147483648,2147483648,16 +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,8,oom +4294967296,2147483648,16 diff --git a/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/strong_scaling.sh b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/strong_scaling.sh new file mode 100755 index 0000000..e0b4e62 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/strong_scaling/strong_scaling.sh @@ -0,0 +1,92 @@ +keycounts=($(seq 24 1 33)) +# gpucounts=($(seq 0 1 4)) +gpucounts=(1 2 4 8 16) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' ../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +# rm $resultsfile +# echo "keycount,gpucount,time" >> $resultsfile +echo "strong_scaling" +echo "keycount,gpucount,time" +echo "build tests" +# echo "build tests" >> $resultsfile + +# $1 is sizeof(keyval) +# $2 is 32-bit vs 64-bit +for i in "${keycounts[@]}" + do + let kc=$((echo 2^$i) | bc) + # echo "keycount: ${kc}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + let ts=$kc + if [ $2 -eq 4 ] ; then + ts=$(($kc < (2**($2 * 8)) ? $kc : 2**($2 * 8))) + fi + + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + # ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc build | grep "time") + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc build | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + +echo "intersect tests" +# echo "intersect tests" >> $resultsfile +for i in "${keycounts[@]}" + do + let kc=$((echo 2^$i) | bc) + kc=$((kc / 2)) + # echo "keycount: ${kc}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + let ts=$kc + if [ $2 -eq 4 ] ; then + ts=$(($kc < (2**($2 * 8)) ? 
$kc : 2**($2 * 8))) + fi + + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs * 2 + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + # ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc intersect | grep "time") + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc intersect | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/build/weak_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/build/weak_scaling_index_managed.txt new file mode 100644 index 0000000..9f30f17 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/build/weak_scaling_index_managed.txt @@ -0,0 +1,55 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0298322 +33554432,2,0.0362977 +67108864,4,0.0671898 +134217728,8,0.0916808 +268435456,16,0.193936 +33554432,1,0.0536771 +67108864,2,0.0683346 +134217728,4,0.105288 +268435456,8,0.140878 +536870912,16,0.261272 +67108864,1,0.0988774 +134217728,2,0.121122 +268435456,4,0.193174 +536870912,8,0.204649 +1073741824,16,0.366627 +134217728,1,0.194202 +268435456,2,0.231426 +536870912,4,0.339146 +1073741824,8,0.591455 +2147483648,16,0.847773 +268435456,1,0.406828 +536870912,2,0.483953 +1073741824,4,0.653965 +2147483648,8,1.29227 +4294967296,16,1.61224 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +17179869184,1,oom +34359738368,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/build/weak_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/build/weak_scaling_index_nomanaged.txt new file mode 100644 index 0000000..adb07c1 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/build/weak_scaling_index_nomanaged.txt @@ -0,0 +1,55 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0209367 +33554432,2,0.0291533 +67108864,4,0.0497725 +134217728,8,0.109378 +268435456,16,0.301607 +33554432,1,0.0384215 +67108864,2,0.057174 +134217728,4,0.0769597 +268435456,8,0.144017 +536870912,16,0.360753 +67108864,1,0.0690063 +134217728,2,0.0812861 +268435456,4,0.133393 +536870912,8,0.19951 +1073741824,16,0.47268 +134217728,1,0.131793 +268435456,2,0.204273 +536870912,4,0.230898 +1073741824,8,0.349207 +2147483648,16,0.665837 +268435456,1,0.274487 +536870912,2,0.299337 +1073741824,4,0.418372 +2147483648,8,0.591882 +4294967296,16,1.19747 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom 
+34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +17179869184,1,oom +34359738368,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/build/weak_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/build/weak_scaling_noindex_managed.txt new file mode 100644 index 0000000..f6e100d --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/build/weak_scaling_noindex_managed.txt @@ -0,0 +1,55 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0213699 +33554432,2,0.0249836 +67108864,4,0.0508662 +134217728,8,0.0450826 +268435456,16,0.0707983 +33554432,1,0.0422175 +67108864,2,0.0604733 +134217728,4,0.0664504 +268435456,8,0.0807475 +536870912,16,0.132561 +67108864,1,0.0800297 +134217728,2,0.0872991 +268435456,4,0.134306 +536870912,8,0.12742 +1073741824,16,0.162735 +134217728,1,0.142644 +268435456,2,0.152611 +536870912,4,0.257606 +1073741824,8,0.228059 +2147483648,16,0.334188 +268435456,1,0.29696 +536870912,2,0.304001 +1073741824,4,0.595716 +2147483648,8,0.70221 +4294967296,16,0.748408 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +17179869184,1,oom +34359738368,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/build/weak_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/build/weak_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..6e2cd6f --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/build/weak_scaling_noindex_nomanaged.txt @@ -0,0 +1,55 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0164301 +33554432,2,0.0206705 +67108864,4,0.0370903 +134217728,8,0.0661299 +268435456,16,0.161486 +33554432,1,0.0315709 +67108864,2,0.037847 +134217728,4,0.0567747 +268435456,8,0.0933038 +536870912,16,0.224515 +67108864,1,0.05312 +134217728,2,0.0600607 +268435456,4,0.0893553 +536870912,8,0.130834 +1073741824,16,0.252743 +134217728,1,0.0996424 +268435456,2,0.110338 +536870912,4,0.146989 +1073741824,8,0.204247 +2147483648,16,0.400767 +268435456,1,0.202672 +536870912,2,0.238949 +1073741824,4,0.298118 +2147483648,8,0.436054 +4294967296,16,0.692522 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +17179869184,1,oom +34359738368,2,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/intersect/weak_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/intersect/weak_scaling_index_managed.txt new file mode 100644 index 0000000..fa71425 --- /dev/null +++ 
b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/intersect/weak_scaling_index_managed.txt @@ -0,0 +1,56 @@ +intersect tests +8388608,1,0.032002 +16777216,2,0.0439511 +33554432,4,0.0778803 +67108864,8,0.185199 +134217728,16,0.4344 +16777216,1,0.0640133 +33554432,2,0.0760402 +67108864,4,0.127516 +134217728,8,0.219534 +268435456,16,0.501742 +33554432,1,0.109628 +67108864,2,0.134808 +134217728,4,0.2393 +268435456,8,0.374207 +536870912,16,0.791305 +67108864,1,0.215864 +134217728,2,0.258725 +268435456,4,0.395774 +536870912,8,0.504975 +1073741824,16,0.93146 +134217728,1,0.439924 +268435456,2,0.551598 +536870912,4,0.825741 +1073741824,8,1.15351 +2147483648,16,1.50097 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/intersect/weak_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/intersect/weak_scaling_index_nomanaged.txt new file mode 100644 index 0000000..3079e42 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/intersect/weak_scaling_index_nomanaged.txt @@ -0,0 +1,56 @@ +intersect tests +8388608,1,0.0252119 +16777216,2,0.0423301 +33554432,4,0.0914565 +67108864,8,0.246365 +134217728,16,0.76514 +16777216,1,0.0472453 +33554432,2,0.0619878 +67108864,4,0.122486 +134217728,8,0.301793 +268435456,16,0.837675 +33554432,1,0.0755016 +67108864,2,0.102109 +134217728,4,0.191248 +268435456,8,0.393048 +536870912,16,1.00322 +67108864,1,0.144906 +134217728,2,0.170739 +268435456,4,0.312002 +536870912,8,0.541097 +1073741824,16,1.288 +134217728,1,0.297868 +268435456,2,0.337736 +536870912,4,0.550718 +1073741824,8,0.928493 +2147483648,16,1.92879 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/intersect/weak_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/intersect/weak_scaling_noindex_managed.txt new file mode 100644 index 0000000..5bcdf3d --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/intersect/weak_scaling_noindex_managed.txt @@ -0,0 +1,56 @@ +intersect tests +8388608,1,0.0234127 +16777216,2,0.0299008 +33554432,4,0.048171 +67108864,8,0.0808899 +134217728,16,0.20291 +16777216,1,0.0497531 +33554432,2,0.0564255 +67108864,4,0.0967403 +134217728,8,0.151015 +268435456,16,0.265727 +33554432,1,0.0914371 +67108864,2,0.110712 +134217728,4,0.149991 +268435456,8,0.239398 +536870912,16,0.360778 
+67108864,1,0.161769 +134217728,2,0.182073 +268435456,4,0.251822 +536870912,8,0.30669 +1073741824,16,0.595355 +134217728,1,0.328665 +268435456,2,0.380084 +536870912,4,0.54903 +1073741824,8,0.634734 +2147483648,16,1.33727 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..e6a0000 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged.txt @@ -0,0 +1,56 @@ +intersect tests +8388608,1,0.0192655 +16777216,2,0.0297472 +33554432,4,0.0624876 +67108864,8,0.16026 +134217728,16,0.513717 +16777216,1,0.0372326 +33554432,2,0.0455895 +67108864,4,0.0923013 +134217728,8,0.192305 +268435456,16,0.562518 +33554432,1,0.0594627 +67108864,2,0.0755589 +134217728,4,0.132043 +268435456,8,0.256164 +536870912,16,0.657183 +67108864,1,0.108893 +134217728,2,0.129621 +268435456,4,0.205032 +536870912,8,0.381038 +1073741824,16,0.828546 +134217728,1,0.222663 +268435456,2,0.244882 +536870912,4,0.353723 +1073741824,8,0.578017 +2147483648,16,1.23607 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_index_managed.txt new file mode 100644 index 0000000..c568831 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_index_managed.txt @@ -0,0 +1,104 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,16777216,1 +33554432,33554432,2 +67108864,67108864,4 +134217728,134217728,8 +268435456,268435456,16 +33554432,33554432,1 +67108864,67108864,2 +134217728,134217728,4 +268435456,268435456,8 +536870912,536870912,16 +67108864,67108864,1 +134217728,134217728,2 +268435456,268435456,4 +536870912,536870912,8 +1073741824,1073741824,16 +134217728,134217728,1 +268435456,268435456,2 +536870912,536870912,4 +1073741824,1073741824,8 +2147483648,2147483648,16 +268435456,268435456,1 +536870912,536870912,2 +1073741824,1073741824,4 +2147483648,2147483648,8 +4294967296,4294967296,16 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom 
+4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +intersect tests +8388608,8388608,1 +16777216,16777216,2 +33554432,33554432,4 +67108864,67108864,8 +134217728,134217728,16 +16777216,16777216,1 +33554432,33554432,2 +67108864,67108864,4 +134217728,134217728,8 +268435456,268435456,16 +33554432,33554432,1 +67108864,67108864,2 +134217728,134217728,4 +268435456,268435456,8 +536870912,536870912,16 +67108864,67108864,1 +134217728,134217728,2 +268435456,268435456,4 +536870912,536870912,8 +1073741824,1073741824,16 +134217728,134217728,1 +268435456,268435456,2 +536870912,536870912,4 +1073741824,1073741824,8 +2147483648,2147483648,16 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_index_nomanaged.txt new file mode 100644 index 0000000..c568831 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_index_nomanaged.txt @@ -0,0 +1,104 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,16777216,1 +33554432,33554432,2 +67108864,67108864,4 +134217728,134217728,8 +268435456,268435456,16 +33554432,33554432,1 +67108864,67108864,2 +134217728,134217728,4 +268435456,268435456,8 +536870912,536870912,16 +67108864,67108864,1 +134217728,134217728,2 +268435456,268435456,4 +536870912,536870912,8 +1073741824,1073741824,16 +134217728,134217728,1 +268435456,268435456,2 +536870912,536870912,4 +1073741824,1073741824,8 +2147483648,2147483648,16 +268435456,268435456,1 +536870912,536870912,2 +1073741824,1073741824,4 +2147483648,2147483648,8 +4294967296,4294967296,16 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +intersect tests +8388608,8388608,1 +16777216,16777216,2 +33554432,33554432,4 +67108864,67108864,8 +134217728,134217728,16 +16777216,16777216,1 +33554432,33554432,2 +67108864,67108864,4 +134217728,134217728,8 +268435456,268435456,16 +33554432,33554432,1 +67108864,67108864,2 +134217728,134217728,4 +268435456,268435456,8 +536870912,536870912,16 +67108864,67108864,1 +134217728,134217728,2 +268435456,268435456,4 +536870912,536870912,8 +1073741824,1073741824,16 +134217728,134217728,1 +268435456,268435456,2 +536870912,536870912,4 +1073741824,1073741824,8 +2147483648,2147483648,16 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom 
+1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_noindex_managed.txt new file mode 100644 index 0000000..c568831 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_noindex_managed.txt @@ -0,0 +1,104 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,16777216,1 +33554432,33554432,2 +67108864,67108864,4 +134217728,134217728,8 +268435456,268435456,16 +33554432,33554432,1 +67108864,67108864,2 +134217728,134217728,4 +268435456,268435456,8 +536870912,536870912,16 +67108864,67108864,1 +134217728,134217728,2 +268435456,268435456,4 +536870912,536870912,8 +1073741824,1073741824,16 +134217728,134217728,1 +268435456,268435456,2 +536870912,536870912,4 +1073741824,1073741824,8 +2147483648,2147483648,16 +268435456,268435456,1 +536870912,536870912,2 +1073741824,1073741824,4 +2147483648,2147483648,8 +4294967296,4294967296,16 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +intersect tests +8388608,8388608,1 +16777216,16777216,2 +33554432,33554432,4 +67108864,67108864,8 +134217728,134217728,16 +16777216,16777216,1 +33554432,33554432,2 +67108864,67108864,4 +134217728,134217728,8 +268435456,268435456,16 +33554432,33554432,1 +67108864,67108864,2 +134217728,134217728,4 +268435456,268435456,8 +536870912,536870912,16 +67108864,67108864,1 +134217728,134217728,2 +268435456,268435456,4 +536870912,536870912,8 +1073741824,1073741824,16 +134217728,134217728,1 +268435456,268435456,2 +536870912,536870912,4 +1073741824,1073741824,8 +2147483648,2147483648,16 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_noindex_managed_32.txt b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_noindex_managed_32.txt new file mode 100644 index 0000000..f781da2 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_noindex_managed_32.txt @@ -0,0 +1,104 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,16777216,1 +33554432,33554432,2 +67108864,67108864,4 +134217728,134217728,8 +268435456,268435456,16 +33554432,33554432,1 +67108864,67108864,2 +134217728,134217728,4 +268435456,268435456,8 +536870912,536870912,16 +67108864,67108864,1 +134217728,134217728,2 +268435456,268435456,4 
+536870912,536870912,8 +1073741824,1073741824,16 +134217728,134217728,1 +268435456,268435456,2 +536870912,536870912,4 +1073741824,1073741824,8 +2147483648,2147483648,16 +268435456,268435456,1 +536870912,536870912,2 +1073741824,1073741824,4 +2147483648,2147483648,8 +4294967296,2147483648,16 +536870912,536870912,1 +1073741824,1073741824,2 +2147483648,2147483648,4 +4294967296,2147483648,8 +8589934592,2147483648,16 +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +intersect tests +8388608,8388608,1 +16777216,16777216,2 +33554432,33554432,4 +67108864,67108864,8 +134217728,134217728,16 +16777216,16777216,1 +33554432,33554432,2 +67108864,67108864,4 +134217728,134217728,8 +268435456,268435456,16 +33554432,33554432,1 +67108864,67108864,2 +134217728,134217728,4 +268435456,268435456,8 +536870912,536870912,16 +67108864,67108864,1 +134217728,134217728,2 +268435456,268435456,4 +536870912,536870912,8 +1073741824,1073741824,16 +134217728,134217728,1 +268435456,268435456,2 +536870912,536870912,4 +1073741824,1073741824,8 +2147483648,2147483648,16 +268435456,268435456,1 +536870912,536870912,2 +1073741824,1073741824,4 +2147483648,2147483648,8 +4294967296,2147483648,16 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..c568831 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_noindex_nomanaged.txt @@ -0,0 +1,104 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,16777216,1 +33554432,33554432,2 +67108864,67108864,4 +134217728,134217728,8 +268435456,268435456,16 +33554432,33554432,1 +67108864,67108864,2 +134217728,134217728,4 +268435456,268435456,8 +536870912,536870912,16 +67108864,67108864,1 +134217728,134217728,2 +268435456,268435456,4 +536870912,536870912,8 +1073741824,1073741824,16 +134217728,134217728,1 +268435456,268435456,2 +536870912,536870912,4 +1073741824,1073741824,8 +2147483648,2147483648,16 +268435456,268435456,1 +536870912,536870912,2 +1073741824,1073741824,4 +2147483648,2147483648,8 +4294967296,4294967296,16 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +intersect tests +8388608,8388608,1 +16777216,16777216,2 +33554432,33554432,4 +67108864,67108864,8 +134217728,134217728,16 +16777216,16777216,1 +33554432,33554432,2 +67108864,67108864,4 
+134217728,134217728,8 +268435456,268435456,16 +33554432,33554432,1 +67108864,67108864,2 +134217728,134217728,4 +268435456,268435456,8 +536870912,536870912,16 +67108864,67108864,1 +134217728,134217728,2 +268435456,268435456,4 +536870912,536870912,8 +1073741824,1073741824,16 +134217728,134217728,1 +268435456,268435456,2 +536870912,536870912,4 +1073741824,1073741824,8 +2147483648,2147483648,16 +268435456,1,oom +536870912,2,oom +1073741824,4,oom +2147483648,8,oom +4294967296,16,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_noindex_nomanaged_32.txt b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_noindex_nomanaged_32.txt new file mode 100644 index 0000000..f781da2 --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/results/weak_scaling_noindex_nomanaged_32.txt @@ -0,0 +1,104 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,16777216,1 +33554432,33554432,2 +67108864,67108864,4 +134217728,134217728,8 +268435456,268435456,16 +33554432,33554432,1 +67108864,67108864,2 +134217728,134217728,4 +268435456,268435456,8 +536870912,536870912,16 +67108864,67108864,1 +134217728,134217728,2 +268435456,268435456,4 +536870912,536870912,8 +1073741824,1073741824,16 +134217728,134217728,1 +268435456,268435456,2 +536870912,536870912,4 +1073741824,1073741824,8 +2147483648,2147483648,16 +268435456,268435456,1 +536870912,536870912,2 +1073741824,1073741824,4 +2147483648,2147483648,8 +4294967296,2147483648,16 +536870912,536870912,1 +1073741824,1073741824,2 +2147483648,2147483648,4 +4294967296,2147483648,8 +8589934592,2147483648,16 +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +68719476736,8,oom +137438953472,16,oom +intersect tests +8388608,8388608,1 +16777216,16777216,2 +33554432,33554432,4 +67108864,67108864,8 +134217728,134217728,16 +16777216,16777216,1 +33554432,33554432,2 +67108864,67108864,4 +134217728,134217728,8 +268435456,268435456,16 +33554432,33554432,1 +67108864,67108864,2 +134217728,134217728,4 +268435456,268435456,8 +536870912,536870912,16 +67108864,67108864,1 +134217728,134217728,2 +268435456,268435456,4 +536870912,536870912,8 +1073741824,1073741824,16 +134217728,134217728,1 +268435456,268435456,2 +536870912,536870912,4 +1073741824,1073741824,8 +2147483648,2147483648,16 +268435456,268435456,1 +536870912,536870912,2 +1073741824,1073741824,4 +2147483648,2147483648,8 +4294967296,2147483648,16 +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +4294967296,8,oom +8589934592,16,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +8589934592,8,oom +17179869184,16,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +17179869184,8,oom +34359738368,16,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +34359738368,8,oom +68719476736,16,oom diff --git a/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/weak_scaling.sh 
b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/weak_scaling.sh new file mode 100755 index 0000000..1606cec --- /dev/null +++ b/hash-graph-dehornetify/experiments/dgx2_old/weak_scaling/weak_scaling.sh @@ -0,0 +1,94 @@ +keycounts=($(seq 24 1 33)) +# gpucounts=($(seq 0 1 4)) +gpucounts=(1 2 4 8 16) +# gpucounts=(1 2 4 8) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' ../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +# rm $resultsfile +# echo "keycount,gpucount,time" >> $resultsfile +echo "weak_scaling" +echo "keycount,gpucount,time" +echo "build tests" +# echo "build tests" >> $resultsfile + +for i in "${keycounts[@]}" + do + let kcdev=$((echo 2^$i) | bc) + # echo "keycount / dev: ${kcdev}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + let kc=$(($kcdev * $gc)) + let ts=$kc + if [ $2 -eq 4 ] ; then + ts=$(($kc < (2**($2 * 8)) ? $kc : 2**($2 * 8))) + fi + + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + # ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc build | grep "time") + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc build | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + +echo "intersect tests" +# echo "intersect tests" >> $resultsfile + +for i in "${keycounts[@]}" + do + let kcdev=$((echo 2^$i) | bc) + kcdev=$((kcdev / 2)) + # echo "keycount / dev : ${kcdev}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + let kc=$(($kcdev * $gc)) + let ts=$kc + if [ $2 -eq 4 ] ; then + ts=$(($kc < (2**($2 * 8)) ? 
$kc : 2**($2 * 8))) + fi + + # # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs * 2 + 32) / 32") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + # ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc intersect | grep "time") + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc intersect | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + diff --git a/hash-graph-dehornetify/experiments/dgx2_strong_scaling_build_noindex_managed.pdf b/hash-graph-dehornetify/experiments/dgx2_strong_scaling_build_noindex_managed.pdf new file mode 100644 index 0000000..7759f3f Binary files /dev/null and b/hash-graph-dehornetify/experiments/dgx2_strong_scaling_build_noindex_managed.pdf differ diff --git a/hash-graph-dehornetify/experiments/dgx2_strong_scaling_build_noindex_nomanaged.pdf b/hash-graph-dehornetify/experiments/dgx2_strong_scaling_build_noindex_nomanaged.pdf new file mode 100644 index 0000000..746788b Binary files /dev/null and b/hash-graph-dehornetify/experiments/dgx2_strong_scaling_build_noindex_nomanaged.pdf differ diff --git a/hash-graph-dehornetify/experiments/dgx2_strong_scaling_intersect_noindex_managed.pdf b/hash-graph-dehornetify/experiments/dgx2_strong_scaling_intersect_noindex_managed.pdf new file mode 100644 index 0000000..ede5aca Binary files /dev/null and b/hash-graph-dehornetify/experiments/dgx2_strong_scaling_intersect_noindex_managed.pdf differ diff --git a/hash-graph-dehornetify/experiments/dgx2_strong_scaling_intersect_noindex_nomanaged.pdf b/hash-graph-dehornetify/experiments/dgx2_strong_scaling_intersect_noindex_nomanaged.pdf new file mode 100644 index 0000000..b2e2f16 Binary files /dev/null and b/hash-graph-dehornetify/experiments/dgx2_strong_scaling_intersect_noindex_nomanaged.pdf differ diff --git a/hash-graph-dehornetify/experiments/dgx2_weak_scaling_build_noindex_managed.pdf b/hash-graph-dehornetify/experiments/dgx2_weak_scaling_build_noindex_managed.pdf new file mode 100644 index 0000000..2750aa0 Binary files /dev/null and b/hash-graph-dehornetify/experiments/dgx2_weak_scaling_build_noindex_managed.pdf differ diff --git a/hash-graph-dehornetify/experiments/dgx2_weak_scaling_build_noindex_nomanaged.pdf b/hash-graph-dehornetify/experiments/dgx2_weak_scaling_build_noindex_nomanaged.pdf new file mode 100644 index 0000000..f95f4c4 Binary files /dev/null and b/hash-graph-dehornetify/experiments/dgx2_weak_scaling_build_noindex_nomanaged.pdf differ diff --git a/hash-graph-dehornetify/experiments/dgx2_weak_scaling_intersect_noindex_managed.pdf b/hash-graph-dehornetify/experiments/dgx2_weak_scaling_intersect_noindex_managed.pdf new file mode 100644 index 0000000..50807e8 Binary files /dev/null and b/hash-graph-dehornetify/experiments/dgx2_weak_scaling_intersect_noindex_managed.pdf differ diff --git a/hash-graph-dehornetify/experiments/dgx2_weak_scaling_intersect_noindex_nomanaged.pdf b/hash-graph-dehornetify/experiments/dgx2_weak_scaling_intersect_noindex_nomanaged.pdf new file mode 100644 index 0000000..3363f84 Binary files /dev/null and b/hash-graph-dehornetify/experiments/dgx2_weak_scaling_intersect_noindex_nomanaged.pdf differ diff --git a/hash-graph-dehornetify/experiments/snmg-hg.ipynb b/hash-graph-dehornetify/experiments/snmg-hg.ipynb new file 
mode 100644 index 0000000..2ecb409 --- /dev/null +++ b/hash-graph-dehornetify/experiments/snmg-hg.ipynb @@ -0,0 +1,622 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['strong_scaling', 'weak_scaling']\n", + "['build', 'intersect']\n", + "['noindex_nomanaged', 'index_nomanaged', 'noindex_managed', 'index_managed']\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import sys\n", + "import matplotlib\n", + "import matplotlib.pyplot as plt\n", + "import json\n", + "import glob\n", + "import itertools\n", + "\n", + "from collections import defaultdict\n", + "\n", + "# EXPERIMENTS = [\"strong_scaling\", \"weak_scaling\", \"duplicate_keys\"]\n", + "EXPERIMENTS = [\"strong_scaling\", \"weak_scaling\"]\n", + "EXP_TYPES = [\"build\", \"intersect\"]\n", + "MODES = [\"noindex_nomanaged\", \"index_nomanaged\", \"noindex_managed\", \"index_managed\"]\n", + "\n", + "SYSTEMS = [\"dgx2\", \"summit\"]\n", + "\n", + "# data[EXPERIMENT][EXP_TYPE][MODE]\n", + "results = dict()\n", + "\n", + "%matplotlib inline\n", + "from six import iteritems\n", + "from matplotlib.lines import Line2D\n", + "\n", + "fmarkers=Line2D.filled_markers\n", + "\n", + "plt.style.use('ggplot')\n", + "\n", + "print(EXPERIMENTS)\n", + "print(EXP_TYPES)\n", + "print(MODES)\n", + "\n", + "for sys in SYSTEMS:\n", + " results[sys] = dict()\n", + " for exp in EXPERIMENTS:\n", + " results[sys][exp] = dict()\n", + " for exp_type in EXP_TYPES:\n", + " results[sys][exp][exp_type] = dict()\n", + " for mode in MODES:\n", + " results[sys][exp][exp_type][mode] = defaultdict(list)\n", + " # strong_scaling keycount --> tuples of (gpucount, time)\n", + " # weak_scaling keycount / dev --> tuples of (gpucount, time)\n", + " # duplicate_keys keycount --> tuples of (tablesize, time)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "def loadStrongScaling(system):\n", + " exp = \"strong_scaling\"\n", + " for exp_type in EXP_TYPES:\n", + " for mode in MODES:\n", + " file_name = \"./\" + system + \"/\" + exp + \"/results/\" + exp_type + \"/\" + exp + \"_\" + mode + \".txt\"\n", + " with open(file_name, \"r\") as data:\n", + " for line in data:\n", + " if exp in line or \"keycount\" in line or \"tests\" in line:\n", + " continue\n", + "\n", + " line_info = line.strip(\"\\n\").split(\",\")\n", + "\n", + " keycount = int(line_info[0])\n", + " gpucount = int(line_info[1])\n", + "\n", + " if line_info[2] == \"oom\" or line_info[2] == '0':\n", + " continue\n", + "\n", + " time = float(line_info[2])\n", + "\n", + " results[system][exp][exp_type][mode][keycount].append((gpucount, time))\n", + " \n", + " \n", + "def loadWeakScaling(system):\n", + " exp = \"weak_scaling\"\n", + " for exp_type in EXP_TYPES:\n", + " for mode in MODES:\n", + " file_name = \"./\" + system + \"/\" + exp + \"/results/\" + exp_type + \"/\" + exp + \"_\" + mode + \".txt\"\n", + " with open(file_name, \"r\") as data:\n", + " for line in data:\n", + " if exp in line or \"keycount\" in line or \"tests\" in line:\n", + " continue\n", + "\n", + " line_info = line.strip(\"\\n\").split(\",\")\n", + "\n", + " keycount = int(line_info[0])\n", + " gpucount = int(line_info[1])\n", + " \n", + " key_per_dev = int(keycount / gpucount)\n", + "\n", + " if line_info[2] == \"oom\" or line_info[2] == '0':\n", + " continue\n", + "\n", + " time = float(line_info[2])\n", + "\n", + " 
results[system][exp][exp_type][mode][key_per_dev].append((gpucount, time))\n", + " \n", + " \n", + "def loadDuplicateKeys(system):\n", + " exp = \"duplicate_keys\"\n", + " for exp_type in EXP_TYPES:\n", + " for mode in MODES:\n", + " file_name = \"./\" + system + \"/\" + exp + \"/results/\" + exp_type + \"/\" + exp + \"_\" + mode + \".txt\"\n", + " with open(file_name, \"r\") as data:\n", + " for line in data:\n", + " if exp in line or \"keycount\" in line or \"tests\" in line:\n", + " continue\n", + "\n", + " line_info = line.strip(\"\\n\").split(\",\")\n", + "\n", + " keycount = int(line_info[0])\n", + " tablesize = int(line_info[1])\n", + " gpucount = int(line_info[2])\n", + " \n", + " if line_info[3] == \"oom\" or line_info[3] == '0':\n", + " continue\n", + "\n", + " time = float(line_info[2])\n", + "\n", + " results[system][exp][exp_type][mode][keycount].append((tablesize, time))\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "for system in SYSTEMS:\n", + " loadStrongScaling(system)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "for system in SYSTEMS:\n", + " loadWeakScaling(system)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "'duplicate_keys'", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0msystem\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mSYSTEMS\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mloadDuplicateKeys\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msystem\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m\u001b[0m in \u001b[0;36mloadDuplicateKeys\u001b[0;34m(system)\u001b[0m\n\u001b[1;32m 68\u001b[0m \u001b[0mtime\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mline_info\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 70\u001b[0;31m \u001b[0mresults\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0msystem\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mexp\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mexp_type\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mmode\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mkeycount\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtablesize\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 71\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 72\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mKeyError\u001b[0m: 'duplicate_keys'" + ] + } + ], + "source": [ + "for system in SYSTEMS:\n", + " loadDuplicateKeys(system)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + 
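The strong- and weak-scaling loaders above consume the comma-separated files written by the driver scripts, one `keycount,gpucount,time` row per run with `oom` (or `0`) in the time column when a configuration did not fit (`loadDuplicateKeys` expects an extra table-size column). A small standalone sketch of that parsing, with an illustrative `sample` string standing in for a real `./<system>/<exp>/results/<exp_type>/<exp>_<mode>.txt` file, is shown below.

```python
from collections import defaultdict

# Illustrative rows only; real files are produced by the experiment scripts.
sample = """keycount,gpucount,time
16777216,1,0.0126341
16777216,2,0.0156662
33554432,16,oom
"""

strong_scaling = defaultdict(list)       # keycount -> [(gpucount, time), ...]
for line in sample.splitlines():
    if "keycount" in line or "tests" in line:
        continue                         # header / banner lines, as in loadStrongScaling
    keycount, gpucount, time = line.strip("\n").split(",")
    if time in ("oom", "0"):
        continue                         # configurations that did not fit are dropped
    strong_scaling[int(keycount)].append((int(gpucount), float(time)))

# strong_scaling mirrors results[system]["strong_scaling"][exp_type][mode]
```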
"name": "stdout", + "output_type": "stream", + "text": [ + "9\n", + "16777216 [(1, 0.0126341), (2, 0.0156662), (4, 0.0180244), (8, 0.0218276), (16, 0.0320809)]\n", + "33554432 [(1, 0.0303186), (2, 0.0292536), (4, 0.0320338), (8, 0.0314122), (16, 0.0497111)]\n", + "67108864 [(1, 0.0733604), (2, 0.0617933), (4, 0.0593879), (8, 0.0592865), (16, 0.0660326)]\n", + "134217728 [(1, 0.188676), (2, 0.137769), (4, 0.116816), (8, 0.100187), (16, 0.0940442)]\n", + "268435456 [(1, 0.443171), (2, 0.313036), (4, 0.251006), (8, 0.197405), (16, 0.185187)]\n", + "536870912 [(2, 0.688961), (4, 0.500017), (8, 0.389059), (16, 0.342239)]\n", + "1073741824 [(4, 1.45416), (8, 0.823694), (16, 0.700535)]\n", + "2147483648 [(8, 2.19697), (16, 1.58418)]\n", + "4294967296 [(16, 3.42857)]\n", + "here3?\n" + ] + } + ], + "source": [ + "print(len(results[\"dgx2\"][\"strong_scaling\"][\"build\"][\"noindex_nomanaged\"].items()))\n", + "for k, v in results[\"dgx2\"][\"strong_scaling\"][\"build\"][\"noindex_nomanaged\"].items():\n", + " print(k, v)\n", + "print(\"here3?\")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "def get_cmap(n, name='hsv'):\n", + " '''Returns a function that maps each index in 0, 1, ..., n-1 to a distinct \n", + " RGB color; the keyword argument name must be a standard mpl colormap name.'''\n", + " return plt.cm.get_cmap(name, n)\n", + "\n", + "def plotThroughput(system, exp, exp_type, mode):\n", + " fig, axes = plt.subplots(nrows=1,sharex=True)\n", + " # fig.set_size_inches(5,3)\n", + "\n", + " plot_data = results[system][exp][exp_type][mode]\n", + " \n", + " cmap = get_cmap(len(plot_data.items()))\n", + " for i, data in enumerate(plot_data.items()):\n", + " keyc = data[0]\n", + " data_points = data[1]\n", + " x = [i[0] for i in data_points]\n", + " y = [i[1] for i in data_points]\n", + " \n", + " for j in range(len(y)):\n", + " if exp == \"weak_scaling\":\n", + " y[j] = (keyc * x[j]) / y[j]\n", + " else:\n", + " y[j] = keyc / y[j]\n", + " \n", + " label = \"2^\" + str(int(np.log(keyc) / np.log(2)))\n", + " axes.plot(x, y, label=label,\n", + " color=cmap(i),\n", + " marker=fmarkers[i])\n", + " \n", + " axes.set_ylim(ymin=0)\n", + " \n", + " #axes.plot(threads, threads, label=\"Ideal\",color=plt.rcParams['axes.color_cycle'][4])\n", + " #axes.set_yticks(np.arange(0, 5.0, 0.5))\n", + "\n", + " plt.title(system + \" \" + exp + \" \" + exp_type + \" \" + mode, pad=20)\n", + " plt.legend(loc='upper left', ncol=1, fancybox=True, shadow=True, fontsize=11)\n", + " \n", + " axes.set_ylabel(\"Keys / sec.\",fontsize=8) \n", + " axes.set_xlabel(\"GPU Count\",fontsize=8) \n", + " #plt.xticks(fontsize=8)\n", + " #plt.yticks(fontsize=8)\n", + " \n", + " title = system + \"_\" + exp + \"_\" + exp_type + \"_\" + mode\n", + " plt.savefig(title + \".pdf\", format=\"pdf\");\n", + " plt.show()\n", + " \n", + "def plotSpeedup(system, exp, exp_type, mode):\n", + " fig, axes = plt.subplots(nrows=1,sharex=True)\n", + " # fig.set_size_inches(5,3)\n", + "\n", + " plot_data = results[system][exp][exp_type][mode]\n", + " \n", + " cmap = get_cmap(len(plot_data.items()))\n", + " for i, data in enumerate(plot_data.items()):\n", + " keyc = data[0]\n", + " data_points = data[1]\n", + " x = [i[0] for i in data_points]\n", + " y = [i[1] for i in data_points]\n", + " \n", + " # compute keys / sec throughput (higher is better)\n", + " for j in range(len(y)):\n", + " if exp == \"weak_scaling\":\n", + " y[j] = (keyc * x[j]) / y[j]\n", + " else:\n", + " y[j] = keyc / y[j]\n", 
+ " \n", + " # compute speedup of throughput\n", + " for j in range(len(y)):\n", + " if j == 0:\n", + " continue\n", + " y[j] = y[j] / y[0]\n", + " y[0] = 1.0\n", + " \n", + " label = \"2^\" + str(int(np.log(keyc) / np.log(2)))\n", + " axes.plot(x, y, label=label,\n", + " color=cmap(i),\n", + " marker=fmarkers[i])\n", + " \n", + " axes.set_ylim(ymin=0)\n", + " \n", + " #axes.plot(threads, threads, label=\"Ideal\",color=plt.rcParams['axes.color_cycle'][4])\n", + " #axes.set_yticks(np.arange(0, 5.0, 0.5))\n", + "\n", + " plt.title(system + \" \" + exp + \" \" + exp_type + \" \" + mode, pad=20)\n", + " plt.legend(loc='upper left', ncol=1, fancybox=True, shadow=True, fontsize=11)\n", + " \n", + " axes.set_ylabel(\"Speedup\",fontsize=8) \n", + " axes.set_xlabel(\"GPU Count\",fontsize=8) \n", + " #plt.xticks(fontsize=8)\n", + " #plt.yticks(fontsize=8)\n", + " \n", + " # plt.savefig(mytitle + \".pdf\", format=\"pdf\");\n", + " plt.show()\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unmanaged Memory Plots\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEkCAYAAADNfV1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOydd3wc1bX4v2e2Slr15op7NxgwxhgCNt30vqYlgeRBIEDgkfcDkhDgwYPwEiDwqCEQignBCzGBUAyE0IvBFINlbOMKbuptpe1zf3/MaLVadWnXkuz5+iPvzsydO2dm79xz7zn3nitKKSwsLCws9my0gRbAwsLCwmLgsZSBhYWFhYWlDCwsLCwsLGVgYWFhYYGlDCwsLCwssJSBhYWFhQVDXBmIyE0isn6g5bDoO8m/oYhcICLRgbh2P/JZICJKREaZ22PN7R90c95mEbm+v9fvgXwpe6bmfZ2firwsdh09KWtDWhmkAxHJE5G7RaRMRJpEZKeI/F1EpvYz30dE5O0Uibk7swQYOdBC9JIPgeHA9oEWpBOG4jO12MVYyqA9w4FxwA3A/sCJgAf4t4jkp/viIuJM9zUGM0qpgFKqfKDl6A1KqbBSaqdSSh9oWTpiKD5Ti13PkFEGIuISkQdFpF5EakXkQcCVlEYTkdtEpFJE/CLyjIhc1dJFFoOXReRTEXEknPMvEflAROxKqW+UUqcopf6ulFqrlFoBnIuhJDrt9ouIQ0TuEpGtIhISkR0i8ox57Cbgp8B8s5utROQC85gSkV+IyNMiUg/81dw/xZTVb/79U0QmJlzvAhGJisghIvK5iDSb9zU7Sa6jRORrEQmKyFci0iJDj7r6InKKiHxh5l8nIp+IyH4JxyeIyLMiUmOm+UpETjSP5YvIUyLynYgERGStiPxSRKSL67UxaeyK+xSRc0Vko3nuv0RkXMKxdqYkEfmBmfdYc7uNmaiTa8wSkQ/Na6wTEW8P5OrpvR8kIu+az7jWLEslyfn0Id/DzWfZ8kwP70DGUhF5XIx3rtF8jw5LOH6tWW7GJuy7UUSqu3peCWlvEpH1ZjlcI0Zv/S0RmZCU7ngR+UyMd69CRB4QkayE44+bv+0VYryjfjF66w4RuUREtpjP7mFJaJCJyNEi8rZZvutF5B0ROTDp2kpEfi4ii81n8L2IXJOU5lwRWW7mUSXGuz05Kc1+IvJxQhk5U5LMOyLiEZF7RGSb+bt9ISKnJ+XT67IGgFJqSPwBfwQqgFOAqcAdQAOwPiHN1YAf+CEwydyuAaIJaYqBbcAd5vZvzDR7dXHt8YAC5nWR5mpgK7AA2AuYA1xlHvNgVPIfAsPMvwzzmAKqgSuACcBkIAPYArwJzDb/3gLWA07zvAsAHXgXONR8Jq8DGwC7mWYk0Aw8AkwHjgQ+M695fg+e+TAgDFyD0VuahqEY9044Xg78C0NRTjB/n+MTjl+L0cMaB5xv/j4XJlzjpqTf8IKk3ytt92leuwl43/y95gDLgZWAdCSfue8HZt5jze0F5vYoc3usuf0DczsDo8y9AswC5gGfmjJf34V8Pbn3YRjvwdPA3qZsXwHv9fOZjjCfzWPmMz3azDf+TM37Wg38HTgAmIjxPoWAaWYaAV4DPgLs5vUiwMk9fO9bfqNlGO/BLOAL4J2ENPsAUYw6YhpwHPAdsDghzeNAPfCEmeZkIGj+Jk+a93giEAAuTTjvNOAsjPdyBkYZqwEKE9IojPfgIox34BfmvsMT0lxo5j8B2A94EfiW1vc5E9gB/NO8n4Mw6ot4GTGf5VvA2+bvPB64GOMdPbI/ZU0pNTSUAZBl/nAXJe1fQduKZBtwS1KaZ0h4Ecx9h5uF50azYJ7exbVtZkH8BNC6SHcP8G/MSqSD448Ab3ewXwGPJu37qfnjFSXsKzUL6o8SXmgF7J+Q5iBz3xRz+1ZgM2BLSLOQniuD/Uio9Do4fguwE8jqxW95D/BGwvZNdK8M0nKf5rUVMDFh32Rz31EdyWfu660y+A8MJZifkMdMM013yqC7e78FoxHiTEgzy0xzWD+e6f9gNEjsCWlOpK0yuMC8tj1J7n8Ddydsl2BUdA8A3wP39KK83ITxrhYn7DsbQ5m5ze3FwCdJ551iphljbj+O0ZhMfE4vA1WAK2HfC8BzXcijAbXAeUnv8P8lpVsD/K6LfArM8w4xty8yy0huQpqpiWXELGfBxDTm/r8A/+hPWVNKDRkz0QQMk9CHSfvfb/kiIjkYrZmPk9J8lJyZUuot4E6MgvaIUmppRxcVERtGq2EyhsLoyib8GEbLbL2IPCQiZ0jP7f+fJG3PAFYrpaoSZC4H1prH4rsxWrEtbDM/S83P6cCnSqlYQp
p2z6MLvsJo1a0SkedF5EoRGZ1wfDbwoVKqqaOTxTDBXSciX5pdYz9wCTCmFzJAeu+zUikVNwMppdZhVBDTeyljV0wHvlFK1SZcZxVGS7U7urv3GcDHSqlwQt4rzbwTy0pv852OUcEmjkJ6n7bMweiZ1EmrOdOP0fqflCBPBfAT4FKMXvA19I7tSqnKJFkFQ8mAcZ/vJp3zjpkm8Xf8JvE5YTRk1iqlQkn7Ek1s40zzz3oRacDoheXSvgx/mbS9jdZniYjsa75Dm0SkEaPnQkI+LWUkXiaUUmuAuoQ85wBOYFvS8z6f1ufd57Jm7y7BIKHFxqz6mcZIaFTyhwAxYKKIiDJVaEIaJ/A3jFbWAqXU1q7yVEp9KYat+WiMnsc9wC0icpBSqqEbkTqqTDu6D0narydVgC3HtA72dZVvhyilYiJyHEYhPAo4A7hdRM5SSr3Ug/x+CfwKw4T2OdAI/CdwQk9lMEnrfXZAok9DT9oGcPQhv77K05d7725/T/LtSObkbQ34BsOUkkxz0vZ8jPetFKMyrehCtmTCSdt9fQaRDo51tC8x35cwGgeXYfRqwhhKMbmh15GMGoCIZGKY4d7HUIo7zTRlSfl0V0Y0jEp9TgfHWq7f57I2VHoG6zFu9pCk/Qe3fDE16nYMG1kiB3WQ300Yrf1DMFq31yYeNH+8FzG07GFKqe+SM+gIpZRfKfW8UuoXGDbUaRgvAab8tp7kg1FIZohIUYJMpabMZT3MAwx77hxT+bWQ/Hy6RBl8opS6TSl1GEaL60Lz8GfAIYmOuiQOA5YppR5VSn1htsAndZK2P/TnPosTnZGmU68Qo5IDo9IqScp7/17KVwZMF5G8hOvMwKgU+0sZMC/J6TnLzLs3ZaWjfOcm3XfyAIoVGHbrBqXU+qS/+DBbETkK+C8MO/0W4AmRzgcR9FHW+Un75mNUiqv7mqmIFGLUAbcrpV5TSq3GMNOUdH1mO6Zh+Cp/o5R6Syn1DZBP20bGamCaiMTLhIhMAfIS0qwwt90dPO+WOqrPZW1IKAPTDPEQ8D8icrIYI21+j2FTS+RO4CoROU9EJonIVcAxJGhKEZmP0Vr9sVJqOYat7mYROcg8no1hGpkCLAJ0ERlm/mV0JqOI/D/zujPMHsJPMFpC68wkm4Cp5vEiEXF1lheGM7ASWCIi+4sxyuMZjK7nku6fWJwHMFpiD4rINDFGg9xqHutJD+pgEfmtiMwVkb1E5EgM51bLC/YARhl6QYyRKeNE5ESzNwGGWWuBGKNSJovI/wBzeyF/T+nPfTYDj4nIbBE5AMPB+DWGUxwMh10mRi9vgoichdFK7A1PY/SKnjJHehyEYecN9DKfjrgPyAEeF5GZYkx0Wwy8r5R6rx/5PohRgT1sPtMjaX2mLfwVo1y/LCLHiDHZbq6I/EpETgUQkWJTnjuUUq8A52A04q7uh2zJ/AHYX4zRfFNFZCFwL/DXnjbkOqEW4z28yCy/8zCsBb393bZgONWvMMvQkRiWg8Sy+VcMW/+TIrKPiMwFHjWv1ZLu3xjlcqmInCYi481ye4WIXGSm6XNZGxLKwOQ64B+YziIMDXl/Upq7MV6OezBGHByEoSCCACJSADyF4cB6FUAp9XcMe//fTK08G6MFNBbDproj4W9RF/I1YBTwjzAqk9OAM5RSa83jj2J49T/EKGDndJaRUiqAocRCGLbQdzBMSQuTbJ5dopTahtEaOxjDpnkP0DJMLdiDLOoxWtgvYIx8+AtGob3FzH8HxrNqxBi9UIZRYbS0eG4xZX8B47nkA//XU/l7Sj/vcwfwMMaImA8wXprTWsyG5u93EYbTchWGkv91L+VrBo7H6HF8gvEMW0bH9QvTl3QMMAqjfL1kynlGP/PdBpwEHEjrM706KU0QowW+AuMdWgcsNc/ZYrb+H8eoDH9rnrMJw290m6l8+41S6iuM338+xju7GMM5fEk/89UxRhJNwPCfPY5Rx+zoZT5VGHb9ozHekTswekp6QpqWMlKK8Ts+ZV7Lj1mGzTJ5MsYzvgvDSf0yhtl1Q1I+vS5rLcPndltE5C/ALKXU7G4T7wGIMQb8HWAfpdTXAy1PuthT7tNi90VExmCMkjtZKfXPdF9vqDiQe4SIjMBokb+FYaI5CfgRcPlAyjWQiMilGK2l7Rj2zz8Cy3e3CnJPuU+L3RcxJkhuwzC9jQF+j9Gren1XXH8omYl6QgyjW/c+hpnoRxgTSB4aUKkGljEY/oa1GHbg9zBH84jIrxOHqCX/DaDMfaHT+7QYnJh+qE7Ln4icN9Ay7mIKMeYjrcHwTXyHMYAl1OVZKWK3NxNZdI7pQyno7Hji+HsLi1QjInYM31xnlCulGneROHs8ljKwsLCwsNjtzEQWFhYWFn3AUgYWFhYWFpYysLCwsLCwlIGFhYWFBUNwnoHX6/0LRijdCp/PN7ObtGMwZs0WY8QgP9/n83UZcM7CwsJiT2Qo9gwex4hV3xPuAJ70+Xz7ADcDv0uXUBYWFhZDmSHXM/D5fO96vd6xifu8Xu8EjDhFxRiBxy7y+XxrMGai/qeZ7C2M2EYWFhYWFkkMxZ5BRzwMXOHz+WZjBIB6wNy/ktaAXacB2V6vt3AA5LOwsLAY1Ay5nkEyXq/XgxGt8lmvN77uc0t46P8C7vN6vRdgRP/chrGEnoWFhYVFAkNeGWD0bup8Pt++yQd8Pt924HSIK40zfD5fT5YatLCwsNijGPJmIp/P1wBs8nq9ZwF4vV7xer2zzO9FXq+35R5/hTGyyMLCwsIiiSEXm8jr9f4NWAAUAeXAjRgrAD0IDMdYn/YZn893s9frPRNjBJHCMBNd5vP5dkkEQAsLC4uhxJBTBhYWFhYWqWfIm4ksLCwsLPpPWhzIPZkl7PV6F2Cs8ekAqnw+3/weZG11YywsLCz6hnR1MF2jiR7HWJj+yY4Oer3ePIy5AAt9Pt93Xq+3pKcZb9++PSUCpoKioiKqqqoGWoxOGezyweCXcbDLB5aMqWCwywf9k3HEiBHdpkmLmcjn872LEQuoM84Flvp8vu/M9BXpkMPCwsLComcM1DyDyYDD6/W+DWQD9/h8vs56ERcDFwP4fD6Kiop2mZDdYbfbB5U8yQx2+WDwyzjY5QNLxlQw2OWD9Ms4UMrADswGjgQygI+8Xu/HPp9vXXJCn8/3MEa4CQA1mLpyg71rOdjlg8Ev42CXDywZU8Fglu+DtU5++EAhoYjgcjhY/PNqDpkS7lUeA2Ym6gFbgWU+n6/J5/NVYcwBmDVAslhYWFgMSj5Y6+THDxYQihi+31BE+PGDBXyw1pnyaw1Uz+AFjJhBdsAJzAX+2NfMlFI0NTWxq+dMBAIBotHBF+pIRMjKyhpoMSwsLPrJD+8vJBRtOwgoENb44QOFbLxnR0qvla6hpfFZwl6vdyvGLGEHgM/ne8jn833j9XqXAV8BOvCIz+db1dfrNTU14XK5cDgc/Rd+NyAUCrFt2zZLIVhYDAGUgooGjfU77awvt7PB/Fxfbm+nCFpo6SmkkrQoA5/Pd04P0vwB+EMqrqeUshRBAi6Xi2AwyLPPP
suhhx5KTk7OQItkYbHHE47Clkqzom+p+M3vjcFWi32mS2dCSZQDJ4R5sc5GTG9f8bscqbeC7A5RSy06wGazYbPZ+OKLL5g/vyfz+SwsLFJBbZOwfmdLRe9gQ4WN9TsdbKlqW7EPy4sxsTTKGXMDTCyNMGFYlImlUYbn6YiZbPydbiK17ZWBI99SBha9QEQGpU/DwmKoE9Nha42tvWlnp51qvy2ezmlXjCuOMnVkhBP3DzCxNMrEYVEmlEbxuLuv0CO1HY/x6Wx/f9hjlUHG0qVk3347tu3biY0YQeN11xE4/fSU5F1TU8OVV17J5s2bcblcjB07lv/93/+lsLB1kbXbbruNP/3pT1x55ZVcffXV8f26rvOzn/2MNWvW4HK5KCoq4vbbb2fs2LFtrnHXXXdx55138uabbzJ16tSUyG1hYdGWpqCwsSLJtLPTzsaKtvb8Ao/Ryj92VpAJpUZlP7E0yujCGHZbFxfoAqWn6CZ6yB6pDDKWLiX3mmvQAgEA7Nu2kXvNNQApUQgiwqWXXsrBBx8MwC233MJtt93GnXfeCcAdd9zBypUr+fDDD7niiitwuVxcdtll8fPPOussjjrqKDRN47HHHuOaa67B5/PFj3/99dd8/vnnjBw5st+yWljs6SgF22rgkzXOuA1/g+nA3V7bWkVqotiryKj0508PtWnlF3j6V3PHwlC1zk55mYOKVQ7js2zX+kF3O2WQc8MNOFav7jKN87PPkHDbSRtaIEDeL39J5tNPd3peZPp0Gm6+uVsZ8vPz44oAYP/99+fJJ40J1vfffz8bNmxg8eLFOJ1Onn76aa644gr+/Oc/c9FFF6FpGsccc0z83NmzZ/PII4/Et0OhEL/+9a+5//77Oeuss7qVxcLCwiAUgc3JDlzzsymkYSyRAh63zsTSKAdNCjOxtJmJpi1/bHEUVwrq51CDULHaQXlLpb/KQdW3dnRzhJAjU6dkepSZZwb4/PFdNyJwt1MGPSLcyey9zvb3A13XefLJJ+MVfGIPAMDtdvPnP/+50/Mfe+wxjj766Pj2HXfcwRlnnMFee+2VclktLHYHavxam9Z9S8X/XZUNXbWadkbkG5X8onnNzJrgZpinjomlUUpzWx24/UEp8O/UKF9ltPLLywwFUP9da7WbVRyjZGaE8UcEKZkRoXRGhPxxMcR0CVjKoB/0pOVecuCB2Ldta7c/NnIk1c89l1J5rr/+erKysrjwwgt7fe6DDz7It99+y7PPPgvAihUr+PLLL/n1r3+dUhktLIYa0Rh8X21rNy5//U47tU2tRnqXXTG+JMrM0RFOPSAQb+WPL4mSleDALSpyUlXV98agHoOajfYEE4+d8lUOAjWtsuSPizJsnwizzm02Kv6ZETwlXZuXsopjNFW2dzpkFcf6LGtn7HbKoCc0XnddG58BgJ6RQeN116X0OjfffDObNm3i8ccfR9N65/1/7LHHeP7551myZAkZGRkAfPzxx2zYsIGDDjoIgB07dnDeeedx1113WcNHLXZL/EGJ2/FbKvwNO+1sqrQTTnDgFmUbtvzj9wvGbfkTS6OMLIhhS/HAm0hAqPzGbrT2zcq/8hs7UXOugM2pKJoSYdKxZmt/ZpTiaRFcnt4PB738y/L493THT9ojlUGLkzhdo4kAbr/9dr766isWL16My+Xq1blPPfUUTz31FD6fj/z8/Pj+yy+/nMsvvzy+PXfuXJ544glrNJHFkEYp2F6rsaHcYZh3Klor/511ra1im6YYUxRj4rAIR84Mxp23E0qj5GelJxRNc41GxSp73KFbvspBzQY7ypwv4MrVKZ0eYd/zmymdGaFkRoTCSVFsQ3AO7B6pDMBQCKms/BNZu3Yt9957L+PHj+fkk08GYK+99uLRRx/t9ly/3891113HqFGjOPvsswFjRvFLL72UFlktLHYVwQhsShimGR+5U2GnOdTafM9260wcFuXQqeaIHbPCH1McxZmmGkspqPvO1mrfNz8bd7Qqo+wRUUpnRphyYpDSmYZ9P2dULCX+hcHAHqsM0smUKVPY1oFPoid4PB62bt3ao7TLly/v0zUsLNKFUqYDN9G0s9POpioHmyuGoxIcuKMKDHPOgROb25h2inNS48DtjFgEqs1hnOWrHFSsdlC52kGwvhQA0RSFk6KMnheidEYk7tjNKNi9V921lIGFhUWvicZgS5UtHnJhfUIrv66ptZXvduhMKI0xZ4LOabObmTgsYpp2YmQ401+5hhqFym9abPuGg7dqnYNY2BzGmaFTPC3Kfot0ciY0UjozQtGUCI6MtIvWI44pymJ2OMpV/jDpXnrHUgYWFhad0hBo68BtGbmzudJOJNbafC/JiTGhNMpJCSEXJpZGGZEfQ9NanJ+NaZNTKfCXa1SUtTXz1G5ureIyC41hnAf8h5+SmRFKZ0TJHxdFs7XI15w2+fpKmdPGtw6NJVlOfqxHuUQTSvX0KFFLGVhY7OHoOmyvtbW15ZsRNcvrW23mdk0xttio6I/dJxgPrDahNEpu5q4zoegxqN1ka+PUrShz0FzVKmve2Cgl0yPM9DYbpp6ZETyl6TU/pYoYUKcJtZohbNgU+jFN54lSD4uawlzlD6dcKVjKwMJiDyEQxoiz02ZCloMN5TaCkVbTTm6G4cBdEA+5YJh29iqK4ehjnJ2+EglA1VpHmzANld/YiTQb8moORdHkKBOODMXH7pdMi+DKGXj7vgICArWaUCtCjVnBJ/8ZFb8W367XOtZYxgRl4cksB89lbubbncNTKq+lDCwsdiOUgqpGrd24/PXldrbW2OIOXBHF6IIYE4dFOXhyKG7WmTgsSqFnYFrQgVpp09IvL3NQvd6OMs1Rzmyd0hkR9jmnOe7YLZocxZb6FSDbEQPqNaFGOq7Q21fuxl+oiwfp0RX5CX9jorrxXSnydZ08XXFFfmY8vaai6MTQeJ1m7Umg+9GJvcFSBhYWQ5BIzFwoZaedHX6NrzbmxSv++kBrKz/DacTZOWB8mEXzzMBqJVHGlUTJ2AWVaEcoBQ3bEodxGhO4Gra1VkeeYTFKZ0aYtLB1GGfu6NYwDf0hILSr1CNajO89zk4qd416AdVJxW5TbSv1vaI6s+KVesd/ebqis8cfIUqd5qdO8wPTERVFEUPxGshidGr7/xA6wFIGFhaDmLpmaRtJ02zlb6m0E01cKCVXmDAsyqlzEmPmRxiep9PLye8pJRaBmvV2Nm/R2PBxjuHgXe0gWGcIJZqiYEKUkXPC7H9hEyUzopTOiJBZ2H0U0L601us0IdhhpR6DHDdZevuKPV/FEvbp7Sr2bAUd5xijXpqp1Rqp1Rqp0xr53tFIneY3t81Pabvt1wIJuTyEkjLgKVSalEAL6VoD+S/AiUCFz+eb2UW6OcDHwCKfz5faoEBdULTxGJyhsnb7w64ZVI1/vd/5p3M9g7lz5+JyueKzmn/zm9+wYMGCfstsMXDo5kIpiYHVWr5XNrQa6R02I87OlBFRTtgvGI+Zf+D0XMLN6QtT0FPCTUY0zpa4PBVlDirXOoiFjKrS7s6ieFqEqScG4vb9oqlRnJmq
TWv9M02o1ezdVO59a63nddJan5CXj15VTUexAnR0GiUQr9BrtUY22f183kFFnvhZL00o6dh3oSmNXD2LPOUhX8+mWM9jcnQ0ebqx3fIH63g5YwX/cvtR2AlL+harSlfP4HHgPuDJzhJ4vV4b8L/Aa2mSoVPCGbNxhL5FaA1MpXASzjggJfmnez2Dhx9+2ApBMQRpDgkbzSUQEydlbaqwE0xY4DwvS2fSsAhHzTTi7LSM2ulsoZScTNjVoyL9FVpb+/4qB7WbbWD6JJwFOp6ZEQr/oxk1K4bMdVI+uokyp/B+vFJ3Uqu5umitG6Sqta5QNEmwTaVeoflZa2/kPU1na055h632Os2PLp33VHL0TPL17HhFPiY6jHzdQ77KJk/PJl/3mJ+taXJUJho967KdEvwBFVotDxX/kye0V9DR06IU0qIMfD7fu16vd2w3ya4A/g7MSeW1c3begCPU9XoGqDAQSdoZxRFaReGWMzs9LeKaTsOwgV3PwGJwoxRUNGjtl0Mst7Otpv1CKRNKoxw2ta0Dt78LpaSKgEC1ErZ9Z2f7KgfVZQ4av3YQXmWH8taKLDg2Ru2+OjsvCLF1X53K/RX+kYm1sQZEsSkneQkV9uiozj5d2NVbbOsdtdYDEqJWGttU3FuTTTDSdrtO8xPpohLNynS3VtgqmxGRQnO7bUUeb70rD7m6BzvpH2JVoufzf7GruaTyJO72PMsK55qUX2NAfAZer3ckcBpwBN0oA6/XezFwMYDP56OoqP08vEAg0G5fl4iTmK0EW6wCQaEQYrZikNR71FK9ngEQD1Y3Z84crrvuOnJzc9udp2kamqbh8Xg6fGaDBbvdPijle3uVcMrvjBa72zGcF34VZcHM1i5/OAobdsK67cLabeaf+b0h0Nom9bgVU0Yq5s9QTBkZZcoIxZSRMGGYMhdK0QCn+dc3unuGMRR1QDVQI6rTz9oQBFdpaCsF9xca+SttFK3UcPqN+4nZFTXTdSoXxmiYFSG6XwzZR5GbB4VKmA4cChQojcIYFCihEOOz1G4nKxpFWjSEADYIEaaGBmqkwfikgSppYC0N1LbsSzjW8j0onYebzlAuCskhX+VQSA4jKaFA5VCg5xiftH4WqlzyyabEXoCWEAW1Rb5dUM/3GLvdzvSCSTyMGcI+xa/NQDmQ7wau9fl8Ma/X22VCn8/3MPCwuak6CuGauOh7T1ruAFqknNIN80CFQFxUjV+Gbi/pofg9J5XrGQAsXbqUkSNHEgqFuPHGG7n++uu59957252r6zq6ruP3+9Ma9ra/pDssb1/4YK2THz9YEDfdBCPCibfaOW5WkEBE4gulxBIcuMPzjGiaZxwYiUfTnDgsyrBOFkpprIe+zMcNQDv7eSQnm++DTZ3a1+tF2tnWXbVQtNJG6ecaI77UKPrCxog1WrxCVB6Fbe8omeeGyJ1pTOAaOTlCkSOptR4EdhojYOo1P7Wa3zDBSCMbND8rzFZ5wBZhZ6yqXau9WQt2eq9OZTda4jGjJT5KL2JvfRx5qmPTS57uIU/3kNFhX6IrFFqRDLpymEx/3pURI0Z0m2aglMEBwDOmIigCjvd6vVGfz/ePXSWA7iilKXcRWXWLacpdlBZFkOr1DID4uscul4sf/1M1gKMAACAASURBVPjHfVIyFl2z6IFCVKRt5RmJCS9+7mbaiCgzRkU4ZXYgXumPL4nicfdukpMO1AttJht1+ydCsMMJScZImMxEE4tSjIro5McUed8JnpU2HF/Z0L+yE1xlI5iw2panNGY4dA+PUDQzRMbMBvRxNdTZW80s32l+ViZU5MnO0katc6eFTWkUkEOu3XCYjogVMl0f22GF3mJ+ydezyVCu1p6ERdoZEGXg8/nGtXz3er2PAy/tSkXQgr/oKhyhdfiLr0p53ulYz6C5uZloNEpOTg5KKV544QVmzJiRatH3aNbvtLdTBK0I/7q+st3eALC9pxV6F631FjSlWm3rSjEypjMz0rldfUJePqqqGkcUqje0rLZlZ3uZjcoyJ+Fau9ELEYVtYj36AeWEf7qV+lmbqNr3W6qGb4+36LsaASNKyFVZ8cq7SM9lYnSU4SxtqdSVp11rPVtlUlxUPOhb3oOWmCLr4SCOBzaQdZmTpovcYEu9kkzX0NK/AQuAIq/XuxW4EXAA+Hy+h9Jxzb6gO0qpHvv3lOebrvUMKisrueiii9B1nVgsxqRJk7jttttSLv+eyIZyG3e/ks0/VmRgBBJo/7JpDsXP8jN62Fo36LC13mGlrscdpjmK+DiTlhEwLaNfas2hjBWBIGu/cdG0Kge9rBj1RRHOshHYgobvIeoKUb33BirPXEflvt9Sud9aqvfeQMRj+Nc6GwFjmGDaO0vzdA+5KqvHI2AsUoNtY4z8S5qwb4whAci+I0jG0gi1D2URG59ah4YoNfAxPHqB2r59e7udjY2NZGdnD4A4g5dNmzaxbt067HY7Rx555ECL0ykD7TPYVGHj7lezWfpJBi6H4sL5TTww2w5/dEMooZJ3KfhNExOmxbqcWZo8EsadcK0AoU7HpHc0+ajlu70ym+IvJ1H8xRSKvpxE8ReTyV+3F6KMijmc34h/1hYis7Yj+1TimllH7qQgeVpWa4t9AEbAJDLQv3N3DFb5SmfWodUpEjtrSgM9Xyj/Kq/H+Zg+gy67E9YMZIs9ki1VNu55NZvnlmfgsMFFRzRx1rFNPDfCDh6B64Nws9uIDuZQ8Jt/wz538O72RwkT6bAy/87mZ6UjsTL3x1v0dZq/yxEwbt1JXiybERsmMOyLOUz6cgKelWNwfDUC2dHa0HGNDpA/I8Cwk6oYPR1GzVSM2yef6uoCoKA1w14OsLMYJCiFfY2Oe1kY92sRbLXtG+uiQ3RK6pW5pQws9ii2Vtu4Z5kH30eZ2DS4cEETpy1swjfSwQmZmfHZJzIzhPI1A8tAngIzFMCkYed2OQLGoexxG3qens2YaCmz9InmBKQEk0swB9s3JUS/LqTp62yqy9xUlDkI+80wDTYjGmfJwRFKZtQb0TinR8jI76BysHysQ5uYwrkiintZBPdrEexbdJRAZLaN5lMcuF+PkBihQs+C5rNTPwzeUgYWewTbajTufS2bZz7MRAR+dFgTJxzXzDOj7ZySkYkAxwXqGR17iwc9+6Czqo0SaOG85qPbm17is009ZCp3uxEwoQYjTEP5qtZQzKu/taObjmpHpk7J9CgzzggYQdlmRiiaHMHuxmJ3JaBwvWdU/u43ItiqFcoJoR/Y8V/mJni0A71EQxoU7rfqIZDQCLAJwaMtZWBh0St21Gnc91o2T3+QiVJw7iHNHH1CE8/s5eBMdwZOFHtHVtEoT/Bi5pcATI6MIkN38Y3TDzgIS+ts9ZsaOh/KqxQ07kgI02AqgPqEYZxZxcZqW+OPCMbX1s0bG0MbRJObLNKD1Oq43zQUgOsto7Wv5wjBIx0Ej3UQOtyB8rRtSKgcYec3hm8g3X4NSxlY7JaU12vc/5qHp97PIqbDooObWXBiM38ba+d8dyYOFcKjXqBRW8yXjgYODE/jvPoLWRg8kBf33oe
mShvzk/JsKq2Gz0OAsdpWzUZ7fMEVI0aPnUBNa62ePy7KsH0izDq3OR6YzVMyOEJNWOwabNt03K+FcS+L4Pw4isQgNkwIeF0EFzoIHWQH5+Cw81nKwGK3orJB4/7XPSx+L4tIDM46qJk5Jzfy5wkR/urIR1QDsBh4mQPDEzkueB5HBw+gSG8dmdFU2XEzPau8kNeubYqvthUNGvZ9m1NRNCXCpGPN1v7MKMXTIrg8Q2qknkUqSHIAO7+OARCZrOH/uZvgQgeRfWzQxXDkgWKPVgbNWjnv5P+c+bUPkqmnbgZyOkNYB4NBbrrpJt577z3cbjezZ8/m97//fcpkH6pUN2o88IaHx9/JJBwVTj6oEfeZZSwbl8cz2migCpf+CEcHKzgxuD+Hhx7AozK6zTeZb/6ZQen0CPue32w4dWdEKJwUxeZI/T1ZDBG6cADXX59B8BgHsQmD3w64RyuDlZ67KXcuZ6XnbuY1pG7yVjpDWN966624XC7ef/99RITKyvYzYvckavzCn/7l4S/vZBEMC/vM20Djok/5x14zQfZGU+XMDb3Gpf4M5odOw0n/au0ry3Zao3cs2jqAX49gq+nYATyU2O2UwfKcG6hxdBPCGogRpsr5OYhibdZiqh2rsHUTObIgMp25DQMXwrqpqYnnnnuOFStWIGaNVFxc3K08uyO1TcLDb3r481uZBMIa+Yd8TPM5K/hy5NHASeTrNVzQuJFf+AtwMi9l17UUwZ5LXxzAQ4ndThn0lCbb1oQtRZNtKzmx8Sm/TipDWG/evJn8/HzuuusuPvzwQ7Kysrjmmms48MADUy73YKWuGW59O8Jzbw4jHHTAIW/Boo34Rx0LsjeTIgH+X2MTxwXtaH2I8fvl4szuE1nsMQwlB3B/2e2UQU9a7s1aOX8vnUd8jrcowrZ65lc9kFLfAaQ2hHUsFmPLli3MnDmT3/72t3z++edccMEFfPDBB7t1OI4oMd6KruOed2x8+eoBqGYPzHuHYWc10Tj2QJq0uewXivILfxMLQrE+x7n8+D4P7/wuB5tTEQu3zyWrONa/G7EY/AxhB3B/2e2UQU9Y6bkbRduRHgo95b6DVIewHjVqFHa7nVNPPRUwzE8FBQVs3LiRWbNmpUzuwUCAEO+6V/Ki+oJlb44h+OIZ4M+l6IBVTDxTKJu8Hzs1jQVBQwnMDfe9olYK3vldNsvvz2baKc2ccHcdNtNiOFhj1likkJhC3g+Qs6R50DqAtfJK7IsuRbv3FvSS9CwGtUcqgwrnZ+hJcWJ0CVPhXJGya6QjhHVBQQEHH3ww7777LvPnz2fDhg1UVVXFRxoNderEz7/cK1jmXs5b+lqCy05Cnv8lyp/D9H13MumMGG/MGMPHmnBcIMIV/mZmRfo3bl+PwRu/zuXLp7LY9/wmjr6t3poAtifQzgFch30QO4A9dz+CfPgpnj/+mYbf/Sot17CilqaBtWvXcsQRRzB+/HjcbiOmQG9CWE+dOpVRo0bF76klhDXAli1b+OUvf0ltbS12u51rr72WI444ol0+QyVqaahI8bfmZSzLWM5HzjKiIQfZL/+Q8D/OJtSYxUEzAxR4w7y5t0YEODUQ4XJ/mCnR/k/eioXhpSvzWfNiBgdd3shh1zW2cxAPhZ6BJWPPkFod97+Myj/ZAew4q4Cq2cFB5wAePn4eEmof4FC5nOzY+FGP87Gilg4QU6ZMYdu2bX061+PxsHXr1k6Pjxkzhueee66vog0K1tu38qp7Oa+5P+EL57fghHH+ccx94RbK/vkD6hqdHDgjRNaiet7dW0NDw9sc4ef+EGNjqWm8RALCPy7OZ+O/3Sz4TQNzf+5PSb4Wgwvb1pjR+l8Wwbm8cwdwUVE2qio00OK2o/yjF8m5+W7cr72NFgiiZ7gJLjychhtSvyCXpQws0o6OzkrHBpa5l/OqezkbHIai3Dc8kRuaL6X2hQUseXUUHzTY2G9aCPfZDXy8j+BSGhc2hfmZP8wIPXU92FCD8NwFBWz9xMmx/1vHvud3vmSjxRBDKezfJCiAVUPbAayXFqOys5BgCOV2IcEQenZWWvwGljKwSAsRonzkLGNZhtED2GmrwaY05oVn8pO641nQeCBvvz+a+1/PZXutsPeUMKP+s5nP9xOydeFyf5iLmsIUplAJADRXa/jOLaByjYOT769l2imdh6O2GCLEFM5PE2YAfzf4HMD9QauqoelHZ+K6/CeE7vsLtor0mNssZWCRMpolyNuuL3nVvZw33Suo15pw604OD+3HwuBcjgzOJitihJE+c1k2O+pszJimk3dFE1/vDwUxxTUNIS5oCpObBldWwzaNJecU0rDNxumP1TDhiMFnFrDoIaYDOGNZBNcbCTOAD3Xgv8J0ABcPHgdwf6h95A7A8Ls03HZd2q6TrjWQ/wKcCFT4fL6ZHRw/D7jW3PQDl/p8vpXpkMUivdRII2+4P2WZeznvulcSlDB5uodjggdyXHAuh4VmkaFcRGLg+yiTe5Z52FZjZ+LEMBOvCFC2PwzTdW6qD3Nec5jMNI1nqNlgY8k5hQQbNLxP1zB6buerjlkMTqQmYQbw27vfDOCBJl09g8eB+4AnOzm+CZjv8/lqvV7vccDDwNw0yWKRYrbZKlnm/oRX3cv5xLmamOiMiBZxbtNRLAzOZW54enyN3UgMnlmewT2vZvNdtZ2x4yOM/nkT62cr9orp3B9zcFxFA70bfNs7ylfZ8Z1XCArOebaaYXtHuj/JYlDQqQN4kctQALvRDOCBJi3KwOfzvev1esd2cfzDhM2PgVHpkMMiNSgU6+zf86p7Ocvcy/nauRGAyZHRXOY/neOCc9k7Mr7NCl/RGCz91FACmyvtjBoXoeRnTWyeo5gcjfF/dWFOCUQYVlREOgccbv3UyXM/KsDp0Vn0t2oKJ1qziAc1nTmAp2jG+P8WB7AVJCrlDAafwU+BVzs76PV6LwYuBvD5fBQVtfeiBwLW6t/JaJqGpml4PJ4On1l36Oh8Iqt5QXuPF7X3WC/GcNe5+gxujV7CyfqhTGY0uCCxWR/TYcn7Grf+3cb6HcLIcTqFFwXYOifGfkq4N2rnZOVAy8qALLDb7X2SryeseU3wnWMnbxRc9IpO/l753Z+URDrlSxVDXsaYQj4Mor3oR3vRj2yOogTUPDfR3+Wjn5QFk5zJRW3XyTdISLeMA6oMvF7v4RjK4AedpfH5fA9jmJEAVEcTV6LRaL/kKNdquNvzLJ851/J61V39ygvSt57B999/z09+8pN42oaGBvx+P2VlZe1k0HUdXdfx+/09nuwTJsKHrlW86l7O6+5PqLDVYVc2DgntzU+DJ3BMcA7D9IJ4+qqENn1Mh39+lsEfX/GwvtxOyV5Rsq8Ns+0gnQPDUe6pCcXjBtUkXDNdk5G+edHNS7/Ip2hyFO/T1cQydfpymcEwWao7hqSMXTiAg5dlJjiAY0ADae0+diTfIKQ/MpqTzrpkwJSB1+vdB3gEOM7n81UPhAwtSmBJ1lsodMLSP6XSQrrWMxg9ejRvvPFGPN0NN9xALNY/s0eTBPi36wtecy/nTfdnNG
jNZOpuDg/tx3HBuRwRnE2uyur0fF2Hl75w88dXslm3w0HhqCgZ1wSoOCjGgnCUX1SH+hU3qC98+ddMXrs2l5EHhDnziRrc6RiaZNFrOnUAH2U6gBdYDuCBZECUgdfr3QtYCvzQ5/OtS2XeN+Q8ymrH5i7ThImw1VZJha0OUKiE8ndm4W87PW96ZCw3N/y0WxnStZ5Bm3sIh3n++ed5+umnu5UnmWqtntfdn/Kqeznvu74iJBEKYjkcH5jHwuBcfhDam4xuOuS6DstWurnz5WzWbHeQPzKK85cBqg+JcVwowhXVoX7HDeoLyx/M4u3/yWXc4UFO+3MtjgxLEQwkLQ5g+7+3Muy9QBsHcOBYB2HLATxoSNfQ0r8BC4Air9e7FbgRjCWmfD7fQ8ANQCHwgNfrBYj6fL4D0iFLR6xzbKVRmruJ1JEaUrmeQSKvv/46w4YNY++99+7y+o1aM7/K+RMfuco4t/loXnMv5xPnGnTRGRUt5odNx3JccC4HhKfGRwB1hVLw+ldu7ng5m9VbHeQMj2G7OkD9wVFODUe4vCo1cYN6i1Lw7u3ZfHxfNlNODHDSvbXxyKMWu5BOHMD6dKflAB7kpGs00TndHP8P4D/Sce2etNwrtFrTPPRv9CTz0HPVt6RUnlSuZ5DIkiVLOPvsszs9N4bOUu1tvsrezHdZIRD479zHmBYZw5X+M1kYmMuM6Ng2I4C6Qin41yoXd76UzdffO/EMiyFXBggcGuXsUITLqkOMSVHcoN6idHjjN7l88WQWs85r4pjfWZFHdymdzAAOH2Cj/rfGDOD8A0tpHOQ2+T2dwTCaaJdToudzW8PFXOU/q1OlkApSvZ5BCzt37uSjjz7innvu6fT8HbYqVstmYujxHtAH5Q8wNjasV3IoBW+tNpTAl1ucZJbE4IoAkcOi/DQU5pKqMMNTHDKiN8Qi8Mp/5rH6+Uzm/ryR+b9uH3m0rzRr5TxjX8TB2r0pX/RoyLMHzQDeU9gjlUELyUphhXNNyvJOx3oGLfh8Po488kgKCgo6ONugo+q5N4pAKXj3Gxd/eCmbLzY7cRfH4PIg2mERrgiFuag69XGDekskAC9cUsCGf7mZ/6sGDro8tZFHV3ruZqt8mPJFj4YqlgN492aPVgYttCiFVLF27Vruvfdexo8fz8knnwz0bj2D6667jlGjRsXNQInrGYChDG65pWtzlgBakkr4NOdmjClkKuGzBRX/93XZNJb8/VTWrBuBoygEPw9iX1DLObzJsZG3ydQCrMk28gBQotrkm5hfh9+lJZ3CaXMRyg+25tXuHAXS9nxQRBsy2XDhLfjfH8aoe/5Iw8Uv8Ho879Z0rfdKh/uNh9X+mehEqHGUgSjWZS0mojWSHRtDRqyYDL2UjFgxmXop7lgRdtxd/hZDma5mAAeOdRCeZweHpQB2ByxlkAbSuZ4BwPvvv99tPjmEGUETm3FjUzox0Vib+SSGmhDTV9DyZ+zZUHYIr/3t12xafQi2whBcEsR9xFaOtz/MXLUEFwFqbEJN/Hxaz1eSlDdJ16HtOco4ZtPsxOyxNvK09WOY26p1f7Qqn+9Oup3gV5MZ9cRvyF30GuF4/q3p28vQelwS9yszXcI91NtbB7kpdLa4XyYm4dZ1sxNw6nmmcighI1ZCRptPQ2lkxIpxqrwe+2gGDGsG8B6LpQx2UzQUk6ljIlFW46A+si/nVy7tMO3y9U7+8FI2H61zYc/X4aIQIw8PcVk4xFlVmbi4Ckj9YhrQ+4k0Dds1fOcWEv3ezpmP1jDhyCug6oqUytSslfP30nmtFb8Y/51V/gmCRsBWSUCroNlWQUCrIJDwWen8nGatnJjWPjS2ppxmz6JFWRSTGSslQy8mw/zMjJXg1ouxsQuHQkUVzhVdO4Bj4y2P/O6OpQx2c9zEOIWdTAq1n+T96QYHf3g5hw/WuLDl6fDTEGMPD/KLSJhT6iKDrnDUbrLxzNmFBOs0znqqmr3mpSfy6ErP3UkmNKN38JXnPuY13Eam3rXvRaGIiJ+AVk7AVklzksIIaJX47d9Rqa0gaOt4vqUrlm8oDb043svIbNfrKMGpcvp0jxJQuN41Wv+uf5kOYBeEfmA5gPdUBtv7bpEGdAlT4VwR3/58k4Pfv5zNe6vdaLk6XBhi2hFBroqEOLYhymCsAipW2/GdW4geNSOP7pO+yKMVzs/Qpa2iSX6GXSEITpWNM5ZNbmxil2l1IgS0qiRl0dLrqCRgK6fC+SnNtgp0ab/+gk25yaIUV1FhGyWRaZqoWvwbGXoxthqbsQZwiwM4aDmALVqxlMFuiic2moNj12Ovt3Pk9iMBWLnFwe0vZ/PuKjeSo8OPQ+x/RID/jIaY3xgbtNbsbSscPPujQhwZinOfraZoUmqHACdzStXr8e/pjlmj4SBLH06WPrzLdApFWBoSlEZlvOehZ9RTq76nwb6Jcm05IVtt/LzczSOY/MIRTP7HEYx4b3+0mJ2mEbVs+PG3VB6/k+aDw7i1kriJKkMvwaGyB79vwyLlWMpgN2VrjY3/edaGbs9lhyuDV1dl8PZXbsSj4IchDjkywNXREHObBndI503vunj+J/l4SnUWPVNN7ujBLW+6EASXysUVzSWPSW2OFTmLqKo2FZZSaKuDOF5vIHOZImOVMay5cVota69ezsaTP2HbnM8NpWKrbNcDArDpbsMsleQIb+1llJi+jSI0qwrZbbB+yd0UZZq8IzG49ul8cCs4L8RRRzTzn3qIWc27PmREb1n7spsXL8uncKIRedRTsmtkLtp4DM5QayTYlniPYdcMqsa/3vFJA0lU4fwo0uoA/l5HidN0ADtNB3A+uYxnP05kP9NNYfQ26ky/RnncMZ5osqqzf8sO24eEtbr211WCWy9McoCXdujngMEdHtpiD1UGxxRlMTsc5Sp/mNI0TJxKVwhrgDfeeIM//OEPKKVQSnH11Vdz/PHHdyuTFoM359cyOTT4lQDAV89ksOz/5TF8vwhnPVmNOy+NE9yUjqgQ6AFEhYi6JuEIrUVoNUcpnIQzdln4rG5JdAA73qynqFpvdQD/omcOYKO3kY8rmk8ek7tMGyVI0FbV6SiqgFZJnWMtAa0S1cFMfofKwl1S3GY0VUdDcd16IVoPYmRZpJ49UhmUOW1869BYkuVkUVM45UohXSGslVJceeWVLF26lKlTp7J69WpOPfVUFi5c2G24Cz0iTB6AAHK9RsX49E8Z/PuWfMYdWs8ZD67C5W5GAiFEDyIqaH6GEBWENvuSvuuh+D5azmmXPmQogm6JEc7YH/QQaOlcpLNzpEbv0AGsn+ChfoGeVgewHTee2Cg8sVHQhe9eoRPS6to5w8lqpCb8HQFbOXX2tWx3vUdEa2h/j0rDrRe16V0kztXISPh0qMy03Oueym6nDG7IcbHa0X3LImxOmlmc5WRxlpOSmGJUTO9ydPf0SIybG7qvONIZwlpEaGxsBIzFbUpKSnoU98hl78MwTBVtU8m2r3j7XkG3pLFvjFAabTb2xYK8+vhv+fdff8u+hz3LD687H/vOnsutsKM0N0rc5qcL4t/d6
Fo2ytFyPAMlLpTmNtIknKM0Nxn1f8fV9CFCFIUGaBTsuBK9/HqCnqMJZh9PyLMApWV0K1d/sH2fMAHsk4QZwGe3zgAuGl5McJAEgRM03HoBbr2A/OjU+P4idxFVdW1ljBJoZ5pKHEUV0CqpdXxj9jba+4rselYHvYv2Jiq3XogMyjFyg4vdThn0FmUqhXIbNGsaM1Mcgz+VIaxFhIceeogLL7yQzMxMmpqaeOKJJ7qVIdPexLNn30XujjUdtIyN7yRW7C2VNn0ftaPEaVSwZuXaUtkiLuO7Iw8lbrSMPIJhRUxl8PId5/PxkvnMPm0FJ924Dr/91jYVtEqo2JW4QMtIqvhTV5xDmYdSumEeqCiIk/Lx7+AIr8Xd8AoZ/mVkNixFlwxCniMIZJ9AyHMkyubp/4WVwr7aUAAZyyI4ynbfGcB2MsiO7UV2bK8e9DZqE/wa5e38HDWOMgKut4ho7eNTibLh1otM5dDq34h/6iXYmUJUHNhVepX7YGa3UwY9abmPHNE6UcepjHZfi7moJMU+hFSGsI5Go9x333089thjzJkzh08//ZRLL72Ut99+m6ystquRiTl71qlF+OdJJ7Og8DOU392ugtY1D9gK21a0LRVwvAXtBs3cbpPO1WabNhVzz+y+RUVF1Oyo4tWr8yhbmsmcn/k5/LcjCEjvn1cq0R2lNOUuIqvuKZpyF6E7RxFyjiLkOZJ6dTvO5o/IaHgFd+MyMhpfRomLYNZ8gtnHE8w+GmXL6/nFokkhoL+3ZgAnYvQ2CnHrhXTXPolIc9uht1plG99Gs1ZBtaOMoFaJkqSG33Bw6NntZoQn+jRatl16/m7X29jtlEFPSbcSgNSHsC4rK6O8vJw5c+YAMGfOHDIzM/n222/Zd99925w/Jvs7bpz73+RlKaYfvpSd9sEZgjkShH9cnM/61zM49JoG5v3CP2gavf6iq8jUN+IvTgrFIQ7CWYcRzjqM+mG34gyswN3wMhmNr5Dhfx21w04o6wemYliIbi9sl3ebGcBvRLDVql47gC3a41CZOGJjyYmN7TKdToyQVhPvXdhym6ls3pikNL5iq6uCqNbc7nxRdjL0om4m+hmfqQhkuCvCqe+RymBGOMYB5miidCgBSE8I6+HDh7Njxw7Wr1/PxIkT+fbbb6msrGTMmDHt8lBaFiCE3PuhD1JFEPILj55nZ8PbTo6+tY79L2j/0g0kuqOU6Kw30buyx4uNcOZcwplzaSi9CUfwS7PH8Ap5O69B7byOcOZBhikpuhDn2/nWDOBBgIbNNBEVQxSK9CKq/B3/zhFp6sCv0TqKqsm2nSrHSoJaVSeBDHPjs8A7G4KbESvBpfI7ney3K8Kpi1Kprwy9Xu9fgBOBCp/PN7OD4wLcAxwPNAMX+Hy+z3uQtdq+fXu7nY2NjWRnZ/dP6BSydu1ajjjiCMaPH4/bbbQKehPCeurUqYwaNSp+T4khrJcuXcr999+PmM3n//qv/2LhwoXt8tm0cT3r378R+/CzOPzoU1N1aykjUCM8+8NCyr92cPwf65hxRmCgReqQPs9AVgp7qIzMNe+QsawZ27v7oL6ejeg29NJmgsc6aD4uPyUhoNM9SzoVDHYZUyGfTpSgVt1qltIqabaVt3GIG72OzgIZOlqH3iYoCpvu5sucO9Algk25OaP8o173DkaMGAHdLPSbrp7B48B9wJOdHD8OmGT+zQUeND93C9IZwvr000/n9NNP7z4jsaGX/ghdG3ydv8YdGkvOLaRui50fLYlSOm9wKoI+0cYBvBeOsnMBiE6OEvvpKrS5T+MY8zKZAnb3LIL1xxPIOZ6Yc/wAC27RXzTsZOqlZOqlXaZrDWTY+Sgqv20rlY7P2wUyVOhp6x2kaw3kd71e79gukpwCPOnz+RTwsdfrzfN6vcN90T5V4gAAIABJREFUPt+OdMhjMXio3WxjyTmFBKo1zlpczYyTchjEDcae0ZkDeI7dcAAf6yA2zgYsABZgC2+Km5JyKn9HTuXviLimEcg+gWDO8USdk3eL0UIWHdM2kOGELtP6tW0sLT00HqRQlzDrs5Ywy39Vyn0HA9VsHAl8n7C91dxnKYPdmMo1dpacU0gsLJztq2b4vumLPJpuOnUAH+rAf6Wb4FGdO4BjznH4iy7DX3QZtsg23I2v4G54heyqO8mpuoOIc4LpfD6BiHumpRj2YL723EfyIrbp6h0MlDLoqHR36Lzwer0XAxeDsdxjUVH7GCeBwG5kZkgRmqahaRoej6fDZ7ar+e4T4W9n2nFkwCWvRRk2PRcAu90+KORrR0yh/V8dtt9vpOTafPQr8qBOR3ulCe1FP/JGMxJQqDwN/XgPkZOzUEdnoXk0MoGez40tguGzgF8RCe9Aq3oBW9XzeKrvJ7v6XpR7LHrhaehFp6Gy54C0VzCD9hkmMNhlHKzy1dhXdhhOvSbzS4qcqZV3oJTBVmB0wvYooL1nGPD5fA8DD5ubqiMnTzSa3pDGQxFd19F1Hb/fP+COu83vOVn6kwKyimMseqYae0ksbhoajI5F28YY+Zc0oW2MIQGwXV+F9t9VSMAYLBIbLjSf7SR4rIPwQS0O4BAEQ9DeL9gLHOA8E0aciVZSg9v/Gu6Gl3Ftuw/btj8Ssw8jkH08wezjCWceGJ/LMRifYTKDXcbBKt8JvBL/nixjFT2X13Qgd8lAKYMXgcu9Xu8zGI7jestfsHuy7lU3L/48n/zxURY9XY2ndPDHRyo6tRGtVtEyJ0kiQARUBlQtzSayd/pnAOv2AprzzqE57xwkVo/b/wbuhlfIqvsrntq/ELMVEcxeSDD7BCg4Ka2yWOwZpEUZeL3ev2F4y4q8Xu9W4EbAAeDz+R4CXsEYVroeY2jpwE43tUgLX/syePWXeQybFeGsxdVk5Kcx8mgKiU624fqobW9TgPB+diL77Pr2k7LlEsg9k0DumUjMj6vpTTIaXiGjfilZdU+hdhSQl3UUgezjCWUdNmCB9CyGNukaTXRON8cVcFlXaSyGNiseyeLNG3MZc2iI0x+twZk1NBQBQPM5ThxfRdGaWvfpWdB89i5cpL4TlM1DMOcUgjmngB7A3fQOueE3cVf9k8x6H7qWTdBzlBlI7/D/396dx0dV3X0c/8ySzExWEsIWdmSTXcQFhSiLFdGqj4Xj0la7qC2te4WCtdZia1Ottj5PFVQqanHhiKK2oLZoURAXtAoICiogJIJAAiSBZNb7/HEnw0zIMiRzM5Pwe79eviAzNzO/RJLfveee8z2WB+mJ9uOY17uHF4y1eW9vTqffjd3o/rNC+t3Yjbc3J+4Hvby8nO9///uMHz+eyZMnc/XVV1NWFjtf+O6776Z3797cf//9MY+HQiGuueaayOdedtllbN++PfL8ihUrOPfcc5k0aRLf+c532LFjR8LqTgTDgNX3ZfP6b3IZOLWaaU+UtalGAFBzTjo46vwzd9jMx1OJ3UNN9hSCgxaye8A6yno+SXX2+bgOrSS/9Bq6bBlOXsm1uA++hC14dICbENGaE35yf9OHpLa3N6dz1bx8vH7zB97rt3HVvPyENYTa/QxW
rVrFihUr6N27N3fffWQaWPR+BqtXr+bBBx+M+fzp06fz5ptvRn7xz5o1C4ADBw5w00038dBDD/H6669zxRVXMGfOnHprcOzaheP3vyf7nnvwvPBCQr6uphgheP03Obx9fzbD1GEumrcfZxscsTBybOz+tANfl+bh8w7g69I8dn/aASMnhc+D7C4zRK/wPr4Z8DH7ej1Lde500g+/T/7XP6Pr5yPI3/kDPAc0tmA9u5aJ494xDxNprW+2opBEueO5HDaVpDV6zLtfpGMYsT/Y1T47l/5vR07v33B+/pAefuZOP3pDjrqs2s9g+/btdOrUiRNOMBeqTJw4kRtuuIHy8nLy8/NjiwgEzP1w9+8nN9xMquNZudxMoQC8cmsHPnkugzFXVzHxNxX1zYIUrcHmxJc5Hl/meA52/V1MkJ676t9xBemJ40+TP65Kqb9F/d2mlFrQ2PFtQd1G0NTjLVHffgbz5s0jPd28Cqndz+Caa66p9/Oj9zPo168fe/bs4eOPPwZg6dKlAE1GX9irq8kuLk7I11OfQA28+JM8Pnkug3G3VjDxTmkEKSMcpFfRdS7f9F/L3j7/5FD+NTh92+iwexZdPh9Fx6+mkVG+ELt/d7KrFUkUz5VBJDRFa20opRpfP51k8Zy597uxW2SIKJorzWDJzWX1fEbzJXI/g5ycHObNm8edd96J1+tlwoQJ5Obm4nQ2/b/RUVpK/ve/j3f8eLxFRQQGDUrI9EjfIRsv/Cifr1a7mDT3IGN+fKjpTxLJYbPh95yE33MSFZ1/hdO78UjC6je3wze34/OMiaxlCKb3bPo1RbsRTzPYp5S6GlgDjAUS+9syCf7+szKumpdPte/I6asnPcQTM8oT+j6J3s8AoKioiKKiIgD27t3L/Pnz642wrsvIzMTx1VfkvvEGAMHOnfGOG4e3qAjv+PGEunY9pvoAqvebyaO716dx/l/2M2y6rARvM2w2Au5hVLqHUdl5Fk7vlshQUu6eueTumYvPPYKa7KlUZ08l6Erpc0CRAPE0g6sw4yB+DmwGrrS0olZw5iAfT8wo5/sPdcTrt+FKM3hiRjlnDmrGPsENsGI/A4A9e/bQuXNnQqEQxcXFfO973yMjo/Hwg5DHw8HiYqovuQRHaSnpq1fjeustXG++SUb45rJ/4EDzqmH8eHxjx2JkNb6FY9U3ZvLo/q1OLn5kPwOntGjprUiygGsgVZ0GUtXpZjNIr/IV3BXLydlbTM7e4nCQnnnFEHAl5qpSpJYm9zNQSjmA6UAnzKjpk7TWa1uhtvoc9/sZ3Hrrraxduxa/309RURF33nln5D2i7Vizhi2zZ5Pj8XDWT35S/83jUAjnp5/iWrXKbA7vvYetpgbD6cQ3enTkqsE/ahREDUUd2OFg8WUdObTXziWPldNnfPObaKrGANRK9frA2hqjg/TSq9diwyCQ3s9MWD2GIL1U/z6men3Qshrj2c8gnmbwDPAmcKXW+gyl1Aqt9eRmVdRybaIZpIJt27axZcsWnE4nkyZNiu+TampI/+ADszmsWkXa+vXYDINQdjbeM87AO348JT3O5ZlfnkTAa2P638soHN2y5NFU/yFM9fqg9Wq0+7/BXfUqnoplpB9+FxtBAmk9I0NJfs/oeoP0WrPG5kr1+sD6ZhDPMFEnrfV8pZRqVhWi7XC78Y0bh2/cOCrnzMFWXo5rzRrzqmHVKva8VsZT/BSnfS8/OedP5O3oh7fXOEIpmPYoEi+U1oXDeVdxOO8q7IHaIL3lZJY/Rlb5ww0G6Ym2IZ5msEcpdSngUUr9D7LnwHHDyM+n5oILqLngAr56O50XfpBHpruSq0f8ksL3nsf+mrl4yT9kSGRIyXfaaRgeiUBo7xoO0ns6KkjvXGqyz8ebeUbTLyiSLp5m8CPgauC/mFHT9U+IF+3W5/9y8dJP8+nQO8Clz9Rg7zqX3cHfkLZhQ+R+Q+Zjj5E1fz5Gejq+MWPM5lBUhH/YMHDIGWJ7FhOkFzqEq6o2SG8pmQeeImTvgLH/QlxpEyVIL4XF0wwGa63/qpTqDPwA6AN8ZmVRInV8ssTD8ls60HWEn+lPluHJD99jcjjwjxqFf9Qoqq6/Hlt1NenvvRdpDjnFxVBcTKhDB7xnnhm5cgjGMQ1WtF2GPZOanAupybkwEqTnrliGZ99LdAw+ScieFRWkN1GC9FJIPM3gPmASMBfzRvJCzPUGop378LFMVvw6l95nevmfx8pxZTU82cDwePCefTbes88GwL53L6633zbvN7z1Fp5lywAI9O59ZH3DmWeC3G9ov8JBejXZU3Dm51C540XcFctxV71KRsWLhGxuvFkTqck+n5qsSRgOmQSSTPE0gwyllAtwaa2fUUr91OqiRHIZBqz5Sxar/5TDgHOrufCh/TiPnr3aqFCnTlRffDHVF18MhoHjyy8js5Q8L79M5lNPYdhsGKNHkz12rHm/4ZRT4BjXZIg2wp6ON2si3qyJHDSKST/8Tnj186t4Kpdj2NLxZhaFp6x+C8PRIdkVH3fiaQaLgJeA3yil3MA2a0uy3l9HdeHQ3qPHsTM7Bbnu429a/Prl5eXceOONbN++HZfLRZ8+ffjjH/9Ix45HAsHuvvtuHn74YW688UZuueWWmM+fM2cOa9euxWazkZaWxpw5cxg/fjxgrjq+4YYb2LlzJ263m3vuuYfRo0e3uOZahgFv/DaHDx7NYti0w5x33wHsLd31wmYj2L8/h/v35/APfwiBAGkffYRr9Wqy3nmHrPnzyf7rXwm53fhOPz2y+C1w4olwjCu3RRtwVJDeh7gr/om78hXyqlaEg/TOjArSk6vH1tDkOoMUk5B1Bn/s3vB+oL8srXcr5mOyf/9+Pv3000hy6V133cWBAwe47777ADPCeu3atdx///1cf/31TJo0iZ///MhePxUVFeTk5ACwceNGLr30UjZs2IDNZuOWW26hV69e3HTTTbz//vvcfPPNrF69GludhT/NWWcQCsCrszqwYXEGJ/+oikm/tT5wrqCggLLt20l/550j6xu2bAEgWFBwZEhp3DhC3btbW0wD9bXn+eetJa4aDYO0mo9xVy7HU7Ecp387BnZ8GaeFrximEErrlrz6kiwV1hm0KSvuyGHPpsYjrBvz9LSG43w7D/Ezea61EdZApBGA2eiif9H/4x//4L333gPg1FNPxeVysW7dOkaNGnVsX2gdAS/847o8tiz3cMbNlYz7RWWrJQ4YWVl4zzkHbzid1b5rV6QxuFavJuPFFwHwn3AC3qIifOPH4x07FiPq+yTagaggvcpOt+H0bsJTsaxOkN7J4bUM50uQXoI12AyUUnla6/3NfWGl1BTgAcABLNBaF9d5vhfwBNAhfMxsrfXy5r5fqqovwjpabYR1Xffeey9Lly7l4MGDPProo9hsNsrLyzEMI2bvgu7du/P111+3qBn4DttY+uM8tr/lZuKdBznlmuQmj4a6daNaKaqVAsPA+dlnkeaQ8eyzZC1ciBGezVQ7hdV30kmQ1vyTAJFibDYC7qFUuoeGg/Q+x125DE/FcnL33EXunrvwuYdTk32
+BOklSGNXBn9QSuUBnwP/AtZorQONHB8RzjN6EDgHKAHWKqVe1lpvijrsdkBrrecppYYAyzGnrbZIPGfujQ0TXbEkNSKsZ86cycyZM1m9ejW///3vI3sXJFrNARvPXdmRXR+lcd79+xlxaYolj9psBE48kcCJJ3Lo2mvB5yP9ww8jq6KzHniA7D//mVBmJr6xYyNTWAMDBkiYWjsScA2gynUTVQU34fBtNzfqiQnSGxyJxQi4Bsv/+2ZosBlorX8KoJQagPlL/drw/sfvAS9orUsaed1TgS+01lvDr/EscBEQ3QwMoPY6Pxdo+WB9imlJhHWtcePGUVlZyWeffcaIESMAYnY2Ky0trR0PPGZVe+zoKzpS/qWTix7ez6CpbSB5ND0d39ix+MaOpfKXv8R24IAZmRFe3+BesQKAYNeusRHdnTsnuXCRKMH0PlR1/BlVHX+G3V8aSVjN2vdnsvfdTyC9b1SQ3nBpDHFq8p6B1vpzzKuDh8Jn/KcB3TDP+BvSHdgZ9XFJ+POi3Qn8Syl1PZAJ1Bt+p5S6FjNCG601BfXMS6+uPraz2cxOwQZnEyVKcyOsDcPgyy+/pH///gCsW7eOsrIyevXqBcAFF1zAk08+GbmBXFNTE2kS0ex2O3a7naysrHq/Z+XbYfG0NA7ugh8tDTBwchbQeGy1FZxOZ731xa2gAPr3hyuvJAT4tm/H/sYb2F5/Hc9//kPGkiUAhIYOxZg0idDEiRhFRZCZ2Tr1tYLju8YC6DYSmI3ftxv7vpewly0lq2we2WV/xXD1JlRwMaGCSzCyT20wSO/4/h6aLJlNpJSaDpyrtb46/PH3gVO11tdHHXMLYNNa36eUGgv8DRimtQ418tJtIrW0JRHWoVCIadOmceDAARwOB263m5kzZ0Y2tNmzZw/XX389JSUleDwe/vCHP3DKKacc9TqNzSba97mTxZd1xF9tY/qTZXQf07Lk0ZawdBZHKETaxo2RIaX099/H5vVipKWZkRnhKwf/iBExEd2tVl+CSI1HswfKcVX9C0/FMlyHVmHDHw7SOy8cpHdaTJBee/8eJnM2UQkQfau/B0cPA/0YmAKgtX4nvIahANhjUU2tZtCgQU3uS9wQu93OC+ENZ+rTuXNnFi9e3NzS2LUujee+m4/dCVcs2UfnIXHdBmqb7Hb8w4fjHz6cqp//HKqrSV+79khkxr33wr33EsrJMSMzwusbgn374lm6lOziYhxff03nwkIqZ8+uf08IkZJCznyqO1xGdYfLwkF6K8JBes+QtX8hQUfH8OpoCdKr1WQzUEpN0Vq/qpTqD9wMLNZav9XEp60FBiil+gKlwGXAFXWO2YEZc/G4UupEwA3sPdYvQMRvxzvpPP+DfNwdQlz2bBl5fRM3LNYmeDz4iorwFRVR+atfYS8rM3d9CzcHzyuvABDMy8NeUYEtaH5/nKWl5M6aBSANoQ0yg/S+Q3Xud8JBem/gqVgWG6RX/m1c6bVBese43L6diOfK4FbgVeA24FHgf4GjxyWiaK0DSqnrgNcwp40+prXeqJSaC3ygtX4Z+AXwqFLqZsybyT/QWrepFXBtyRf/NpNHc3sGuPSZMrK7NTYad3wIdexIzUUXUXPRRWZkxrZtuFatImfu3EgjqGWvrib39tsJdu2Kf/hwjBQalhTxM4P0vk1NzrfDQXpvmVNWy16mY/Dv4SC9SeYVQ9YEDHvjW8q2J/HsdPYe5raXv9ZaX6OUWqm1Prs1iqtHm7hnkAqi7xl0q7iAZTd1oPNQP9MXlZORnzqNIBXHarv16IGtiZ8L/wkn4B85Ev+IEeafQ4dixHlTOtFS8XtYV6rXWJCfQ+WOl3BXLMNd9SqO4P6oIL2p1GRNTnqQXircM/gD8Dvgd+Fx/XebVY1oVQd3Ovj37xw4vLnkl+ZhTzO4bHEZrmy5+GpKsLAQZz33fALdunHw3ntJW7eOtPXrca1ZQ0b4/o5htxMYMAD/iBH4apvEkCEgG/20DfZ0vFkT8GZNCAfpvRtey/BKnSC9qeEgvbxkV5xw8TSDvpirg2tPyWdbWI+wSMhvk0YQp8rZs8mdNQt71JTlkMdD5W234Z0wAe+ECZHH7d98Q9r69aSvX0/aunW4Vq4k47nnADAcDgKDBh1pDiNH4h88WJJZU53NiS9zHL7McRzs8jvSqz/AXbkMd8XyqCC9M6KC9Dolu+KEiKcZbAXuUUrlAv8Almity60tS4jkqb1JXDubKNjIbKJQly4xuUoYBvZduyLNIW39etyvvkrmM8+YT6el4R88ONIcfCNHEhg0SKI0UpXNji/jVHwZp1LR+U7SataFg/SW0WH3bIzdt+HLOC28+vk8y4L0WkPc6wyUUvnAfMzpoK8BD2itV1tYW33knkGcPn5rB4t+tQWHtwP5pRcCiUlkTbSUH0tORH2GgaOkJNIc0tetI23DBuwHD5pPu1z4hwyJGWIKDBjQ4NoHS2q0WKrXeMz1GYYZpBeOxUjzmUm7Ps/o8OrnqQTTeyW3xigJuWeglDoPc2poHuaVwbXhF30JKGpWZakgaJD5SA1ZD3qpus7FoWvc4EjMsnUr9zOYP38+Tz31FNu2bWPhwoWcU3tGKlKXzUawZ0+CPXtSc8EF5mOGgeOrr0hbty5yFeF5/nkyn3gCgJDbTWDYsJghpkC/frKfdKqIDtLrNBOn94twkN6yOkF6U8NBev2TXXGT4jn1GA7cprWOuaOmlLrGmpKs59gaJO+nh3BuDWKvhuw/1eB5wc/++ZkE+7X8h81mszFjxoyY/QzuvvvumP0M1q1bx5o1a7j++utxuVwxaaZz5sxpcD+D008/nXPPPZeZM2e2uE6RRDYbwT59CPbpY05tBQiFcGzdGjPElPH009jDK9dDmZnmIrraIaYRIwj26ZO8r0FEBFz9qXLdSFXBjTh8X0WGknL2/pGcvX/E7xoUbgznp2yQXjzNYDHwa6VUFnAV8EOt9QKt9WZrS2uenDsOk7ap8cVU6WsDEDhyzWSvhrSNQTpPqMB3SsPfEv8QBxVzm553bOV+Bs2Nqk5k7pKwiN1OsH9/qvv3P3J/IhjE+cUXMUNMmU8+ia3GDBUM5eTA6NFkh4eZ/CNHEuzZMyV/2Rwvgum9OdRxBoc6zqgTpPcXsvf9OSpIbyp+94iU+X8VTzP4G3Ad8JDWOqiUuhxYYG1Z1jIywF4n6doGhCxYX5LI/QyORW7PIOfcHsTpPMikSal3r0DEKTwjKTBokLm/A4Dfj3PLliPDS5s2kfXoo9j8ZsZUqEOHI8NLtQ2isDBlfukcT0Jp3TmUfzWH8q/GHtgT2fO5NkgvkNYjMpTk95zcYJBea4inGTi01p+p2n+IkNKb0sZz5u553kvunMPYo/ZwCWXCwd9lUP2dxE77S+R+Bunp6QmtTbRRaWkEhg4lMHQoXH45aQUF7CstJW3z5pgrCNe8edgCZvZUsGPHyCK52kYR6to1yV/I8SXk7MzhvCs5nHcltkA57nCQXmb5QrLKHyHo7EpN9hSqI0F6Tgq2fot078bIa9SG1ftcQ9nX718JrS+eZvCGUm
o+UKiUegD4d0IrSIKac9LJvb0aMwUjzGGj5pzE/rK1aj8DIY7ickWuBCKqq0n79NMjM5jWr8e1ciW2kLkCPdilS+wiuZEjCaV4jHN7YcQE6VWYQXqVy8k48CyZ+x+PBOmFnF0xvFuwcSRZ2CAdn2dMwmuKZz+Du5RSw4DXgc2YAXNtmpFjY/enHSx9D6v2MxAibh4P/tGj8Y8ezeHwQ7bDh0nbuNG8ggg3iOwVKyLxG4HCwpiYDd/w4RhR26yKxDMcOVTnXkJ17iVHgvQql+OpeBF76BBHTf632anqdFPC62hsD+TngO9qrX1a60+AT5RSgzC3wDw14ZW0I5s3b+b//u//6NevHxdeaM7xj3c/A8MwmDVrVsx+BvPmzaNDB7N5zZs3jwULFlBeXs7NN9+My+Vi5cqVss5CxMXIyMB3yin4ovbAsFVVkfbJJzFDTLUJrgCBXr1iZjD5hw/HyM1NRvntXmyQXg2uQ2+Rs2cuTt82bJhXBYdyLyXkTPzOfY1dGTwLLFdKTdNaH1BKnQvcBVyZ8CraGSv3M5gxYwYzZsxobmlCHMXIysJ3+un4Tj898pjt4EHSNmyImebq+ec/I88H+vaNjdkYNgwjq/V3ymvX7G682d+izD2SLl+OBcNr2VUBNL4H8vNKqV3AMqXUa8AZwLe01gcsqUQIkTKM3Fx848bhGzcu8pitvJz0DRuOXEG8/z4ZL75oHm+zEejfP+YKIjBsGIYE9bVYKK0Lh3IvJfPAIsuuCqDxYaK7MO+wlgK3APOAW5RSaK3vsKQaIUTKMvLz8Z51Ft6zzoo8Zt+7l7T164/MYFq1ioznnzePt9sJDBqEf8QI7GecQVq/fmaSq/v43DymJaoKbiIjtNWyqwJofJhoRfjP14EHLatACNFmhTp1wjtpEt6ofbbtu3fHzmBasQLH4sV0Agyns/4kV5k23ahQWhcCI18nZGG+U2PDRG9a9q5CiHYr1LUr3q5d8YYXWmIYFFRXU7Vy5ZH7D8uXk/n00+bT6en4Tzwxdohp4EBJcm1l8cUiCiFEc9ls0KsXNVOnUjN1qvmYYeDYsSM2qO/FF8n8+9/Np91u/EOGxAb19e8vQX0WsqwZKKWmAA9g7oG8QGtdXM8xCrgT897EOq31FVbVI4RIITYbwd69CfbuTU14+jWhEI7t22OD+hYvxr5wofm0x3N0UF+/ftDMBZ0iliXNQCnlwLzPcA5QAqxVSr2std4UdcwAYA5wptZ6v1LKmlvkjbB/s5e8n93G/nl/INQ5cSsvrYqwDoVC/OQnP+Gzzz7D5XJRUFBAcXExfSS5UrQHdjvBfv2o7teP6osvNh8LBnFu3RqzBiJj0SLsC8x4tFBWltkgatdAjBxJsHdvyWFqBquuDE4FvtBabwVQSj0LXARsijrmGuBBrfV+AK31HotqaVDWXxaQ/t5HZP35USr+MCdhr2tVhDXA9OnTmTx5Mna7nYULFzJr1iy01gmrXYiU4nAQGDCAwIABVE+bZj4WCOD8/POYm9SZjz1Gls8HQCg314zZiA7q69FDGkQTrGoG3YGdUR+XAKfVOWYggFLqbcyhpDu11q/WfSGl1LWYG+qgtaagnuyU6qi9anPu+BNpm7Y0Wlz6u/+NLL8HyHpyCVlPLsGw2fCdPrrBz/MPGUjF3FsbfW2wLsLabrdH0k8BTj75ZBYsqD9A1m63Y7fbycrKqvd7liqcTqfU10LHZY1du0J4wycAv8+HbdMmbB9+iO3DD0n7739Jf/jhSFCf0bEjxujRGCefTCj8J927RxrEcfk9rPv6Fr1ufS24bsSGExgAnA30AFYppYbVXdSmtX4EeKT2Nerb9i0Q/h8eL9/o4Ti/2om9/AC2kIFhtxHK70Cgd89jep14WBlh3dhOZ6FQiFAoRFVVVfvabrCVpXp9IDVG9Ohh/le7WVBNDWmffRYzxOR84w0cQXNvj2CnTpErB9u4cZT37Uuoc6uPVsctAdteNsqqZlACRP9m7QHUDdUvAd7VWvuBbUqpzZjNYW1L3jieM3eA3Nl3k7HoBQxXOvj8VE+dlNCholpWRVjPmzePzz//nOeeey7RJQvRPrjd+EeNwh+1IZStuhrnxo0xN6ldb7yB7f776QoEu3aNXQMxYgShqHt97ZlVzWAtMEAp1RdzBfNlQN2ZQi8ClwOPK6UKMIe9vGeMAAAOjElEQVSNtlpUz1Hs+8o5dOU0Dn/3EjKeegHHnsSftVgVYb1w4UKWLl3K4sWL8chyfyHiZng8+MeMwT/mSAS07dAhCnbu5PBbbx0J6nvttcjzgR49YoP6RozA6GBt6nEyWNIMtNYBpdR1wGuY9wMe01pvVErNBT7QWr8cfu5bSqlNQBCYqbUus6Ke+uxf8KfI3yvunp3w17cqwnrRokUsWrQIrTV5eXkJr1uI442RmYkxbhyHBg+OPGarqDCTXKNuUnuWL488H+jTJ3YviOHDMdp4crDNMI5Ky05lxtdfH72FY2VlZUpFOG/evJmJEyfSr18/3OEclngjrEOhENOmTYuJsJ45cyZFRUVUVVUxePBgevToEfl6XS4X/4xKk6y1bds2tmzZgtPpZFJUVECqSfXx7lSvD6TGRIinPtv+/UcluTpLSiLP+084IWYvCP/QoRiZma1aY0PC9wwanU4lK5AtYFWEdVZWFiVR//iEEK3HyMvDV1SEr6go8pi9rMwM6qu9/7BmDRnhn1/DbicwYEBkBzrfiBH4hw6FFB3alWYghBDNFOrYEe+ECXgnTIg8Zv/mG3N4KdwkXP/5DxnhiR6Gw0Fg4MDYm9QnngjHMJRsFWkGQgiRQKEuXfCecw7e2mnfhoF9166Y4SX3a6+R+eyz5tNpafgHDz5yk3rkSAKDBkWC+jwvvEB2cTGOr7+mc2EhlbNnU33JJQmvW5pBO9XG7gUJ0X7ZbIQKC6kpLKRmyhTzMcPAUVISu9XoP/5B5lNPmU+7XPiHDCGUlYXr3Xex+f0AOEtLyZ01CyDhDaFdNAPDMDAMo96FWccjwzAIhhfWCCFSkM1GsGdPgj17UnPBBeZjhoFj+/bYvSBWr45JSwCwV1eTXVwszaA+brebiooKcmWTbgAOHDjAzp1mGog0SCHaCJuNYN++BPv2pSa8irpbjx71HuqoZ1ZlS7WLZpCens7evXvZu3cvjlbMO7fb7YRCoVZ7v3gYhkF5eTkHDx7E6XTSrVu3ZJckhGimYGEhznpmJgbjiJc4Vu2iGQB0796dTz75hK1bt7baEElGRgaHDx9ulfc6Vna7nYEDBzJw4MBklyKEaKbK2bPJnTULe1QYZ8jjoXJ24hfKtptmADBs2DCGDRvWau/XHhbSCCFSV+19gdrZREGZTSSEEMen6ksuofqSSyw/uZP94oQQQkgzEEIIIc1ACCEE0gyEEEIgzUAIIQTSDIQQQiDNQAghBNIMhBBCYOGiM6XUFOABzD2QF2itixs4bhrwHHCK1voDq+oRQgjRMEuuDJRSDuBB4DxgCHC5UmpIPcdlAzcA71lRhxBCiPhYNUx0KvCF1nqr1toHPAtcVM9xdwH3A
DUW1SGEECIOVg0TdQd2Rn1cApwWfYBS6iSgp9b6n0qpWxt6IaXUtcC1AFprCgoKLCi3eZxOZ0rVU1eq1wepX2Oq1wdSYyKken1gfY1WNYP6dlSJbNejlLIDfwZ+0NQLaa0fAR6pfY1USuFM9VTQVK8PUr/GVK8PpMZESPX6oGU1Fsax/4FVw0QlQM+oj3sA0VvzZAPDgJVKqe3A6cDLSqkxFtUjhBCiEVZdGawFBiil+gKlwGXAFbVPaq0PApHrHaXUSuBWmU0khBDJYcmVgdY6AFwHvAZ8aj6kNyql5iqlLrTiPYUQQjSfZesMtNbLgeV1HrujgWPPtqoOIYQQTZMVyEIIIaQZCCGEkGYghBACaQZCCCGQZiCEEAJpBkIIIZBmIIQQAmkGQgghkGYghBACaQZCCCGQZiCEEAJpBkIIIZBmIIQQAmkGQgghkGYghBACaQZCCCGQZiCEEAJpBkIIIbBw20ul1BTgAcABLNBaF9d5/hbgaiAA7AV+pLX+yqp6hBBCNMySKwOllAN4EDgPGAJcrpQaUuewj4AxWusRwBLgHitqEUII0TSrrgxOBb7QWm8FUEo9C1wEbKo9QGv9n6jj3wW+Z1EtQgghmmBVM+gO7Iz6uAQ4rZHjfwy8Ut8TSqlrgWsBtNYUFBQkqsYWczqdKVVPXaleH6R+jaleH0iNiZDq9YH1NVrVDGz1PGbUd6BS6nvAGOCs+p7XWj8CPFL7Gvv27UtIgYlQUFBAKtVTV6rXB6lfY6rXB1JjIqR6fdCyGgsLC5s8xqpmUAL0jPq4B/B13YOUUpOBXwFnaa29FtUihBCiCVY1g7XAAKVUX6AUuAy4IvoApdRJwMPAFK31HovqEEIIEQdLZhNprQPAdcBrwKfmQ3qjUmquUurC8GH3AlnAc0qpj5VSL1tRixBCiKZZts5Aa70cWF7nsTui/j7ZqvcWQghxbGQFshBCCGkGQgghpBkIIYRAmoEQQgikGQghhECagRBCCKQZCCGEQJqBEEIIpBkIIYRAmoEQQgikGQghhECagRBCCKQZCCGEQJqBEEIIpBkIIYRAmoEQQgikGQghhECagRBCCCzc9lIpNQV4AHAAC7TWxXWedwFPAicDZcClWuvtVtUjhBCiYZZcGSilHMCDwHnAEOBypdSQOof9GNivte4P/Bn4oxW1CCGEaJpVw0SnAl9orbdqrX3As8BFdY65CHgi/PclwCSllM2ieoQQQjTCqmGi7sDOqI9LgNMaOkZrHVBKHQQ6AvuiD1JKXQtcGz6OwsJCi0punlSrp65Urw9Sv8ZUrw+kxkRI9frA2hqtujKo7wzfaMYxaK0f0VqP0VqPCX9OyvynlPow2TW05fraQo2pXp/UeHzUl6AaG2VVMygBekZ93AP4uqFjlFJOIBcot6geIYQQjbBqmGgtMEAp1RcoBS4DrqhzzMvAVcA7wDTgDa31UVcGQgghrGfJlYHWOgBcB7wGfGo+pDcqpeYqpS4MH/Y3oKNS6gvgFmC2FbVY7JFkF9CEVK8PUr/GVK8PpMZESPX6wOIabYYhJ+NCCHG8kxXIQgghpBkIIYSwMI6ivVJK9cSM0egKhIBHtNYPJLeq+oVXgn8AlGqtL0h2PdGUUh2ABcAwzCnFP9Jav5PcqmIppW4GrsasbwPwQ611TZJregy4ANijtR4WfiwfWAz0AbYDSmu9P4Xquxf4NuADvsT8Ph5IRn0N1Rj13K3AvUAnrfW++j6/NTRUo1Lqesz7sQFgmdZ6VqLeU64Mjl0A+IXW+kTgdODn9URtpIobMW/gp6IHgFe11oOBkaRYnUqp7sANwJjwD6MDc1Zcsj0OTKnz2Gzgda31AOB1kjsZ43GOru/fwDCt9QhgCzCntYuq43GOrrH2RO8cYEdrF1SPx6lTo1JqAmZywwit9VDgT4l8Q2kGx0hrvUtr/d/w3ysxf4l1T25VR1NK9QDOxzz7TilKqRygCHNGGVprXzLPFBvhBDzhdTAZHL1WptVprd/i6PU40dEuTwAXt2pRUeqrT2v9r/AMQ4B3MdcdJU0D30MwM9JmUc/i19bWQI0zgGKttTd8zJ5Evqc0gxZQSvUBTgLeS3Ip9fkL5j/sULILqUc/YC+wUCn1kVJqgVIqM9lFRdNal2Keee0AdgEHtdb/Sm5VDeqitd4F5skK0DnJ9TTmR8AryS6irvCU91Kt9bpk19KIgcB4pdR7Sqk3lVKnJPLFpRk0k1IqC3geuElrXZHseqIppWrHGj9Mdi0NcAKjgXla65OAQ6TYOhOlVB7mGXdfoBDIVEp9L7lVtW1KqV9hDrM+lexaoimlMoBfAXcku5YmOIE8zOHpmYBOZLinNINmUEqlYTaCp7TWLyS7nnqcCVyolNqOmRg7USm1KLklxSgBSrTWtVdUSzCbQyqZDGzTWu/VWvuBF4AzklxTQ75RSnUDCP+Z0OGDRFBKXYV5Q/S7KZg0cAJm018X/pnpAfxXKdU1qVUdrQR4QWttaK3fx7zqL0jUi8tsomMU7sR/Az7VWt+f7Hrqo7WeQ/gmnVLqbOBWrXXKnNVqrXcrpXYqpQZprTcDk4BNya6rjh3A6eGzxmrMGj9IbkkNqo12KQ7/+VJyy4kV3ujql8BZWuvDya6nLq31BqKG1sINYUwyZxM14EVgIrBSKTUQSKdOynNLyArkY6SUGgeswpxqWDsef5vWennyqmpYVDNItamlozBvbqcDWzGnGyZlOmRDlFK/BS7FHNr4CLi69uZdEmt6Bjgb84zwG+A3mL8kNNALs4lN11onJfSxgfrmAC7MHQ0B3tVa/zQZ9UH9NWqt/xb1/HaS3Awa+D7+HXgMGIU5TfdWrfUbiXpPaQZCCCHknoEQQghpBkIIIZBmIIQQAmkGQgghkGYghBACWWcgjlNKqbMwp+vZgSDwa631GqXUQeC/QBpmdEIhMFlrfXv48+4EVmqtV0a9VgZm/MfA8Oc9orV+gmYKJ7pOTNEFjaKdkisDcdxRShUAvwUu1lqfjRnsVh1+eoPWegLwC8xsp3j8Bngz/FrjgG0tLLEDcEkLX0OIYyJXBuJ4NBVYVJspFU6f/ajOMR8Tf7rmGVrrX4ZfywDeAlBK/S/mAqEK4LuYoYaTtda3K6V+EP7clZgLicoxIxEuAq4FzlFKrcRcQLb32L9EIY6NNANxPCrEXEGOUuoK4GeYq2JvjTqmCNjc3DcIJ0pmaq2LwgF3P6XhdNs8zCyky4HvYG583iuVIkRE+yfDROJ4tAuzIaC1fhr4HkcCv4Yrpf6D2SCKgRrMKIVabo4MKTXmBMx7D2BmGvUnNic/Om1yk9Y6BJRiDhEJ0erkykAcj5YDS5RSWmt9kNifg9p7BgAopXzASUqp2hOn0cA9dV5vjVLqu1rrp8JBhmdi5i19K/z8GMztHg8C3cKPDQfWh/9et0n4MXdWE6LVyJWBOO6Ex+B/C7yklHoDeAhzX+v6ji3DjCt/CzOgcEk9IXC/Bc4Kj/G/DZwQjhiuVkqtAq4A5mP+8i9USi0HOjVS4m4gXym1JLy/sRCWk6A6IYQQcmUghBBC
moEQQgikGQghhECagRBCCKQZCCGEQJqBEEIIpBkIIYQA/h/+gMsVuJInQAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYEAAAEkCAYAAADJiI15AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOydeXwV1dn4v2fuln0jrAlJCKvgAiLuVdytVVSqg1b7Vlu3Lv602lJbrbXaorWWV1/XqnVvlVGxoraKVkFBRXGrioQtAZJAyL7e5N6ZOb8/ZhJuQla4l2zn+/nkAzNz5swz5555nrM85zlCSolCoVAohidafwugUCgUiv5DGQGFQqEYxigjoFAoFMMYZQQUCoViGKOMgEKhUAxjlBFQKBSKYcyQMgJCiFuEEJv6W47+RAhRJIS4qb/l6IqO8gkhVgghHu2PZ+9DPk8IId6KOO6x3gkh5gohpBAie1+f3wv5olKmQohLhBBmNGRS7D/6Wte8sRZosCOESANuAU4B8oB6YDVwo5Ryff9JNmSYDww2RXMNA7sBNRjLVNFPDOSKPFAYC0wAbgYOBc4EkoC3hRDp/SnYUEBKWSWlrOtvOfqClLJWSlnd33J0xWAsU0X/MWiNgBAiIIR4UAhRK4SoFkI8CAQ6pNGEEIuEEOVCiAYhxHNCiGtbu7jC4TUhxMdCCF/EPW8JIVYLIbxSym+klGdLKV+UUhZIKdcC38MxDsd2I98qIcQfIo5/73bRTo44t1IIcWfE8Snuc4NCiBIhxONCiBER1w8VQvxbCLHLfZ+PhRCn91BOJ7tldH0vytQnhFgshCgWQrQIIXYIIZ7rkGaBEOITIUSzEKLSlSc9Qv4VQogq95krhRCH9/DMdkMXrcdCiN8KIXa6eT0hhEiMSNPt79oD8W7+dUKICiHEn4QQbd9BZ0NGbvoVEcfthoO6eK+r3XJsEkK8AeT0JFgv310IIX4hhNgihAgJITYLIa7tLJ+9yPe2iLr1HLBHI6e7Oiqcb/IzIcQ/I9LHCyG+EkIs6en93fRFQohbhRD3uHKWCSHuEkJ4ItL4hBB3uM8PCSHWCSG+1yEf6f4GS4QQjUKIbUKI84QQqUKIvwsh6t0y/G6H+/4ohPjG/d22CyEeEkKkRly/RAhhCiGOEUJ86qb7WAgxu0NZPuL+NkH3OYuEEB3107WRdUQI8X3RYRhHCDFbCLHc/U3KhRBLhRC5HfLpc11rh5RyUP4B/wvsAs4GpgF3AXXApog01wENwPeBye5xFWBGpBkJlAB3ucc3umlyunl2PiCBo7pJcyvwQcTxe668t7vH8UALcLp7fCLQBFztyjoHeAd4FxBumrnAD4DpwBTgD0AImBLxnCLgJvf/F7nv/71elul1QLH7nBxXhmsjrl8KhIHfujIcjDM0kulePxc435VtBvCoW5YjOpPPPV4BPNrhuMb9facBp7vHv+/L79rF+xW5deRWYKp7fyNwXVfyueceBVZEHD8BvBVxfAvt693ZOMMx17ll8SOgzK0z2d3I15t3/ykQBK5w3/0qoBn40T6W6TVuWfzAlXmhmybyW+lNHZ2CM2T6M/f4EWALkNrLOlgEVAM3uM9Y4JblpRFp/gxUsruu/QawgZMi0khgp/s+k4AHXNn/DVzinrvXfefI+nkT8C2cod+TgPXAkxHXL3Gf9a6bbhqwHNgMeN00Gs63eYSbzzxgR4fybh2yu8Z9z0uA0sg6gvONNQC/d59zEPA8sAGI25e61q7M91UZ98cfkOhW/Ms7nF9L+4+xBLitQ5rn6KAsgBPcgvwdjpKb382zPcDrwEeA1k26uW6eKUACjsK/HvjIvX4KjgJPjPhQ7+iQR477Y87s5jlf4MxPtFNiwC+AWuCUPpTrPcDbuB90J9e3Aff1IT8N54O+qKN8Eccr2FNh/bdDPg/R3qD26nftRJ4i4L0O5xYBxV3J557rqxFYBfy9Qx539fRh9vLdtwN3dkjzv8CWfSzTYuCPHdK8QHsj0Ks6iqN4m3GMbQg4vA91pghY1uHc68Cz7v9bv6WfdEjzEvB2xLEE7o44HumeuzfiXLp77sxu5DnXfZ7mHl/i3nNoRJoj3XNTu8nn58DGiOPVwNMd0txBeyPwBPBchzQBHGN2zr7Utci/wTocNBGnMN7vcH5V63+EECnAOODDDmk+6JiZlPId4C84H/OjUsqlnT3U7ZI+hWNx50sp7W5k/ADnAzgOp8Ww1b13ltu9PBHHIDS66ecA17rdvgYhRAOwzr022X3+SCHEA0KI9UKIGjfNDKBd9xCnlfgH4EQp5ZvdyNiRx3FaG5vcbvB3hRB+99mjgPE4rZ5OEUJMEEI8LYTYJISow2l1p3YiX0983uG4BBjtPqPXv2sXdEy3Gshy840W0+mmbvZAT++ejdMKjWQlkCeESNiHfLN6IXOPdRRASvkk8DJOj/G3UsqPupGrT7LitOD9dF4GMzqc+yJCpnLAAv4bca4a5xsd1XpOCDFfCPGuEKLUfb+/u88bE5GvjMzblY8IGRFCXC6EWOMOZzUAt9P+O5hOz3V4DnBuh/KuBOLYXd77UteAwesdJNx/5T6mcRI6yv0YnEoySQghpGtSI9L4gWeBQ4C5Usri7vKUUrYIId7H6VKGcFop5UKI9Ti9hBOBNyJu0YA/AU93kt1O998ncFpeC4FCnGGB53AqaSQf4PRufiSE+LTju3Qj8+dCiAk4vZQTcHoGtwkhjoxM1k0WrwIVOEMW23Hee1Un8vVEqKNo7J6/6vXv2ktEh2O7k3O+vch3b+Xr7t27yrujvH3Nt7dl2ps6ihAiCceJwsJpMPWVvS2DjufCneTd8Vxb3kKII3CGW24HfonTiz0SeJL2ddiWUlqdyNKaz/nA/ThDWitxGkPnA3/s4R06ouGU9R2dXKvsQz49PmQwsgmnohzT4fzRrf+RUtbijLEd1SHNkezJLTiV9RhgNvCryItuC2sZjtU9Tkq5rZdyvo2j7E8E/hNx7lz3OW9HpF0LzJBSburkr8FNcxzwgJRymZTyS5xxxvxOnvsljhKfDzwshOiNkgBAStkgpXxJSvn/gMOAA4DjpZS7cIYMTuvsPndycDrOcMEbUsp1OEMCozpLv7f08XftjI7pjgJK5W5vml04PY1IZvVJSKd13LFudjzuM66MxcDxHS4dBxRKKZv2Mt9anNZsTzL3po4CPIhjAE4ELhZCXLA3cnXBJpzhmc7K4Ot9zPtYoEJKeZOUco2UcgNOz6uvHAd8JqVcLKX8REq5EWduIJJ19FyH1+LMu23upLxbvdP2ua4Nyp6AlLJRCPEQ8AchRBlQgDMhMg3nI27lL8Dv3db3R8B3gFOJsJxCiOOBXwNnSSnXCCEuB54VQqyQUn4ohEgG/oVTGc4GbCFEa9ewVkoZ
7EbUt3GGZSycCbTWcy/gtEgiu383A8uFEP+L0/Kox+nynY8zyRZ03/MiIcQqnLmJW91/Oyujr913ext4XAjxwx6GrxBC/BJHwX6OM+54oSv7BjfJ74EH3TJ/AacRcQJOb6QKKAcuF0JsBkYAd+L0VqJNj79rN8wUQtwC/APHyF2D0who5S3gJ0KIl3CG8K7C6cZX9VG+54UQH+HUnWNxJqGjwe3AX4QQG3HG6E8EfozT+9oX/oLT61uPM0wxDzi5Q5oe66gQ4mL3+Ei3Z/kb4K9CiDVSysJ9lBEpZZMQ4v9cWctx6ur5ON/mKfuYfQEwUgjxI5zv9VjgJ3uZz4+EEGcDX+G4lc/vkOYvwBK3jvwbpwH7P+611nq8CKd+PyOEuAfn+8oDzgHukVJuIRp1rTcTBwPxD8e75q84k5+1wMM4H0jkBJ3mnqvAmWV/DseToN69noEzbPGXDnn/FWe4JRVn6EZ28XdJDzJ6XNm+iDiXhjNh/FYn6b+Fo4TqcbwWvgHuZrfXwUE4439BnAm0n7jpn4jIo4j2E6+TcCZ0/w54epD3SuATnO5rA/AxcHaHNBfhjIe24HRJXwPS3GvHu9eacT6E7+K03G7pRr4V7DmJ+WiHZ94EFPX2d+3m/YpwuuSPu+9YheNp4olIk4zTBa/GaVDcQh8nht1z1+C0roPub/QDejcx3NO7C5yhikKchsQWIjy49rFMF7ll2ohj5H/Onk4UXdZRt67VAVd3kPffwBrA14vvul39cM91LH8fzhBJCc6IwDo6eMC5ZX1xh3MmHb5ZnLp6WcTxbTjeNY04SvVCN6889/olnZRJtptmboR8f3XrVx1Og+NngOxw388j6sgbOHN5kvbeSgfhzK9Uu+k24ei6jH2pa5F/rW5dwwYhxGPAIVLK2T0mVgwa1O+qGOwIIW4GrpFSjugxcRQZlMNBvUUIMQ5n/P0dnGGNs3C6XD/rT7kU+4b6XRWDHeEsTr0ep7fRiDOs+kucCeX9K8tQ7gkIIUYDS3AmV+JwulL3Sikf6VfB+gkhxNd07a75jJTyqv0pz96iftfBiTs/8Juurkspk/ajOP2KEMKL4003G2cIshDHhfzPUsr9GvdpSBsBRXvc5eZduTvWSccDSKGICUKIDJx5uE6RUg7rCMD9hTICCoVCMYwZrOsEFAqFQhEFlBFQKBSKYYwyAgqFQjGMUUZAoVAohjGDbp2AruuP4SzD3mUYxoE9pM0FHsMJI1sFXGwYRreB3xQKhWI4MRh7Ak/gbIrRG+4CnjIM42CcODu3x0oohUKhGIwMup6AYRjv6rqeF3lO1/WJOCvtRuIEPrvcMIz1OFEtf+4mewf4JwqFQqFoYzD2BDrjYeBqwzBm4+yo9YB7/gucIGbghBlI1nV9v8blUCgUioHMoOsJdETX9SScMKzP67reerp1Q+dfAPfpun4Jzk5EJTiRBBUKhULBEDACOL2ZGsMwZna8YBhGKW4cb9dYfNcwjNr9LJ9CoVAMWAb9cJBhGHVAoa7r5wPoui50XT/E/X+mruut7/hrHE8hhUKhULgMuthBuq4/i7PRSybO5g+/w9k960FgLE6AtOcMw7hV1/XzcDyCJM5w0E8Nw2jpD7kVCoViIDLojIBCoVAoosegHw5SKBQKxd4z2CaGVbdFoVAo9g7R2cnBZgQoLS3tbxHayMzMpKKior/F6JKBLh8MfBkHunygZIwGA10+2DcZx40b1+U1NRykUCgUwxhlBBQKhWIYo4yAQqFQDHDKNMFJ3jC7tE6H9fcJZQQUCoVigHN3kp/3heTuJH/U8x50E8OdIaWksbGR/b3mIRgMYpoDLxSREILExMT+FkOhUOwjQeD1OC9/T/RjC1iS6OfahhCj7OjpuiFhBBobGwkEAvh8vv4WZUDQ0tLCjh07SE5O7m9RFApFH5DARq/GioCXlQEvHwY8NAsBbgPXxukVLKqLXuCDITEcJKVUBiCCQCBAU1MTL730Ei0tKkqGQjGQqRHwSpyXX6TGMWd0EieMSuL3qXEUewTnNoXwSQnCmQsICcGSRH9U5waGhBFQ7InH4yEcDvPll1/2tygKhSICC1jr8/CX5ABnZSZw0JhkrspI4LV4H7NCFnfWBPmorJ6V5Y345J4rvFp7A9FiSAwHKTrH4/GonoBCMQAo1QQr47ysCHhZFfBSowmElMwM2/y/hhBzW0xmhaw9FPInfi8h0d4MhIRgrd8LROfbHrY9gfilSxl1+OGMzc5m1OGHE790adTyrqqq4vvf/z7f+ta3OPnkk7nsssuorKxsl2bRokXk5uayePHidudt2+byyy9vu/eCCy6gqKhoj2csXryYrKws1q9fHzW5FQpFdAgCKwIebkkJcMLIROaMSeYXafGs9Xs4rTnMA1VN/HdnA69WNPLL+hbmdGIAAJZXNFJSWkdJaR0tIX/b/5dXNEZN1mFpBOKXLiV14UK8JSUIKfGWlJC6cGHUDIEQgh//+Me89957vPXWW+Tm5rJo0aK263fddRdffPEF77//PqtWreL+++9vd//555/PypUreeuttzjttNNYuHBhu+tffvkln376KVlZWVGRV6FQ7BsS2ODVeDjRz/cyEjhwbDIXjUjkqUQ/YyzJb2ub+c+uBtaWNbC4ppmzm00yBkgE5yE3HJRy88341q3rNo3/k08QoVC7c1owSNr115Pwj390eV94+nTqbr21RxnS09M5+uij244PPfRQnnrqKQDuv/9+Nm/ezNNPP43f7+cf//gHV199NY888giXX345mqZx6qmntt07e/ZsHn300bbjlpYWfvOb33D//fdz/vnn9yiLQqGIDTUC3nO9eFbEednhcdrUk8IWFzWGmNticVTIJH5g6PouGXJGoFd0MAA9nt8HbNvmqaeealPsP/3pT9tdj4uL45FHHuny/scff5xTTjml7fiuu+7iu9/9Ljk5OVGXVaFQdI0FfObzuGP7Hj73ebCFIMWWHNti8vOWFua2mGRZA1zrd2DIGYHetNRHHX443pKSPc5bWVlUvvBCVOW56aabSExM5NJLL+3zvQ8++CAbN27k+eefB2Dt2rV8/vnn/OY3v4mqjAqFonMiJ3TfC3ipjZjQvaYhxPHNJrPCnY/nDxZiIruu648BZwK7DMM4sIs0c4G7cbaDrDAM4/hYyNIZ9TfcQOrChWjBYNs5Oz6e+htuiOpzbr31VgoLC3niiSfQtL5Nvzz++OO89NJLLFmyhPj4eAA+/PBDNm/ezJFHHgnAjh07uOiii1i8eDHHH7/fik+hGLIEgTUBT9tirQ0+DwBjLJvTm8Mc32zyrRZrwIznR4NYGbAngPuApzq7qOt6GvAAcLphGNt0XR8VIzk6JTh/PgDJd9yBp7QUa9w46m+4oe18NLjjjjv473//y9NPP00gEOjTvc888wzPPPMMhmGQnp7edv5nP/sZP/vZz9qOjzjiCJ588kmmTZsWNbkViuFE64TuM5rFvzISWOOu0A1IyREtFguampnbYjLVtDvfkWUIEBMjYBjGu7qu53WT5HvAUsMwtrnpd8VCju4Izp8
+    [Jupyter notebook display_data outputs omitted: several base64-encoded PNG images (matplotlib figures) with "needs_background": "light" metadata]
KKMSz2r6f2eWo3qCO3u43g1OmVX0f996JCU9AylluxDip8D3hBB1wDrUANQMVDeskweA7zqt8fdQg2FLiLNsQoiTUQM7n5ZSviuEuBZ4RgjxipTyHSFENsp6T0ANetlCiM6uV7OUMtiHqC+j3DUWXS29l4HnUC2G+G7aHcBLQoj/h7Loraiu2UUoH2PQOc/LhRBvoF72u+jlpZdSrnLO7WXgcSHENf24tRBC/A9K0X6M8ste6si+3inyXeBR55o/hzL2p6J6J/tQ3eJrhRCbUBFXP0QpjWTT731NIluAi4QQ1agBzlbUNf0nsEwI8XWUeyAf1RgJSSkf662yAVzjbwG/EEI0AX9APSczgXOklNdJKduEEA8AdwohgsA/UArlXCnl9+NkPtFpeXYA+/q7933wfeBPQojbUJE8R6GU7wNSyp5aqwPlAeB3Qoj3UO/XItSgeTz3ou7vb4QQP0I9X5NRg58/klJu7u/9HYJ88XwfeEAIsQE1xnAa8J+oHvBQ2IB6Zm8RQjyF8jrccRD1rAOKhRBfQOmZRcCXE8o8CLwvhLgL5fefgRrDhK735n9RevQPQojvoRomE4BzgL9IKd9CjZW8L4S4B6WnquPq6Zu+BhSG8kG9AP+H6r40Az+j59DS76O6X50hiN8EWp3fC5wTfiCh7v9DvVC5dA3g9PS5qh8ZXY5sK+K25aEGu/7ZQ/mTUEqmFeVeWAM8hBMih/I1v4VSsFtRN/yfwBNxdWyl+wBtJWrg9yl6iSqJK3sdKmy0ha7Q1fMTylyOUn5hVFf9LzhRDajW0wpUy3Ud8FkSBjN7kO8Vegj3TDjm7cDWgd7XPs7vYI5VgFJWzfH3HPX83ec8JxGUK+/vqHEa6BpAXpRQ/0Cu8QWohkKHU+5j4I643wXKlbTOOXYd8Lu43491jhFkAKGlCds+D8iEbVeinsUIyp9+DwMILR1AvTc59QVRz/GVHBhaOgcVlrrfKbcR9a4XMID3dwB6ZCDPgEC5X7agjPNmeg4tvT1h2wED+agon/iQ806XahDlUj3buQanOL+fknhNnO0mcfoHFdZah9Ibf0U1Mrrde7pCS8PO8+V3ysyLK3MESlc0OOW2oYzHlLgyl8TV8y6qkdzvAHJnSOSIQQjxS2CulHLecMuiSR76vmo0g0MIcQVdc5maUn28YU1HIYQoR/nn/43qin8aNbGlpwESzShB31eNZvAIIb6Gemf2ocYrfoDqUabcEADD2zMQQpQCz6IGodJQ3bZHZB8+3bGMEGIVvYd5/kZKef2hlOdg0fdVMxCEEJejXEa9MUsOPBhk1COE+DUqiq3TvfY88B05tEHwgR9/pLmJDmeEEEeg0iH0RItUEUMazZjACf7oKzpnq5TyUCVJPOzRxkCj0Wg0YyJrqUaj0WiGiDYGGo1Go9HGQKPRaDTaGGg0Go2GEbbspd/v/yVqQYf6QCAwu5+yk1DTrfNQM4lvCwQCf029lBqNRjP2GGk9gydQ070Hwu1AIBAIHI2afv2TVAml0Wg0Y50R1TMIBAKv+f3+yfHb/H5/BfBj1IpJHcC1gUBgLSrXRo5TLBeVXEyj0Wg0B8FI6xn0xM+AGwOBwDzU6mCdPYA7gc/7/f4dqMRPNw6PeBqNRjP6GdHGwO/3Z6HSDv/O7/d/jJq6Ps75+VLgiUAgMAE4F3jS7/eP6PPRaDSakcqIchP1gAE0BQKBo3r47Qs44wuBQOBtv9+fBhTRfb0EjUaj0QyAEd2SDgQCLcAWv99/EYDf7xd+v3+u8/N21JKV+P3+maiEaA09VqTRaDSaPhlRuYn8fv8zqMUiilALQXwHtWrVoyj3kAf4bSAQuMvv988CHgOyUIPJtwYCgWQtQq7RaDSHFSPKGGg0Go1meBjRbiKNRqPRHBpG0gCy7qJoNBrN4BHJqGQkGQN27RoZ88aKiopobGwcbjH6ZKTLONLlg5Ev40iXD7SMyWAo8pWXlydNDu0m0mg0Go02BhqNRjPctNUZvPiNHB5fUjRsMowoN5FGo9EcTrTVGbz2XRfv/6oUKcGKJMX9f1BoY6DRaDSHmLY6g9d+mM3q32cgJdjm8BmBTka0MZBS0t7ezqGeCxEMBjFN85AecyBIKUlLS8Pr9Q63KBqNZoBICa27DOpXeahb5aF+lYdNL/uwwiPLSz+ijUF7ezs+nw+PxzPcoowIpJS0trbS2tpKUdHw+RY1Gk3P2Cbs3eSm3lH6dbUe6le5Ce53qQJCUjDFYsriMB17DepqvCCG1z3UyYg2BlJKbQjiEEKQk5PDxo0b2bhxI3l5ecMtkkZz2BLpEDSsdndr8Tes9WCGlGJ3+STFM6JMOydEaXWUkuooJbNMvJldno62eoMPf1rM+78ykLYeM9AMEpfLxfLlyznjjDOGWxSN5rCgvdFwWvmqpV9X62HfZjdIpbzT8mxKq6McfUU7pbOV4i+sNDH60bBZJTZLH7Y45voG3nooi53Lh88FrI3BKGUkjmloNKMdaUPTNldM8Xe2+NvqXLEyuRNNSqqjzLwgSOnsKKXVJtnlFuIgGvV1huBid5RHyiRL7m1J4pkMnjFnDNKXLSP7vvtw7dqFVV5O6223EVy6NCl179u3j5tuuomtW7fi8/mYPHkyP/jBDygsLIyVuffee/m///s/brrpJr761a/Gttu2zXXXXcfatWvx+XwUFRVx3333MXnyZACuueYatm/fjmEYZGZmcvfddzN79uykyK3RaA7EDEPj+i6/fl2th4bVHiLtamDXcEsKp5lMPilMSXVUtfhnRUnLS15Ay33ZPt4UkoeyvNzbEk5avQfDmDIG6cuWkXvrrRjBIADunTvJvfVWgKQYBCEE//mf/8nChQsBuPvuu7n33nt54IEHALj//vtZsWIFb731FjfeeCM+n4+vfOUrsf0vuugizjjjDAzD4PHHH+fWW28lEAgA8NBDD5GTo5Z0fvHFF7nlllt48cUXhyyzRqOBUJOgfrWnW4t/7wZ3LKTTm2lTUh1ltr+DkmqT0tlRiqZFcaclT4ZGQ1DjMVjpcVHrcfGRx8VutzI8z2Z6ubktQok9fCnaRo0xyLnjDjyrV/dZxvvBB4hIpNs2Ixgk75ZbyHj66V73i86aRctdd/UrQ35+fswQABxzzDH8+te/BuDHP/4xmzZt4sknn8Tr9fL0009z44038thjj3HttddiGAZLliyJ7Ttv3jx+/vOfd52fYwgAWlpaMIyRFXam0YwGVBinizqnpV+/ykPjGg/7t42LlckqtSipjlJ5RijW4s87wkIk6ZWTKPdPjcdFjcegxuNipdfFHlfXASabFl4kLimxhMCGYe8djBpjMCASDEG/24eAbdv8+te/jin4+B4AQFpaGo899liv+z/++OOceeaZ3bZ97Wtf49VXX0VKyVNPPZV0mTWasYRtwt6N7rgQTtXiDzU5SldICqaaHDHf5sjL25Xir46SWWwnTQYJ7HIpxb8yTvk3OIpfSEmFaXNC2GJONMKcqMXsqEVQCE4ozcJyBhoiQgx772DUGIOBtNxLjj8e986dB2y3xo9n73PPJVW
e22+/nczMTK6++upB7/voo4+yYcMGfve733Xbfv/99wPw3HPP8b3vfY8nn3wyKbJqNKOdSLugvocwTiuslKk7TYVxVp0XjIVxFs8y8WZIJyto25BlkMB2V2eL3xVr+e9zFL8hJdNNm1PCJnOiNnOiFtVRi8wedPv3s70H5Owf7t7BqDEGA6H1ttu6jRkA2OnptN52W1KPc9ddd7FlyxaeeOKJQbtzHn/8cZ5//nmeffZZ0tPTeyzzuc99jq9//evs27ePgoKCZIis0Ywa2uqNhElbHvZtcXUP45wdZd7VqrVfUh2lsKL/MM7BYANbXAa1HoOVXqX4az0umg3H+EhJlWlzVshkdtRiTtRmlmmRPsBG/QdeN5GE8KOIECz3ugFtDIZM5yBxqqKJAO677z5WrlzJk08+ic/nG9S+v/nNb/jNb35DIBAgPz8/tr29vZ2mpibGjx8PwEsvvUReXl63MhrNWEPasH+LK9bS7/zbXh8XxjnJpLQ6yqylHbEWf3a5fVBhnL1hAZvcjm/fY1DrKP42R/F7pWRm1OZTwShHRi3mRC1mRG0G9/Z356XG9tj/I2W9hTFlDEAZhGQq/3jWrVvHI488wtSpU/nMZz4DwKRJk/jFL37R775tbW3cdtttTJgwgUsuuQQAn8/Hn//8Zzo6OrjuuusIBoMYhkFeXh5PPPEEIplPvEYzjJghaFjn6d7iX+0m2tEVxlk03WTKyeHYpK2SWVHScpPrP48CGxzFX+NxUeM1WOV2EXQUf5otmWVafC4YZU5E+ferTJvDIQ/CmDMGqaSqqoqdPYxJDISsrCx27NjR42/FxcX8+c9/HopoGs2IIbhfdGvpd4ZxSstpaWepMM4jL+mgZLYa1C2cZuIeSlO7B8LAeo/BFsPirdw0aj0u1ngMQk4jK8OWzI5aXNYRifn4K037sFWKKTlvv99fBTwbt2kqcEcgEHgoFcfTaDSHHilh/zZY/3patzQNLTu71EpWmUVpdZRpS0KxFn/epOSFcXYSBNZ6jG6Du2s9BlEhAItsw8PsqMWV7SqiZ07UZopp4+qv4sOIlBiDQCCwDjgKwO/3u4CdwPOpOJZGo0k9VhT2bVTKvrPFX7+6M4yzQIVxVpiMPy7CMVepFn/JrCiZRckL4+ykQ8Aqt3LxdCr+9W4jFqaZZ9vMidp8qS3C7KjF4qxcchr36mUd++FQ9IhOBzYFAoFth+BYGo1miITbBA1rPNTVdsXwN67zxDJqutMkxTOjzPhUkKnzfWRO3k/RDBXGmWxaBayKi+Gv9bjY6DawHcVfaNkcGbU5M9TZ4reYYEniR9uKsgTDPzw78jkUxuAS4JmefvD7/V8CvgQQCAQOyNEfjAsR1XRhGAaGYYzoNQ3cbveIlg9GvoyHQr6W3bBrhWDnCsGuFYJdHxs0bupSpRmFkvFzJTPOtBk/V1I+16ZoGrjcAB7cbgPTzE2KLPuRfCwkH3V+DJsNcVq9XMJRUuC3BUfZBsdIQTkgDAE+6C28R9/ngSFSuYqY3+/3AruA6kAgUNdPcblr165uG1pbW8nOzk6VeKOWLVu2sG7dOs4+++zhFqVXRkq4XF+MdBmTKZ9tqTDO+IHd+lUe2hu6vOZ5R5ixWbqdaRqyyvoO4zxYGffF5enpjOHf5u5y5Iw37VhLv3Nw92Bn5o7l+1xeXg6QlLDDVPcMzgE+HIAh0Gg0SSIahMZ1jtJ3fPwNa+LCOD1OGOepYScFs/Lv+3JS0zCsN0S3GP4aj4udcYr/CNOOi+pRyr9gGBO2Ha6k2hhcSi8uIo1GM3SC+8QBk7b2bowL48xWi64ceWnXpK2i6SauFKyhIoHdhqDWSczW6eOPT9A21bQ4NmJxTbsa3J0dtUhiRmjNEEiZMfD7/RnAmcB1qTpGIi8ULWG/d9UB2/Mj1Zzf+NKQ60/legadPPjggzzwwAP861//YsaMGUOWWTM2kBJadri6onlqPdStctO6KyGMc3aUaWeHYi3+3InJD+MEpfh3uARvCJs3sn1Oi9+gMS5PT6VpszBscmRcnp5srfhHLCkzBoFAoAMo7LdgEimJzKPZswFbdGUpNaSXksixSak/lesZANTU1PDhhx/G0lJoDk+sKOzd4GbrNoNN7+TEWvzhFicTpqHCOCfOj1BS3e60+E0yCpMfxglK8W+NT9DmtPqbDAMwcbm9TDdtTgubHBmxmB21qTYtUhBcpEkho2ay3bs5d7DP0/d6BhYRbKLdttmY7PXU8rfCz/W6X0F0FvNbhnc9g3A4zDe/+U1+/OMfc9FFF/Uri2ZsEG7tDONULf36Wg+N6+PDODMomWUy83wnG+fsKMUzTDwDzYg2SGxgs9voNri7yuOixUnX4JGSGVGbc4Mmc6IWJ2VmU9awj55TLmpGE6PGGAwEF17SrRKCrnoQEqQg3SrGRfIdpMlez+D+++/ns5/9LJMmTUq6rJrhR0poqzMOyL3ftLXrFcwotCiZHeXYL7ZRUm0yY1Em5DdipGiarAlsjOXpMWKKv91R/D4pmRW1OT8uQVtV1O72NhVlGDqG/yDwnHs82xe28Ma322n32lCutifLpX0wjBpjMJCWO0CHUcfvS0/AIowLH59u/DsZdknS5UnmegbLly/n448/5pvf/GayxdQMAyqM0x1r6Xf6+Dv2xoVxTlbZOOf4O2It/qzS7mGcRUWZJCsiMgqscxvdBndXu12EHMWfbkuqTYuLOyLMdnz80w6TBG3DQfEKi/w1GRz5eAYrr+7gjW+3EyxzJ82lfTCMGmMwUDLsUirbL2Zd5pNUtl+cEkOQ7PUM3nnnHTZt2sSCBQsA2L17N5dffjkPPvggJ598ctLl1ySPaFDQsLb7alsNa9xEg11hnMVVUSrODFFabcaycfpSOJIapitPT+d6u2s8Rix/fpaToO0/4hK0Veg8PSklJPbR7NlAk3s9Te71XAi4HVfgUY8po1BzVYT866+A4uGRccwZA4C5bTfT5FnP3Labk153KtYzuOGGG7jhhhti3+fPn8+vfvUrHU00wujYZ1Bf66ZudVf8/r6NbqTtuFVyVDbOuZd3xCZvFU5LTRhnJ0FgdZybp8brYp3bwHQUf66j+L/QHnFSMttMsWydpydFhIx9NLnXOUp/A02edTSzgfSt+yiudVNS42FWjQ/i+lwuU+Ay4eif+Yh8/AB7f9+7ezmVjEljkGGXcs7e3ye93lStZ6AZPtrqDN58KItdH3i5+iXlk5ESmrerRVc6W/v1qzy07u5qO2eXm5RWm1SdF4rF7+dOtJK66Eoi7U6envgJXBviErTlO3l6To3L0zMpIU+PZuhIJCFjr6P0N9DsWc9+93qaXevx7N5PyYduims9VNWkUVqbTv4aN66wau5LIbAmTwS2x+ozPRLphnb/pwjd/F/DdFZj1BikilStZ5DIu+++e1DH0AyctjqD177r4r1flSItsE3BP+/IiQ3sRlq7wjgLp5lMPCEcU/ol1SYZBakJ4+ykRcAqYfNGpjc2wLvJbSAdxV9sKffO2SEzpvjLteJPKhJJ0Gig2b2eJs/6rta+ex3G/i
aKaz0U17qpWJlOaW0mhavS8LR0uaXNcaWYMyoIXl1BtKoCc2YlZuVkZHo65ePnIb0eLMPi46vb2HPLuRyTPrBx0VShjYHmsKJ1j8E/v53LhhfTkBbEp3VZ+XQGxbNMqi8MxhZdKaqK4klx3OR+gcrP4+3KzrnV7QJMyE1jnGUzJ2JxQTAaG9wt0+kakoZS+nVK2Xs2xFr8TZ71yI5mila7Kalxc0RNJgtqMimszSatvstFbOflEJ1RSWRpJe1VFZgzKohOr0Dm5fR6zEj1dCLHzqXhq+ezevadLKz/uorrHUa0MdAcFoRbBKuWpfPKPTmxHD2J3LxuT8rCODtpdBK0xS/C8klcnp6JToK2izuinJiexRF791OkFX9SkEg6jD1xrfz1NHtUa980mylc56a41s2EmiyOW5lN4ap8sramxfa309MwqyowT62guVPpz6jELilisP7BxpdUlh4fcIn5Lxrt4Q/Q1cZAM2aREnYu97LiqQzW/ikNM2RQOD1Ken6UPR97kZLY5C4g6YZgj5OgrbZzApfXxe64PD2TTYujohZXdESY7QzuFsRlES5KM2jUhmDQKD9mkPQAACAASURBVKW/i/2e9TS71xNyfcKeohU0uTcQpZW8LS5KatyMW5nNvNocCmuLyN6QiWGqprl0uzAryjGPrKDFX6mUflUF1qTxMMjowdGENgaaMUdwn6D2uQxWPJPB3vUevJk21Z8NMvfyDsqOjCIEtNUbfPjTYt7/lYG0uxuFwSKBXYZw0jR0De7WO4pfSEmFabMgbDEnqgZ3Z0ctUpQk9LBBYtPu2hVr5Xe2+JvdG4iKNrJ2GxR/4Ka8Jo+qlTkU144jZ20urg4zVoc5KZ/ojEraz+hq6ZtTjwDv4TfDQhsDzZhA2rD9LS8rns5g/d/SsSKCcUdHOPv+JmZ+Jog3s7vmzSqxWfqwxTHXN/DWQ1nsXD6w+E8JbI/P0+P4+PfFJWibbtosjkvQNitqkaUV/0EjsWlz7ThA4Te512MaHaTtFxTVuClfkc+cmjyKaieStzqIZ38oVodV4sOsqiR4WQXRmZXK3TN9KjIzYxjPbGShjYFmVNNWb1AbUL2Apq1ufLk2R32+nbmXdVA80+x3/6wSmyX3tvT4mw1scRnUegxqvF0TuJqdWbtuKakybZbEInpsZpkWKUobNOaR2LS6tjvRO10Duc3uDZhGEHcHFK1xM35lIbNq8impqSB3dZC0XW2xOuxsi+iMKUTOq6R9RgVmVQU5C4+ncbhHZ0cBY9IY1BmCL+en8+j+4EGvjtQTqUxhPX/+fHw+X2wi27e+9S1OOeWUpMk+lrAt2PKqj5VPZ7DxH2nYpmDigjCLvtrK9HODA4r+qTMEF7ujPGIISmyJhUrQtjJuAletx0Wbo/i9UjIzavOpuDw9M6J2bystavrAxqLNtb3bQK5q7W/EMkIIEwo2uJiwspiqFQWU1FaRtzpE+uZmhDOmIn0dRKdNwTyhkhbHvROtqsAuLz1wMLeogKTl9RjDjElj8FCWl3e9Lh7K8nJvSzhp9aY6hfXPfvYzPeu4D1p2ulj52wxW/jad1l1uMgotjr22nbmXtlNQYQ2qrrtyfLwpJJcWpJMFrHK7CDqKP82WzDItPhuMOimZLap0np5BY2PS6trWzbWz37OOZvcmbBEGCbnbXExcUcK0lUWUrKomvzZCxob9GBHVq5NGE+aUiZgzqmm9QLl3olUVWJMngHtMqq9hY9RczTtyfKz29B/uEQE+9LqQQvBkppdaT/85S2dFLe4agNFIZQprTc9YUdj0zzRWPJ3B5n+rdvjkxWFO+04L05aEBpXqYbch+GO6h0C6h7Ve9Syt9bg4KtK55KLy8Vea9uh5MUYANlFa3NuUeycuRl8pfbW2SEa9wcQVZcxbUUxJ7VEUrIqSuXYfrrYQaiSmAXN8GWbVdNpPdpT+DDVJizTd/zoUjLlnfkdc6J50vk+1ku8vTHYKayCWn+i4447jtttuIzc3N8lSjx72b3Wx8pkMagIZtNe7yCqzOOG/2jjykg7yJg28F9As4K/pHp5P9/CW00gotGxcUmIJgReYO8DGwOGOUvpbukXvtLk3s2/cemyh1hHxtggmfVDOUTUllNYeR0FtlKw1+3E3tqJGYeqw8nMxZ1QS9J+EWaXcO2bVVGRO9rCe3+HOqDEGA3lZ6wzBCaVZsSn7UgiaXfCTxuSOHUByU1gDLFu2jPHjxxMOh/nOd77D7bffziOPPJJMkUc8ZhjW/z2NlU9lsu1NH8KQVJweZu5lTUw9LYwxwKc1BPwrzc0f0j38K81NWAgmmxb/3RZhUcjk0qKMWD6fiBA8m+nl5rZI0p+R0YpFhBb35m4zcpvdG2h2b0YK5b5xhQSTVk3gyNXjKfzoBApWWWSvacL7yV7AAnZjZ6RjVlUQPvNo2qsqVUt/RgV2UcGgJ2lpUs+oMQYD4aEsL4mvs+1sT+bYQbJTWAOxpS59Ph9XXnnlQRmZ0UrjBjcrnspg1XPpBPe7yJlgctL/tDDn4g6yxw2sV2cBb3tdPJ/u4a/pHloMQbFl8/n2CEuDUeZGbQTwjRzfIXlGRgMWYVrcm1WStbjB3Bb3FqRQvS9hCsZvmMisj8dRVruYwlU22aub8W2uR1hRYCvS48asnEx03rGELq8kWqWUvjVh3JiepDXWSJkx8Pv9ecDPgdkoj801gUDg7VQdD+ADrzuWs72TiBAs97pRWd6HTipSWHd0dGCaJjk5OUgpeeGFF6iurk6KvCOVaFCw9k9qLGDn+z4Mt2TaWSHmXt7B5JPCA1rEXQK1HoNl6R7+mO5hj8sgy5acE4qyNBhlYdg64AE/FM/ISMMkRIt7U/dka551tLq2dSl926Bs+yRmrChnXO1UCmslOatb8K3fjRGKANtUxs0jJhCdUUXbuecSraoka+GxNOZlg0cPr492Utkz+BHw90Ag8Dm/3+8FUj6746XG9pTWn6oU1g0NDVx77bXYto1lWUybNo177703pecyXNTVulnxdCarn08n3GJQMNXklNubmX1RkMyigfUCtroEzzvjAJs8LjxSclrI5M5giDNCZp/r8cY/I0VFRTSOoZBDkyDNMaXflV5ZKX11bYV0UVw3maqV4ymrmUFRrSBnTSvpa3djNLcCmwGwyoqJVlXQccUix71TiTltCjKj+9XNKirSYZtjhJQYA7/fnwMsBq4CCAQCEVSgz6gmVSmsjzjiCF56aXjWPT0UhNsEa/6QzoqnM9izwovLJ6k6L8jcyzqYuCAyIPdxgyH4U7qbZekePvKqx/aEsMl1TUHOC0bJO4zc/aYIOpOxulr5Te4NtLq2qbW/ASHdFDRPpmL5EZTXHklRDeSubid97R5cdY3ABgDs3GyiMyoJnn+WGsh14vVl/uEbvHC4kqqewVSgAXjc7/fPBT4AbgoEAt2a7n6//0vAlwACgQBFRUXdKgkGgykSb3RjGAaGYRxwvUYSLpebjs3FvPtLg48DBpF2QVm1zfkPmBxzmU1GgRvoPcUvQCuSPxo2zxg2LwuJJeBIW3CvaeC3DSYKr
+pvHmSf0+12j+hraLtDmMXbaBSr2SvWsNf520yX0jekh8JIJVWrZzKhZiFFtQbZq9rxrdqJ2PIJQq4FQKanIWdOQy45BbO6Cul8KC9FCIEHDmoexUi/hjDyZRwp8qXKGLiBY4AbA4HAu36//0fAbcC34wsFAoGfAT9zvsrELrtp9p9O4HDEtm1s2x6RLo5Qk2DVsgxqn81mT60HT7rNjPM7mHtZB+XHqCRxHTZ09CJ6BHjF5+b5DA8v+dyEDMFE0+bLwSgXBqNUmV2upKGe/UhxE0VFm7NoynonPl+tnNVu7KBzfUpDesmNTGXCpuksWDmfolo3eauCZKytw71pGyL6EQDS5cKcOolwdRXRpec6ydemYU0qB1cP83T27h2S7CPlGvbFSJUxfdkysu+7D7FrF0Z5Oa233UZw6dJB1VFeXp40eVJlDHYAOwKBQOeSXc+hjIFmDCIlfPKOShK37i/pWGHBhGNszrqviZkXBPtd/N0G3ve6WJbu4c/pbpoMg3zL5uJglAs7ohwbtcbECl4R0RLLtdPp2mlyr6PdvStWxpA+cqNTmbT9SCrWn0XOBzZ5q0JkrK3HvW4zRsfKWFlzYjlmVQWhM05y3DuVmBVHgC+Fiy5rkkL6smXk3norhuP9cO/cSe6ttwIM2iAki5QYg0AgsMfv93/i9/urAoHAOuB0YHUqjqUZPtobDWp/l87KpzPZt9mNN9vmyEs6mHtZO9Wn5NHY2NHn/mvcBs+ne/hDuoedboN0W3J2yOSCYIiTw+aoTf8QFs0HJFtr8qynw7U7VsYl08g1K5hQfyzjawspqfWSvypMxpp63Gs34dr/YaysVVSAWVVBx6UXOLn1KzGnT0FmZw3H6WkGg5SIlhZcu3erz549GLt3k/XoozFD0IkRDJJ9331jyxg43Ag85UQSbQYOn8D5MYy0YevrPlY8lcGGl9Kwo4Lxx4U598ZWZnw6hKeflJ07XII/OAZgjceFS0pODpt8ozXEkpBJ5igaCA6Lpu7J1jwq/06Ha0+sjMtOI8+cxoSm+YxbXUJpjY/8VREy1jbgWbsJ9673YmXtrEzV0j/3NMyqCjLmz2NvWZGapKUZedg2RmNjNyXviv842xKVPnDAXJdOXLt29fJL6kmZMQgEAh8Dx6aqfs2hpXW3Qc2zGaz8bQbNn7hJz7c45iqVKrpoet9jO/uE4M/pakbwuz71yB0bMbmnKcinQyaFh3Dm7wtFS9jvXdW1wXG55keqOb+x54iukNhHs2cD+52ZuJ0unqCrPlbGbaeTa06nvP1EyteXUVqTRv4qk8w19XjWbsa19T2E7ayk5fVgVk4hsmAeHc4qWuaMSqzxZd1m5qYXFWGPQF/3YUEkgquuDtfu3V1Kfs+emKI39uzBVVeHSBjXlG43Vmkp9rhxRGfNwjr9dKyyMqxx47DLy9X/paWULFqEu4fIRCuJYwCDZUzNQNYkF9uEzS/7WPF0Jpv+5UPagiNODLP4Gy1MPzuEu485dx1IXkhTA8Gv+NxEhWBa1OLWlhAXBqNMsoanC1ASmUezZ0MsgRqowdmSyLGEjL1OK3+d4+JRLf6Qq0shu+1M8szpjA+dTNmWCZTWplFQa5G5pgHPus24N76PCKu6pWFgTZ5IdEYFwfOXKKU/sxJz8kSdcXMYEW1tqtW+a1eXgnf+Gp3/92CE7fR07HHjsMaNI7JgAZbzvz1uXJfCLyoa0Kzr1ttu6zZm0Fl/623DN7Q6pp/ItjqDNx/KYtcHXq5+aegtrFSuZxAKhbjzzjt5/fXXSUtLY968efzwhz8csswHQ/MnLlY8k0HNsxm07XGRWWwx/8sqSVz+lN6TxJnA6z6VEuJFT5S2ggzKLJsvtEe4sCNKtWkP60CwRDKj/So2ZP6223Ybky3pL7Au61exbR47izxzOhNDZ1C0ZwJlNVkUrrKV0l+7Cfe6DzHaXo+Vt8aVEp1RQXjxfKelX0G0cgqkp6E5REiJsW/fgS35piYKtmzpUvitrQfsauXnYzsKPTp3rlLszvdORS9zcpKWU6lzXCD7vvtw7dqFdZDRRMlkTBqDTiNQ+2zmAYueD4VUrmdwzz334PP5eOONNxBC0NDQkBSZB4oVgQ0vqfQQW19TTf6pp4Y583vNVJwRwtXLaK4EPvS4+EO6mz+me2h0GeTaEr9tcM7+VhZELFKdncbGImQ0EnTV02HUEXTVEzTq6HDVEzTq6XDVETTqCboauOqYbM48wccb3zZpH2eDhDS7gImhJRTuP4Ky1TkU1lhkrtmLZ90m3OtqcDW80nWsvFyiMysJfu68WEs/Or0CmaszbqYU08Sor+/mj4+5azq/79mDCHdPKSINA8rKMEpLMSsrCS9a1K0lb5WVYZWVQfoAVkRKMsGlSwkuXTpiQl9HjTH45x051K/uO77EikDzDhft9U48tewyAk9/rrCXvaBkVpQz7up56cN4UrWeQXt7O8899xzLly9HOC2P4uLifuVJBvs2uVjxTCa1gXQ69rrILjc58b9VLyBnfO+9gI1xkUBb3QY+KTkjZLI0GOLUkMn4oiIaI4NbcCYRizBBV0OPCj7oqqPDqCfoqidkNMbSLcTjs/NIt0pJt0vIjUwl3Sql7ONlFK/2MfeXGWxfFGHfTMnM9SfiW7MW9ycvx/a109NUxs3TToxl24xWVWCXFOmMm8kmGOzdXdM5EFtfHxtz6UT6fDGlHjn66JgLJ17R2yUlFJWVjQhlO9IZNcZgIOxd7yHcKuAQOCOSuZ7B1q1byc/P58EHH+Stt94iMzOTW2+9leOPPz4lspshWPdXlR7ik7d9CJek8swQcy/rYMopYYxe1hDaYwhecHIC1XhdGFJyYsTiv1rDnBOKkjPAYYCo6CDoKPiO2F+l4GMteVcdYaPpgH2FNEizi0i3SsiwSyiMziHdLiHDKiHdLnW2l5JuFeOKW5TS2Lsf3+vvAstwRWxAMPVfPqb8C+zcNYQXHNMVujmjEmtiuc64OVSkRDQ3d1fqPQ3GNh14n+2cnJhSN6uqYv75mG9+3Djs/HxtmJPIqDEGA2m5t9UbvPVQFjXPZiLt7u6hy54b2kzLRJK5noFlWWzbto3Zs2fz7W9/mw8//JCrrrqKN998k+zs5LkfGta4WfFMBqt+n0GoySBvssnJ32hh9kUdZJX2nCSuWcDf0jwsy+haHGZuxOLO5hCfCUYpdSKBJJKwaI4p+HojSH3mppiLpsPVpeyjRtsBxzGkh3SrhHS7hBxzCqWR+T0o+BLS7EKMgTy2oTDe99/D9/o7+F59B2/tuh6LCcBoacXV3ELTTV8Y8LU87LEsjMZGxLZtpK1Z0+WuifsYe/b0GFZpFRdjlZVhTpxI5LjjelT0MjNzGE7q8GbUGIOBkFVis+TeFhbe3NarUUgGyV7PYMKECbjdbi644AJAuZ8KCgrYvHkzc+fOHZKskXbBmj+ms+KpDHZ/5MXllUw/RyWJm7Qw0mOq6BDwcpqb5+MWh5lkBvlCcCMnR1dQKNYTdNezOree5XH+eEuEuleUq0IuOxV6QXQW6dap3VryGZb6zSfz
EUPp0UmJe+1GfK+9i++1d/C+8yFGKIx0u4gcdxQtt36Z8OL5FH/qyq5dvF4wBO0Xf4a2m7948Mcea/QUVpnop48Lq+ycBSHdbuWaKSsjOns21hlndLlrysvV/yUl4NUzpEciY8oYdJJoFHYuT97Dl4r1DAoKCli4cCGvvfYaJ598Mps2baKxsTEWaZRIm+sT3nbdw55y1ZJNjJGXEvas9LDiqQzWvJBOpM2gcFqU077TTPXngmQU2NhE6TAaYi6aNlc973kz+KdnGu+6j6JDZJAtG5jHcxzJs0xwf4Bww3bUx2vnxlw1JZHjDnDVjM+pIrTXi0dmDU3J94FR3+go/3fxvf4OrnrV+4tOm0LH5RcSXryAyAnzkJndM9lJrwcMA/tKPw3Xf16NAxwm9BhWGe/CGUhY5QknxFrxmdOn05SVpRT+AMMqNSMTIeWImfIpdyXMvmttbU2qm2SorFu3jtNOO42pU6eSlqZCBgeznsGMGTOYMGFC7Jw61zMA2LZtG7fccgv79+/H7Xbz9a9/ndNOO63Humq2v8Zz677FlCs3Y0gv09ov5YSWewm3CGqWKVdQY20mrnST8gtWUXr1v/AufFcNwjot+ZCxDykku5jLCi5mJZ+llXJ8spWj5assMt/jGHMP2VZRbBC2S9kX4+5z1YDUJAcTwSDedz+Ktf49azYCYBXkET5pPuGTFxBedDz2+LLe5VpyKZFj59J28xcpmDVjRA8sDuoa9hZWmRBx019YZfwnPrSyt7DKkRIJ0xcjXcahyOckqktKa2tM9gxSRarWMwC1psFzzz03qDqt3WW03HUH7/zjPD5e+A6tz52NDGbgPuojsn/8GGmXPU0kr5kd0k26rZR6ljUBK7qEj4zTeM1zNNtd+bilzcnhdj7b0cqSkCSdxajlKIYZ28a9egNpr72D77V38b73ESIcQXo9yvXzjRsIn7yAaHXVgFukjS89k2KhU4BpYtTVHRhxE9+yr6vrMazSLilRg7DTphFevPiA2HmrtHRYwio1Iw9tDEYjlovQHz9N4xd/CKYbMDDriii75D2mXFFD6ZEmGfJEMiIXkr6nlDS7gL2Giz+mq3GAD9O6Fof5SqtaHCZ/hHQQjd31+F57B9/ryv3j2rsfgOiMCtqvuIjwyQuIzD/6gBW3RisiGDwwjNL5625ooHTHjt7DKh2lHpk3r3vLvnMgtrhYz3TWDBj9pIxCrB0TsT4+CsyusZCbPmzGmzkdmK5GgIE2AX9JU6Ggr/tcWEIwK2rxrZYQ53dEGX8IcwL1hugI4n37gy7Xz3pn2cWiAuX2WbyA8EnzscsOzbyLpJEYVpk4Qaq/sMpx42DiREKVlQe25seNQ+qwSk2S0cZgNCIh0U3oddJ99rQ4zATT5j/bIixNWBxmWLBtPDVrVev/1XfwLl+BiJrINB/h44+i4+JPEz5pAebMypQNRnYuKuLatYuSg0kD4IRVHjBBKm5Qts+wynHjVFjl8cd3nwmbEFZZVFRE8wj2dWvGFtoYjEYE2K7urfp3e1gcxh+MsjQY5djI8C4O49q5W7X8X30H7xvv4drfDEB01nTav3gZ4cXzCR931CHJ49PvoiLhsAqr7CMlsau+vudslZ25bWbPxjrzzG4KXodVakY62hiMQuzxNg1H2xTvkAgLRESwtCiTdFtyVsjkwmFeHEa0tSPe+ZicP7+kXD+btgFglRYRPuMkpfxPmo9d3HuKkKRj2xh795Jz9909LiqS99WvknPnnbh6WAbSzsiIRddEFi7sasmXl8dcOHZhoQ6r1IxqtDEYhbS7BVvPtXjnRx0cf5eHaW8aPLK/g7OGa3EYy8KzYjW+V9XAr/eDlQjTwpXmI7LwWDo+/1nCJy/AnD41+X7uUAhXQ4OKtqmvV8nMEv82NGA0NCCsPnIlRaOEzj67x7TEMjtb++c1Y54xZQyKllxKZN6RtN38RezS5A84piqF9SeffMI111wTK9vS0kJbWxurVq2iJzr1fbjUpuT/a+PO5hAc6J5OKa7tO5Xyf+1dfG++h9Gs4tcjc2bQdv1/kPbps2iYNvng1uOVEtHU1KXkGxqUUq+rw2hoUH87lXxz84G7GwZ2URFWSQl2SQnR6moVYllSQvaDD+Lat++Afazx42keppThGs1IYEwZA++q9Xg2bCHz2T/RfvGnk24UUpXCeuLEifzjH/+Ilbvjjjuw+mrFOlhC8McMD99oDVOS4sgg0dKK7833Y1E/7q1qzoQ1rpTgOacRXjyfyKLjsQvVzGpfUREkDn5Go2rgtbcWfKeyb2g4IGYewE5Lwy4txSopwZw+nfBJJ2EXF6uVpRxlb5eUKJeNq+dsezI3d8QtKqLRjARSZgz8fv9WoBWwADMQCAxpCcycO+7Hs3p9v+VEJApA5pO/J/PJ32OVFGJNKAdv7x706KzptNz1tX7rTlUK63gikQjPP/88Tz/9dL/yANjAQ1le7m05UHkOCdPE89Gq2IQvz0e1CMvCzkgnsvBY2q+5RLl+KiYjOjow6upwb1iH8aZS7q62NvK2bu2m7I19+xA9zHi38vOxS0uxi4uJTJ2qlHunki8uVkq+tBSZlTVkd81IXFREoxkJpLpncGogEBiW2LhOpeOqa8ToCBGdXZXU+pOZwjqel156ibKyMubMmTMgOSJCsNzrBoZoDKTEtXUHvlffVhO+3nwfo7UdKQTm1ImETj0euzQP6QFXYwNpf/kdmb/8X6XkOzoOrM7jwVtcjF1SgjlxIva8ed1a71bn/8XFhzzCZqQtKqLRjARGjZtoIC338vHzYv93JiPrzEiZ7GRkyUxhHc+zzz7LJZdc0uf+k3fs5Nt3/4DPf+u7g2/Vdg641tfj3rwV77sf4alZj3vLTox2NVtNGoAMAiGEjODZtBvPJrW7nZ0da7VH5s7FLi6OuW7ilX1BZSWNPfjmNRrNyCSVxkACL/n9fgn8XyAQ+FkKj9V10BQbAUh+CutO9uzZw9tvv82PfvSjvisxTYSU3WLkQ6ed1n9UTX0DRnM74AO8gAcQSGzwCqxxWZhHlGFNGo9VWhpz08QreznQPDY6zFKjGVWk0hicGAgEdvn9/hLgH36/f20gEHgtvoDf7/8S8CWAQCBAUVF3xR3sYQZnX0Sqp8cyUqYqLXEqUlh3EggEOP300ykoKOhh754xgkHybryxx0ll0pcGhSVIbxYi6IKOLCANKQRy+hTkScdjn30q8oyTY8nKBOqhGOqD4Xa7D7ifI42RLuNIlw+0jMlgpMh3SFJY+/3+O4G2QCBwfx/FDusU1gCLFi3i7rvv5tRTT+2zru2vvca6b32LKzerPD4SaLnjDjUIm5aBe/sePDXr8b7zIe5ddQCYkyd0pXleeFzKF3AfDf74kS7jSJcPtIzJYEynsPb7/ZmAEQgEWp3/lwB3peJYh5JUprAGeOONNwZYW/d7bxeVY+yLkP6H5/DUrEVIiZ2bTfjE42j7ry8QXjwf64gJByW3RqM5PEiVm6gUeN7v93ce4+lAIPD3FB3rMMQA0oB8JF5cjTZZP32SyDFzaL3lOsKLFxCdO1O
nL9ZoNAMmJdoiEAhsBoa2eK+mH9xIfFjjimm59SuEzzkVmZ013EJpNJpRig75GIVIZ7axAFx7Gsl89o/aEGg0miExov0IUkqklAidJCyGlBKzrQ3pciHTfLEQWo1GoxkKI9oYpKWl0dHRQaaz2MfhjpSS3Str2fGrZ+CoauoC/5OyEFqNRnN4MaKNgdfrJRKJsGnTpkFP7hosoqMDY/9+iF9r1jCw8/ORGRkHlDdDguB+A9tUq4yl5dmIJIgoQmGMphaIRpFeLzIvB+nzIqXE3rSVLa+8TvOnTmfijCptCDQaTdIY0cYAVEimaZq8+eabRCKRlB7LvWYNvjfewGhtxc7OJrxoEebMmd3KRNsFm1/10bjGQ3qBRcUZYXInWrB9aMd21TXie+0dXNt3YufnEl50HOb0CtjdvZw4bi752dksWbKE5h7SN2s0Gs3BMGhj4Pf7RSAQOKRLqOTl5XHeeeel/kAXXgj0PAlE2rDimQxeuTcHd1BwwY2tLPhKG+7BTUI+ANf2nWT/4Mdk/OFFrII82v77Wto//9k+s6wCeDzDtY6ZRqMZixxMz+BB4L+TLchIpnG9mxe/nsuO93xMOiHMkvuaKKzsf72BvhD7msh++JdkPvEs0uWi9cZraPvyFcickTPjWqPRHD4M2hgEAoHDxhBEg/D2w9m8+2gW3kzJuQ/uZ7Y/OLSU+sEQWb/8LVn/+ziirYOOiz9N6y3XY48rSZrcGo1GM1j6NQZ+v/8XgUDgC87/AngsEAiM+VjGNS/4+Put+UTaDGZf1MGp324ho9Duf8fesCzSf/9Xcn74KK7ddYROX0TLN2/EnFGZPKE1Go3mIBlIz2Bq5z+BQED6Ipz9IgAAE8RJREFU/f6KFMoz7DSsdfOL69w0biykc7Xh8x5qOvgKpcT3ytvk3PMwnjUbiMydxf6H7yKycEgLv2k0Gk1SGYgxaPT7/V8E3gJOAPamVqThoa3O4K+35LHl3/EjwkOb7OapWUPO9x7G98Z7mJPGs+8n3yf06TN0rn+NRjPiGIgxuBK15sBXgHXAFSmVaBgItwmeOKuY9oaeF1EfLK5PdpH9w5+QsexvWPm5NN/1Ndr/43P9RghpNBrNcDEQYxAG9qAWtn8UOBp4P5VCHUp2vO/hLzfl095oUDo7wt6NHqQNVmTwvQKxv5nsh39B5hMBMAxab7iatq9cqSOENBrNiGcg/orfAHnApYFAwAS+n1qRDg1WBF67L5unlxYhJVy+bC9XvdjIdW/XcfzVNu40ics7wOkUoTCZj/6a0hPPJ/Oxp+lYei51rz9P6zdu0IZAo9GMCgbSMygOBAI/9TuLE4wFGje4+fONedTVeJlzSTun39mCL1sp/qwSm6UPWxxzfQNvPZTFzuXe3iuybdKX/Y3sH/4E9849hE47UUUIzZx2iM5Eo9FoksNAjEG93++/GEj3+/0XckCChNGDtOGDxzN59d4cPBk2F/5iH9PPDvVYNqvEZsm9Lb3W5Xv1bXK+9zCe1euJHDmTxge/Q2TR8akSXaPRaFLKQIzBNcAXgQ+BCcC1KZUoRbTsMvjrV/PZ9rqPijNCnHN/E5nFg5834K5dS849D5P22ruYk8az/8f3EPzMEh0hpNFoRjUDMQYzAoHA//r9/hLgKmAysDaVQiWbNS+k8eI38rAjcNYPmph7ecegZxG7duwm+wc/If35vyFzc2i+8xbar/gc+PpwI2k0Gs0oYSDG4AHgdNSC9q8Cj6PmG4x4Qk2Cf9yey+rnMyg/JsKnHt5P/pTB5RQSTS1kP/JLMn/5WzAM2r58BW1fuRqZqweGNRrN2GEgxiDD7/f7AF8gEHjG7/dfP9DK/X6/C1gO7AwEAp86WCEPhq2ve/nrf+fT3mBw0v+0sOCGNow+zrZoyaVE5h2pVg0rKlIRQk88S/bDv0S0tBG86FO0fO167PFlh+4kNBqN5hAxEGPwG+AF4Dt+vz8N2DKI+m8C1gA5ByHbQWGG4NX7clj+WBYFFVE+/8d9jJsb7Xc/76r1eDZsIfPZPyEXHkvJmvW49zSoCKFv3Ig5S0cIaTSasUu/xiAQCPwY+HHcpqsGUrHf758AnAfcA3z1YIQbLHW1bv50Yz5713s45uo2TvlWK570gS+9ICLKaIh/v4kQguCZi2n+wTexS4tTJbJGo9GMCFK50tlDwK1Ar851v9//JVSqCwKBAEX/f3v3Hh5Vfedx/J0wCSCgBKIr42V1vVVF64VtLbqK16XqYp9t96ugq6ul1MvWemGLUC/1ca0utlZqkT550IWKVb4iXeyWWlwtouulK9iKW8u2VR/lIhCIKAiEkNk/zomMIZNMMvPLOYHP63l4kpnMnPORR/KZ8zvn/H61XVvGsXk7PHV7Jb+e3IvKXjD259s44pxqYHCXtgdQkcvR55nn6X3trTQ983iXtxNKJpPp8t9Xd0h7Pkh/xrTnA2Ush7TkK1gGZlbj7g1d2aiZnQ+scffFZjai0OvcvQ6oix/mWq8uVowVS6r42Vdr2LQmAxU5mpsqGHzCWjq1qeZmsnkPc9XVUFnBpgtHsfG6sTR3IVdoba3GliZpzwfpz5j2fKCM5VBKvmw22/GLitTekcFdZlYD/BFYALwYT0dRjJOBUWZ2LtAH2NPMZrn7JaXF3WHj6koW/dsAls7OW6w+14VZRnM59rp5cvRtr0qoqqL5MmPtlZdowXkR2W0ULAN3vxLAzA4DzgbGxYvbvALMdffl7bx3IjAxfv8IYHw5iwDgyatreO+VakqaZjqXY8Bd99Nv5uNsH1zD5vPOZOP1X2PQUZ9J5dGAiEgoxZxA/iPR0cED8aWinweGAAXLoDuMmtbAi/f1Z+nsfl2eZbT/Dx9iwNSZbLr0K2z47k2Utp6liEjP1akTyO6+nWiRm868ZyGwsDPvKUbL3EHDr9vYpVLoN/2n7Dn5AT7+8nlsuHOCikBEdms9fkKdllL4+kurOXb0JvY5urHD9/R9bB573fZ9Np97Bh/ce6vmFRKR3V6HRwZmNtLdnzKzQ4Hrgdnuvih8tM7paJbRFn3mLWDg+DvYMuILNPzoTsiEvLpWRKRnKOYj8fj46ySiu5G/Hy5OWL2fXkTNtTfT+LnjaJh+jyaZExGJFVMGA8zsQGC7u78EbAqcKYjqF37DoK9PYNvRR7B+5n3k+vZNOpKISGoUUwZ3Af8K3BPPTfRy2EjlV/Xq6wy6/AaaDj6AdbPuJzegf9KRRERSpZgB84OBm9x9Zfz4poB5yi7zxjIGX3otzfvUsu7RB8gNGph0JBGR1CmmDN4CJpvZXsDPgTnuvj5srPLI/OltBo+5huZ+e7Bu9jTdUSwiUkCHw0TuPi++e/gy4CzgHTN73MxOCZ6uBL3eXcHgC6+GykrWPTaN7fsPSTqSiEhqFXNp6ReBi4AaoiODcURzQMwDTg2arosq31/L4IuuomLLFurn1LH9kL9MOpKISKoVM0x0DDDJ3VfkP2lmXwsTqTSV6xoYfNFVVNY3sG72NJqO1KI0IiIdKaYMZgO3mFl/oqGiy919ursvCxut8yo+/IhBY64h89
5K1s26n23HD006kohIj1DMpaUPEi1Uk43nJhodNlLXVHy8mUGXfpOqZX9m/fTv0fiFE5OOJCLSYxRTBr3c/Q+dfE9wteeMZs+Jd1G5ei1s2cqgK26kevFSGqZ+l62nD086nohIj1LMMNGzZvZjIGtmU4CnA2cqyo4F7J9k+96DySxfRcMPvsOW885MOpqISI9TzKWldwA/Am4hGjJ6IHSoYlU0bqNiayOZ5avIZTJUvfZGdKQgIiKdUrAM4nsJqgHc/Q13fxzYSrQEZupUNDXR7+EnqLl6UtJRRER6nPaODB4D5pvZQAAz+1vgYeDS7gjWGbnqKnJ9erPp0q/QMO2upOOIiPQ47a2B/ISZrQJ+YWa/AoYD57j7B92WrgO56iqorGTThaPYeN1YTTchItJFBcvAzO4AcsAK4AZgGnCDmeHut3ZTvoIajz6cxmGfVQmIiJRBe1cT/Vf89Rlgamc2Gk91vQjoHe9jjrvf1qWEBdQveLScmxMR2a21N0z0XAnb3Qqc4e4bzawKeMHMfunuPW4tBBGR3UGQBYDdPQdsjB9WxX9yIfYlIiKlq8jlwvyONrNewGLgUGCqu09o4zXjiGZBxd1PbGxsDJKlszKZDE1NTUnHaFfaM6Y9H6Q/Y9rzgTKWQyn5qqurIZpFumTByqBFfGnqz4BvuPsb7bw0t3LlynZ+3H1qa2upr69POka70p4x7fkg/RnTng+UsRxKyZfNZqFMZRB8nqH4UtSFwMjQ+xIRka4JUgZmtnfezWp9iVZI+0P77xIRkaQEOYEMDAFmxucNKgF39/8MtC8RESlRqKuJXgeOD7FtEREpv1SsTSAiIslSGYiIiMpARERUBiIigspARERQGYiICCoDERFBZSAiIqgMREQElYGIiKAyEBERVAYiIoLKQEREUBmIiAgqAxERQWUgIiKoDEREBJWBiIigMhAREQKtgWxmBwA/AfYFmoE6d58SYl8iIlK6UEcGTcCN7n4kcBJwjZkdFWhfIiJSoiBl4O6r3H1J/P1HwJvAfiH2JSIipavI5XJBd2BmBwGLgKHu/mGrn40DxgG4+4mNjY1BsxQrk8nQ1NSUdIx2pT1j2vNB+jOmPR8oYzmUkq+6uhqgohw5gpaBmfUHngPudPe5Hbw8t3LlymBZOqO2tpb6+vqkY7Qr7RnTng/SnzHt+UAZy6GUfNlsFspUBsGuJjKzKuAJ4JEiikBERBIUpAzMrAJ4EHjT3e8NsQ8RESmfIJeWAicD/wgsNbPfxs9Ncvf5gfYnIiIlCFIG7v4CZRrHEhGR8HQHsoiIqAxERERlICIiqAxERASVgYiIoDIQERFUBiIigspARERQGYiICCoDERFBZSAiIqgMREQElYGIiKAyEBERVAYiIoLKQEREUBmIiAgqAxERQWUgIiIEWgPZzB4CzgfWuPvQEPsQEZHyCXVkMAMYGWjbIiJSZkHKwN0XAetDbFtERMovyDBRscxsHDAOwN2pra1NMs4nMplMarIUkvaMac8H6c+Y9nygjOWQlnyJloG71wF18cNcfX19knE+UVtbS1qyFJL2jGnPB+nPmPZ8oIzlUEq+bDZbthy6mkhERFQGIiISqAzM7FHgJeAIM1tuZl8NsR8RESmPIOcM3H10iO2KiEgYGiYSERGVgYiIqAxERASVgYiIoDIQERFUBiIigspARERQGYiICCoDERFBZSAiIqgMREQElYGIiKAyEBERVAYiIoLKQEREUBmIiAgqAxERQWUgIiKoDEREhEBrIAOY2UhgCtALmO7ud4fal4iIlCbIkYGZ9QKmAl8EjgJGm9lRIfYlIiKlCzVM9DngT+7+lrs3Ao8BFwTal4iIlCjUMNF+wHt5j5cDn2/9IjMbB4wDcHey2WygOJ2XpiyFpD1j2vNB+jOmPR8oYzmkIV+oI4OKNp7LtX7C3evcfZi7D4vfk4o/ZrY46Qw9PWPa8/WEjGnPp4ypyVcWocpgOXBA3uP9gZWB9iUiIiUKNUz0P8BhZnYwsAK4CBgTaF8iIlKiIEcG7t4E/DPwK+DN6Cn/3xD7CqQu6QBFSHvGtOeD9GdMez5QxnJIRb6KXG6noXwREdnN6A5kERFRGYiISMDpKHoiMzsA+AmwL9AM1Ln7lGRT7Sy+w/tVYIW7n590ntbMbCAwHRhKdEnxFe7+UrKpdjCz64GxRNmWApe7+5aEMz0EnA+scfeh8XODgNnAQcA7gLl7Q8oy3gP8HdAI/Jno7/KDtOTL+9l44B5gb3evTyJfnKPNjGb2DaLzrE3AL9z9W92dTUcGn9YE3OjuRwInAdekdBqNbxKdmE+rKcBT7v4Z4LOkKKuZ7QdcCwyL/zH2IrraLWkzgJGtnrsJeMbdDwOeiR8naQY7Z3waGOruxwL/B0zs7lB5ZrBzvpYPeWcD73Z3oDbMoFVGMzudaIaGY939aOB7CeRSGeRz91XuviT+/iOiX2L7JZvq08xsf+A8ok/eqWNmewKnAg8CuHtjUp8U25EB+ppZBtiDFNwD4+6LgPWtnr4AmBl/PxP4UreGaqWtjO6+IL56EOBlonuKElHg7xDgB8C3aOPG1+5WIONVwN3uvjV+zZpuD4bKoCAzOwg4Hngl4Sit3Uf0P3Zz0kEK+CtgLfDvZvaamU03s35Jh2rh7iuIPnm9C6wCNrj7gmRTFfQX7r4Kog8qwD4J5+nIFcAvkw6Rz8xGEQ2n/i7pLO04HPgbM3vFzJ4zs79OIoTKoA1m1h94ArjO3T9MOk8LM2sZa1ycdJZ2ZIATgGnufjywieSHNz5hZjVEn7gPBrJAPzO7JNlUPZ+ZfZtomPWRpLO0MLM9gG8DtyadpQMZoIZoaPpfADezsk0zUSyVQStmVkVUBI+4+9yk87RyMjDKzN4hmgn2DDOblWyknSwHlrt7yxHVHKJySIuzgLfdfa27bwPmAsMTzlTIajMbAhB/TWT4oCNmdhnRSdGL3T3xoZg8hxCV/u/ifzP7A0vMbN9EU+1sOTDX3XPu/huio/7a7g6hq4nyxG38IPCmu9+bdJ7W3H0i8Qk6MxsBjHf3VH2qdff3zew9MzvC3ZcBZwK/TzpXnneBk+JPjZuJ8r2abKSCngQuA+6Ov85LNs7O4kWsJgCnufvHSefJ5+5LyRtaiwthWJJXExXwH8AZwEIzOxyoBro9o+5AzmNmpwDPE11u2DImP8nd5yeXqm15ZZDGS0uPIzrBXQ28RXS5YWKXRLZmZrcDFxINa7wGjG05eZdgpkeBEUSfCFcDtxH9knDgQKIS+wd3b+sEaZIZJwK9gXXxy1529yvTks/dH8z7+TskXAYF/g4fBh4CjiO6RHe8uz/b3dlUBiIionMGIiKiMhAREVQGIiKCykBERFAZiIgIus9AdjFmdhrR5XqVwHbgFnd/0cw2AEuAKqJpE7LAWe5+c/y+7wAL3X1h3rb2IJr+4/D4fXXuPpMuimdzPSOFNzOK6MhAdh1mVgvcDnzJ3UcQTey2Of7xUnc/HbiRaG6nYtwGPBdv6xTg7RIjDgT+vsRtiAShIwPZlZwLzGqZTyqeefa1Vq/5L
cXPrDnc3SfE28oBiwDM7IdENwh9CFxMNKHhWe5+s5n9U/zehUQ3Eq0nmhLhAmAccLaZLSS6gWxt5/8TRcJQGciuJEt09zhmNga4muiO2PF5rzkVWNbVHcQzSvZz91PjCe6upPDMtjVEcyGNBr5MtPD5gWmbQkQENEwku5ZVRIWAu/8UuIQdE34dY2a/JiqIu4EtRNMotOjDjiGl9hxCdO4BojmNDuXT8+Tnzzb5e3dvBlYQDRGJpJaODGRXMh+YY2bu7hv49P/fLecMADCzRuB4M2v5QHQCMLnV9l40s4vd/ZF4EsOTieZaOif++TCipR43AEPi544BXo+/b10S24hWVhNJHR0ZyC4jHoO/HZhnZs8CDxCtad3Wa9cRTVW+iGhywjltTAJ3O3BaPMb/38Ah8RTDm83seWAM8GOiX/5ZM5sP7N1OxPeBQWY2J17fWCQ1NFGdiIjoyEBERFQGIiKCykBERFAZiIgIKgMREUFlICIiqAxERAT4f3FrZSn+QHfyAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Managed Memory Plots\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYEAAAEkCAYAAADJiI15AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOydeZgUxfn4P9Vz7n3NshwLcsqpIqeoeKGAJsYrtOaXGDWRJH4TYqKJUZOowYSYaDwSCUaNoHjRKsYYLyAaFVEOEeQ+5L522QPYa86u3x/dOzt7XzPswtbneeaZ6e7q6rdrut+36q2qt4SUEoVCoVB0TbSOFkChUCgUHYcyAgqFQtGFUUZAoVAoujDKCCgUCkUXRhkBhUKh6MIoI6BQKBRdmJPeCAgh7hNCbO9oORRtp+5/KIS4UQgR7ohrtyOfC4QQUgiRb2/3tbfPbea8XUKI37T3+i2QL25lat/Xd+KRl6J5hBD/E0I83dbzT3ojkAiEEJlCiEeFEBuEEBVCiENCiNeEEEPame/TQoj/xUnMk5kFQK+OFqKVLAN6AAc6WpBGOBHLVBEHlBFoGz2AfsA9wCjg60Aq8L4QIivRFxdCuBN9jc6MlLJKSlnQ0XK0BillUEp5SEppdrQsDXEilqkiPpxURkAI4RFCzBFCHBVClAoh5gCeOmk0IcQsIcRhIUS5EOJlIcTPqpvCwuItIcRKIYQr5pwlQohPhBBOKeUmKeUVUsrXpJRbpJSrgP+HZRwabd4LIVxCiIeFEPuEEAEhxEEhxMv2sfuA7wPn281pKYS40T4mhRA/FUK8KIQ4Crxg7x9sy1puf94UQgyMud6NQoiwEOIcIcRqIUSlfV+j68h1sRBinRDCL4T4UghRLUOLmvRCiCuEEF/Y+R8RQqwQQpwZc3yAEOIVIUSJneZLIcTX7WNZQojnhRB7hBBVQogtQojbhRCiievVcl0cj/sUQvw/IcQO+9wlQoh+McfquYyEEOfaefe1t2u5gxq5xhlCiGX2NbYKIfQWyNXSez9LCPGRXcal9rPUrW4+bcj3Qrssq8v0wgZkzBNCzBPWO1dmv0fnxRz/lf3c9I3Zd68Qorip8opJe58QYrsQQhdCbLNl/ZcQIl0IcbX9TJUJIV4VQmTEnDdKCPGOEKLQfn9WCiGm1sl7lxBiphDiMfv5LRBCPCSEcMSkuURYLpkSYemeD4UQ4+rk008Iscgupz1CiB+LOm4cIYTTvpeddroNQogf1snnFCHEu/b/uEcIMaO58mkWKeVJ8wEeAQqBK4AhwEPAMWB7TJrbgHLgemCQvV0ChGPS5AL7gYfs7V/bafo0ce3+gAQmNJHmNmAfcAHQBxgL/Mw+loql3JcB3e1Pkn1MAsXADGAAcCqQBOwG/guMtj8fANsBt33ejYAJfARMtMtkEfAV4LTT9AIqgaeBYcAk4HP7mt9pQZl3B4LAHVito6FYBvG0mOMFwBIsAznA/n8uizn+K6wWVT/gO/b/c1PMNe6r8x/eWOf/Sth92teuAJba/9dYYDmwFhANyWfvO9fOu6+9fYG9nW9v97W3z7W3k7CeubeBM4AJwEpb5t80IV9L7r071nvwInCaLduXwMftLNOedtnMtcv0EjvfaJna97UReA0YAwzEep8CwFA7jQDeAz4FnPb1QsA3WvjeV/9HbwGnA+cDh215q8tzItZz+KeY8y4AbrBlPxX4PdazfGpMml1AKXAnlr64FghT+/m8Cphm5zEc6xkrAXJi7m8N1nMzDhhpy3UUeDomn3l2+U3GeheuBY4A34/JZ7X9XIy381ls/7dPt6SsGiy/jlLY8f4AKYAfmF5n/ypqK5D9wP110rwc+wLY+y60/+x77Qfy6iau7QDeBVYAWhPpHgPex1YeDRx/GvhfA/sl8M86+76PpSB8MfvygCrguzEvsgRGxaQ5y9432N7+g/2gO2LSTKXlRuBMYpRdA8fvBw4BKa34Lx8DFsds30fzRiAh92lfWwIDY/adau+7uCH57H2tNQI3Yxm/rJg8RthpmjMCzd37/ViVD3dMmjPsNOe1o0x/j1URccak+Tq1jcCN9rWddeR+H3g0ZrsbcBD4O7AXeKwVz8t9WO9q7LswG4gAuXWeq1XN5LUW+HXM9i7g33XSvAu81EQeGpbh+La9fUkDz1A2dqXE3u6HZXSH1MnrHmCN/ftiO59YI5WL9c632QicTO6gAViun2V19i+t/iGESMeqvXxWJ82ndTOTUn4A/AXrAXtaSrmwoYvazcLnsBTD1bJpn+9crJrYdiHEE0KIa0TL/fsr6mwPBzZKKYtiZC4AttjHoruxHuxq9tvfefb3MGCllDISk6ZeeTTBl1i1uPVCiNeFELcKIXrHHB8NLJNSVjR0srBcbXcKIdYIIYqEEOXAj4BTWiEDJPY+D0spo+4eKeVWoMjOM14MAzZJKUtjrrMeq7bYHM3d+3DgMyllMCbvtXbesc9Ka/MdBqyQUsaOKlpKbcZitUSOiBq3ZTlWzXxQjDyFwPeAW7BavXc0IVdD7I99F7AqHoeklIfr7It1geUKIf4uhNhsu6PKscqj7rO3pu61qCmDalfPfNsldQyrZp4Rk88woKjOM1SC9a5WMwarpr+qTjndTU05VeezNSafw3XyaTXO9pzcyaj2Ict2prESWsr9HKzaxEAhhJC26Y1J4wZewqpVXSCl3NdUnlLKNcLyJV+C1dJ4DLhfCHGWlPJYMyI1pEQbug9RZ79ZR/FVH9Ma2NdUvg0ipYwIIS7FetkvBq4BHhBCTJNS/qcF+d0O3IXlKlsNlAE/B77WUhlsEnqfDRDbZ2HW2QZwtSG/tsrTlntvbn9L8m1I5rrbGrAJy2VSl8o62+djvW95WEq0sAnZ6hJqQI6G9sWWyTwst+wdwE6sGvXLQN2KWbDOdt18/oNVKfgxVismiGUM3XXOaYrq/M6mfrlUn9ueZ6TZC58MbMcq/HPq7D+7+oeU8ijWEL0JddKc1UB+92HV7s/Bqs3+KvagECIZ+DeWdT5PSrmnJUJKKcullK9LKX+KZf2HYj382PI7Gj25NhuA4UIIX4xMebbMG1qYB1j+2rGxHV3UL58mkRYrpJSzpJTnAR8CN9mHPwfOEUKkNHL6ecC7Usp/Sim/sGtLgxpJ2x7ac5+5QogB1RtCiFOBHCzlBpay6lYn71GtlG8DMEwIkRlzneFYyrC9bAAmxLY6hRBn2Hm35llpKN/xde677sCIVVj9ZceklNvrfKLDZYUQFwO/AL6B5WJ6VojGBwfEifOAv0sp/y2lXIfljurfmgyEEDlYO
uABKeV7UsqNWG7pbjHJNmI9Q7GDNrKw3tVqPre/+zRQTl/ZxzbY+QyKycdXJ59Wc9IYAdvd8ATweyHEN4Q1cubPWB1asfwF+JkQ4ttCiEFCiJ9hdcRELawQ4nys2ukNUsrlwHRgphDiLPt4GpYLZDBW540phOhuf5Iak1EI8Uv7usPtFsH3sGo+1c27ncAQ+7hPCOFpLC+sTr7DwAJ7lMNorFrMfqwx3y3l71g1rzlCiKHCGt3xB/tYS1pMZwshfiuEGC+E6COEmITVObcxJn8NeENYI036CSG+brcewGrKXiCsUSanCiF+j9XpFW/ac5+VwFwhxGghxBjgWWAdVmc3WB3yyVitugFCiGlYtcLW8CJWK+h5YY0SOgt4Bqt22l4eB9KBeUKIEcKaoDYfWCql/Lgd+c7B8kk/aZfpJGrKtJoXsJ7rt4QQk4U1SW68EOIuIcSVYLllbHkeklK+DXwLq/J2WztkawlbgG8LIU4TQozEatW3tBJWTSnWezjdfn4n2PnE/m9LsNxqzwkhxtoGeD5WP4bV6WdVfp4BnhJCXC+EGGg/B98TQlRXQP9r5/O8EGKcLfMLdj5t5qQxAjZ3Av/CKuAVQCZWB1Esj2K9FI8BX2C1Av6CZb0RQmQDz2N1TL0DIKV8Dcuf/5KwhpiNxqrx9MX6Uw7GfK5tQr5jWA/2p1hK5CrgGilltU/vn1g9/8uwHqxvNZaRlLIKy3gFsEZwfIjlMpoa6/ttDinlfqza19lYvs/HgOoZqv4WZHEUq0b9BrAN60F+AaszEinlQayyKsMaEbEBS1FU1/Lut2V/A6tcsoC/tlT+ltLO+zwIPIk1wuUTrBf8qmr3oP3/TQeuA9ZjGfe7WylfJXAZVgtjBVYZVo92axd2X9FkIB/r+fqPLec17cx3P3A51oiX6jK9rU4aP1ZLdxXWO7QVWGifs9uu7c/Dqv3/1j5nJ1a/0Czb6CaKm7B04AosvfEuVvm0GLsPcBpWn+SXWPfyKNYzU51GYr3rFcDHWOX/DpYRin32foD1n/8aqxL1X6zRSzti8rkS6537yM7nbSw3apupHuLWpRFCPAOcIaUc3WziLoCwxnB/CJxuN5NPSrrKfSo6H7Y3YR/WyK+/daQsJ1PHcIsQQvTEssofYLliLge+C/ykI+XqSIQQt2C1aA5g+TcfAZafbIqxq9ynovMhhPgGlttmE1Z/wb1YriCjI+WCLmgEsBT/NCw3hBerQ/kWKeVTHSpVx3IKVh9IHtYwusXYHeFCiLtpwrUhpUw9HgLGiUbvU9E5EUL0oaZ/qSF+KKV84XjJ0w6Sscb898VyC32ONUekw0N1KHeQoknsPpLsxo7Hjn1WKOKNEMKJpTgbo0BKWXacxDkpUUZAoVAoujAn2+gghUKhULQCZQQUCoWiC6OMgEKhUHRhlBFQKBSKLswJN0RU1/VnsMLVFhqGMaKZtKdgzWDNxYrv/R3DMJoM8qZQKBRdiROxJTAPKw58S3gIeM4wjNOBmcAfEyWUQqFQnIiccC0BwzA+0nW9b+w+XdcHYMUIysUK9jXdMIzNWLNCf24n+wArPohCoVAobE7ElkBDPAnMMAxjNFY42r/b+9dSEyTrKiBN1/WcDpBPoVAoOiUnXEugLrqup2JFhnxF16PrcleHYP4F8Liu6zdiRd3bTzvDrioUCsXJxAlvBLBaM0cMwxhZ94BhGAeAqyFqLK4xDKMly/UpFApFl+CEdwcZhnEM2Knr+jQAXdeFrutn2L99uq5X3+NdWCOFFAqFQmFzwsUO0nX9JeACwAcUYIVkfR9rlaMeWGu7vmwYxkxd17+JNSJIYrmDfmwYRqAj5FYoFIrOyAlnBBQKhUIRP054d5BCoVAo2s6J1jGsmi0KhULRNkRDO080I8CBAwc6WoQoPp+PoqKijhajUTq7fND5Zezs8oGSMR50dvmgfTL27Nmz0WPKHaRQKBRdGGUEFAqFogujjIBCoVB0YZQRUCgUii7MCdcx3BBSSioqKjjecx6qqqoIhztfKCIhBCkpKR0thkKhOAE4KYxARUUFHo8Hl8vV0aJ0CgKBAPv371eGQKFQNMtJ4Q6SUioDEIPH48Hv9/PKK69w7NixjhZHoVB0Yk4KI6Coj8PhwOFw8MUXX3S0KAqFohOjjMBJjBCiU/ZZKBSKzkOXNQJJCxfSbdw4euTn023cOJIWLoxb3iUlJVx//fVMnDiRiy++mJtvvpni4uJaaWbNmsUpp5zCww8/XGu/aZpMnz49eu51113Hrl276l3j4YcfplevXmzevDlucisUiq5HlzQCSQsXknHHHTj370dIiXP/fjLuuCNuhkAIwS233MLHH3/MkiVLOOWUU5g1a1b0+EMPPcTatWtZtmwZS5cuZfbs2bXOnzZtGh9++CFLlixhypQp3HHHHbWOr1u3jtWrV9OrV6+4yKtQKLouJ8XooFjS77kH18aNTaZxf/45IhistU+rqiLz9ttJfvHFRs8LDRvGsZkzm5UhKyuLs88+O7o9atQonnvuOQBmz57NV199xfz583G73bz44ovMmDGDp556iunTp6NpGpMnT46eO3r0aJ5++unodiAQ4O6772b27NlMmzatWVkUCoWiKRJiBHRdfwb4OlBoGMaIJtKNBT4DrjUM49VEyNIgdQxAs/vbgWmaPPfcc1HF/uMf/7jWca/Xy1NPPdXo+XPnzuWSSy6Jbj/00ENcc8019OnTJ+6yKhSKrkeiWgLzgMeB5xpLoOu6A/gT8F48L9ySmnq3ceNw7t9fb3+kVy+KX42vLfrNb35DSkoKN910U6vPnTNnDtu2beOVV14BYNWqVaxZs4a77747rjIqFIquS0L6BAzD+AgoaSbZDOA1oDARMjRF2Z13YiYl1dpnJiVRduedcb3OzJkz2blzJ3PmzEHTWlfUc+fO5fXXX2f+/Pkk2bJ+9tlnfPXVV5x11lmMHz+egwcP8u1vf5sPP/wwrnIrFIquQ4f0Cei63gu4CrgIGNtM2h8APwAwDAOfz1cvTVVVVauuX3X11QCkPfAAjgMHiPTsSdmdd0b3x4MHHniAL7/8kvnz5+PxeFp17vPPP8/zzz+PYRhkZWVF9//kJz/hJz/5SXR7/PjxPPvsswwZMqReHpqmoWkaqampDZZZZ8HpdCr52omSsf10dvkgcTJ2VMfwo8CvDMOI6LreZELDMJ4EnrQ3ZUOLKrRlLHzV1VfHVenHsmXLFv72t7/Rv39/vvGNbwDQp08f/vnPfzZ7bnl5OXfeeSf5+flcd911gDUD+D//+U+rZDBNE9M0KS8v79SLZXT2xTw6u3ygZIwHnV0+SNyiMh1lBMYAL9sGwAdcput62DCMf3WQPHFl8ODB7G+gz6ElpKamsm/fvhalXb58eZuuoVAoFNV0iBEwDKNf9W9d1+cB/zlZDIBCoVCcSCRqiOhLwAWAT9f1fcC9gAvAMIwnEnFNhUKhULSehBgBwzC+1Yq0NyZCBoVCoVA0T5cMG6FQKBQKC2UEFAqFogujjIBCoVB0YZQRUCgUii7MSRdFtCX4dkzGHdhQb3/Q
M5yi/ovanX9JSQm33noru3btwuPx0LdvX/70pz+Rk5MTTTNr1iz+8Y9/cOutt3LbbbdF95umyQ9/+EM2b96Mx+PB5/PxwAMP0LdvX8CaJezxeKKzkH/9619zwQUXtFtmhULRNemSLYFg0mgk7lr7JG6CSWPikn+i1xN48sknWbx4MYsXL1YGQKFQtIuTriWQfugeXIGm1xNABoFQnZ1hXIH15Oz+ZqOnhTzDONa9Y9cTUCgUinhy0hmBFiHcRBzdcEQKEUgkgogjF4S7+XNbSbzXEwCiQeTGjh3LnXfeSUZGRpylVigUXYWTzgi0pKYOoIUKyPtqAsgACA9F/d/FdHaLuzzxXE8AYOHChfTq1YtAIMC9997Lb37zG/72t7/FU2SFQtGF6JJ9AgCmK4+KjGuRCCoyrk2IAYj3egJAdF1hj8fDDTfcwMqVK+Mqs0Kh6FqcdC2B1lDu+xmuwFbKc38W97wTsZ5AZWUl4XCY9PR0pJS88cYbDB8+PN6iKxSKLkSXNgKmK4/ivq/FPd9ErSdw+PBhpk+fjmmaRCIRBg0aVGvUkUKhULSWLm0EEkWi1hM45ZRTWLSo/fMYFAqFopou2yegUCgUCmUEFAqFokujjIBCoVB0YZQRUCgUii6MMgIKhULRhVFGQKFQKLowXdoIVGoFvJNzDZVaYVzzLSkp4frrr2fixIlcfPHF3HzzzRQXF9dKM2vWLE455RQefvjhWvtN02T69OnRc6+77jp27doVPe73+7nzzjs555xzmDRpUr0IowqFQtEaEjJPQNf1Z4CvA4WGYYxo4Pi3gV/Zm+XALYZhrE2ELE2xNvVRCtzLWZv6KBOOxW/SVXUo6epIovfffz+zZs3iL3/5C1A7lPSMGTPweDy1AstNmzaNiy++GE3TmDt3LnfccQeGYQDwhz/8AY/Hw9KlSxFCcPjw4bjJrVAouh6Jmiw2D3gceK6R4zuB8w3DKNV1/VLgSWB8PC68PP0eSlzNhJIGIgQpcq8GIdmSMp9i13ocNB1FNDs0jPHHOi6UdEVFBa+++iqrVq1CCAFAbm5us/IoFApFYyTECBiG8ZGu632bOL4sZvMzID8RcjRFhSN2Vq6kwrGP9Ej/uF8nnqGkd+3aRVZWFg8//DDLli0jJSWFO+64g3HjxsVdboVC0TXoDGEjvg+809hBXdd/APwAwDAMfD5fvTRVVVXR3y2pqVdqBbyWNwGEtHYISdBxlPOL/k6yGd9oovEMJR2JRNi9ezcjRozgt7/9LatXr+bGG2/kk08+IS0trda5mqahaRqpqakNlllnwel0KvnaiZKx/XR2+SBxMnaoEdB1/UIsI3BuY2kMw3gSy10EIIuKiuqlCYfDrbru2tRHkcha+yRm3PsGqkNJz5s3r82hpBcsWBANJZ2fn4/T6eTKK68ELDdTdnY2O3bs4Iwzzqh1vmmamKZJeXk5DZVZZ8Hn8yn52omSsf10dvmgfTL27Nmz0WMdNjpI1/XTgaeBKwzDKG4ufTwpdH+OKYK19pkiSKF7VdyuUR1K+plnnmlzKOmXXnqpVijp7Oxszj77bD766CMAvvrqK4qKiqKL0CsUCkVr6ZCWgK7rfYCFwPWGYWw93te/oiixkTgTFUoaLONy++23M3PmTJxOJ3/961/V8pIKhaLNJGqI6EvABYBP1/V9wL2AC8AwjCeAe4Ac4O+6rgOEDcMYkwhZOoJEhZIGK5z0q6++2lbRFAqFohaJGh30rWaO3wzcnIhrKxQKhaLldOkZwwqFQtHVUUZAoVAoujDKCCgUCkUXRhkBhUKh6MIoI6BQKBRdGGUEFAqFogujjABQoJVwV/o/mOy7LS75JWo9gb1793LJJZdEP+PHj2f48OFxkVmhUHRNOkMAuQ6jQCvh0dRXWJDyARKToGhdDKLGSNR6Ar1792bx4sXRdPfccw+RSCQuMisUiq7JSWcE7kn/Jxtdu5pMEyTEPsdhCh1HAIkUNce+mfPbRs8bFurLzGPfb1aGRK0nUOsegkFef/11XnzxxWblUSgUisY46YxAS9jq2keZqATRfNr2Es/1BGJZtGgR3bt357TTTouvwAqFoktx0hmBltTUC7VS2w30PmYdN9CrxffHVZ54ricQy4IFC6IB5hQKhaKtdMmO4W5mFrOO/YBPC+bwrYqL8Uo3bhl/e1i9nsCcOXPavJ7A/Pnzo+sJVHPo0CE+/fRTrrrqqniKq1AouiBd0ghUU9cYDA/2jVveiVhPoBrDMJg0aRLZ2dnxElehUHRRTjp3UFuoNgbxIpHrCYBlBO6/P75uK4VC0fl4fGQeFYcdMXusFcJSciP8ZE1BXK6hjEACSOR6AgBLly5tU94KheLEouKwA4HkAoq5hCIW4eNDcuoYhvahjIBCoVAkADMM4aDADFrfkaAgEsT+rvkdbmR/JCjIJcCN7CWXIB4kl1HIGI7yLPlxk1MZAYVCccIiJZghiIQEkUAjyjbmmBkShAPU2h8JCrxujWOlqUQCwk4H4UD1+Xb6QM3vcMBOU32NmGPV15dm28agOzBJIUIyEW5jB17MaOetB0lP/NzKTspIj0sZKiOgUNThePhhTzSkxFKYIWErQSyFGhS19tdKE8RWjoIkl8bRkpSadDFKOhwEs5kacbih/SFL+caPdIRD4nRLNDc43RKHW+JwY3/X/PakmzjdoLkkTo+93yVxxP52mSRJk6RwBG/QxBuK4AmauP0mHn8Ed5WJq9LEVWHiKpc4y00cxySOKtmklA7gIB5S43TXyggoFHVozN8aTz9sY5gRGnUZ1Kqdxhzb69E4UpxU41oI2cq3kd9RZWsr0ZbUds1QPJRtRvSX5rKVrQtbiTagcF3gSjFr0rltJeuy0zTwO6q4PbYirvPb6YnJK+Z6eT1zKD1WhNbQXywlohK0UhOtVCJKJVqpjG5rR+ztw2bN71KJOCoRjehzqYGZKZCZAjNLYPbTCGc5CVZvZ2mYWYLPf+TlPErwUJORH43lZDGJyjj8J4lbaP4Z4OtAoWEYIxo4LoDHgMuASuBGwzBWJ0IWhSKefPlyUrR2W10jjtZKY2q+kSC2Em2kFltXwdt+Yxlpq7KtP5QYsGum0laC2EpQ4nTXHHN6JZ70OgoyVsHWqw3beTVRU65bm87tns3R8uKochcdNTg9KGsUd4EZVdieYAkZ+ytrK/UYJS+CjWdpphJV2maWINxbQ8Zsm5k1St20lbxMF6A1/18v+3U25xSXQowRMIEdOcmd2wgA84DHgecaOX4pMMj+jAfm2N8KRafmndvrK1uhNVDjdINmK0OHrXDdKSaOLFqsSC33AnZNWOL0NKxsc7plUlZZWusch1uiOUEch9AoLSHVB/7GqsVtwbRq2lppzOeIGfNb1q65V29XNJZhJakeaint8ABHPSUuG1DquBJXyNO/LOIIGRwBfD4fRUVF1n6K4naNhBgBwzA+0nW9bxNJrgCeMwxDAp/pup6p63oPwzAOJkKeukz2pTA6GOZn5UHyzDg+mDYlJSXceuut7Nq1C4/HQ9++ffnTn/5ETk5ONM2sWbP4xz/+wa2
33sptt9WEsDZNkx/+8Ids3rwZj8eDz+fjgQceoG/fvgAsXryYBx98ECklUkpuu+02LrvssrjfQ1dEmrDlbW+TaW5ZUVBf2SbeS9QkPh9oRSdoNNmmXC1HYpV8K10tGbbCzhKY3TXCQ0RNzTxLi9bILcWukTXQR1FlceexmseRjuoT6AXsjdneZ++rZwR0Xf8B8AOwJkn5fL56mVVVVbXq4hvcDra5NBakuLm2Ihh3Y5CoUNJSSm699VYWLlzIkCFD2LhxI1deeSVTp06tF5ZC0zQ0TSM1NbXBMussOJ3ODpdPStj0tuC93zk4sLZpP0XPoVm4ndDKKCAJpTOUIQBBCcURRGkEik0oiSBKIlAcwXGkhLwie7vEtNLZv0Ww8XdPpmmQrSFzHJDlQA7UkNkO+6NBjvWbmG0ytFquFo3mQyM4nU58KbnxKYcEkaj/uaOMQEPmtsEnwTCMJ4Enq9NUN4diCYdrAsDdk+5ho6v5qlnQtvjzU9zMT3HTLSLJj5i4mzhnWCjCzGOBZvNOZChpIQRlZWUAHDt2jG7dujUYl8g0TUzTpLy8nIbKrLMQ28Q9nlQKP4dEKVuWwdY/noJ/lQ/Zv4SSp9/D++PrSGlg1EmFBzK+Yz0hTk3ickrcDnA5JS4HuJ0St1PicoLbIevvt3+77HPcDonbSa18au2rlWBBwPgAACAASURBVNY+t9Y1rbR5uZmUHyupl5dTa2PFtq6rJca10jZXC0i3gCyIVNfE+wrMkQ7MLGeNq6VOp6iZIcDd2A1IIGJ/YogAJa2/5Y56DltDe2Ts2bNno8c6ygjsA3rHbOcDBzpCEGm/JQUOqNQ0RoTMuOYfz1DSQgieeOIJbrrpJpKTk6moqODZZ5+Nq7wnOlUEKHSUUuAo5ZBWQoGjhEKtlEOOUgocJRRoJRQ4Skn9dCATfvND8v83mrLeh1jx9wfZPjCI+Pgy/FOb0JzXzyY1mE6PQHe6BbvhC/hIC2YQDguCEUEoLAhFIBgWBO3fRys1QhEIhQUBe18omh5CEStt++heb4/AJAtJXiRMbjhMt3CE3FAYXzhCTihCdiBCVjBClj9CRiBCpt8kvSpCqt9Ea6RybgqoStGoSNXwp2kE0gSBgRqhdI1QuiCcqRHJFERshU62hswW+HpnUll+JGq4XI4ag9aZWlVdkY4yAv8GfqLr+stYHcJH49Uf0JKaeq+eNZMs3FKiQdQt1C3OfQTxDCUdDod5/PHHmTt3LmPHjmXlypXccsst/O9//yMlJSWucnc2AoQojFXsjlIKtFIOOUps5V5KoaOUI1p5vXPd0km3SBZ5ZjanrZjA5JlX4Fw0CK1bFa5bd7E1N4Ud/7udwHsavXLC7Nekpe3q4gpz1aVDOKRtZZvrNbY5tgPlJJteRoVOZVxwCGODQxgVHEyqTKp/fhNISYxxsL9jfof9EkokohREqYmjVOI4InEdNUmpckGRH3eZifeYibfcJKncJLnCxNnEYnkVbkGZ18GRJAdHPA725rgodTso9jgodjkpcjkocjoodDkpcDg5qDk44nREK071KLM/exs+DHkN7u0Mrapu5YKKMqedrn5ebW5VxYFPtrj55V/CPPvZtXx3whM8dJuDcwY3MVyplSRqiOhLwAWAT9f1fcC9gAvAMIwngLexhoduxxoi2noN2U4SrfyhJpT0vHnz2hxKesGCBdFQ0hs2bKCgoICxY8cCMHbsWJKTk9m2bRsjR46Mu/zHgyAh9jsOU6CVRpV5XcV+SCuh1FFW71yndNjKPYsB4Z6cHRxBXiTL+pjZ5EWsT5ZMpWizi6UPpbH1nSTcGSb+KwO8lupm/66+eApM8s4OEbowxP4RwAYH2kwPP99Ywp3bivjjqT4eHZqNeU+I15PHY9VbrgfAJSM4OMZqVyFL3ftAHETIDfSKCE4PZjAhmMdF/r70NW1fboJcLWaWPbIlW8PsLzCznFRlCWRmzFDFLIGZGTOqxXa1pNufPjU5AiH7U5uICUG79WK1bGp+B2NaNTW/rW9vUjpFpWV2Wmq1mgIhUWe/3ZKq01KqblUFY1pboQYMZqjNw2y7NXpEiPpuuOrvasNRY7BsY2X/ttLZhstVd3/NcXedtC6nZPshJ39+M51HNtzFucXL+dX6R7hhziyevaUkboZASBl/5ZdA5IED9b1GZWVlpKWltTiTyb4UxtijgxKh/MEKJb1q1aoG1wNojueff565c+diGEatEUWFhYVMnDiRt956i4EDB7Jt2zauuOIKPvnkk3ohp3fs3M7r2+9lmHMal154ZVzuqTWEiXBYO2Ip85gafFSx266ZYsexeuc6pEaumUl3W4nnmVl0i2TRParYrd9ZZhpaM11+JTscfPJwGhv/lYTphY0jTZZmOQh6JJwZgQvDMDZMb4fJaaEII0IRFhY4MC4vYtD2EKkRSblDsGugyc9fe4H7fT/lkEPjoIASv6D8KFQdhXCpiXnEqqFnlEpyis3oJ7skQk5xGF+xSWYpjbpapKgzgajO2PJa29kiOhY9p7ePouLiePxtCeN4+txjW1W1jFQDBqP625ucTlFJWb20oQiWC6+W4bGMU6COEarrBmzsmqGIJZeUzRuryjf7kmTW925UaR5K9y5rcZnYfQINXrBLGoFEs2XLFi666CL69++P12sNOWxNKOkhQ4aQn58fvafYUNILFy5k9uzZCLtt+otf/IKpU6fWy2fz7tW8tO1n5CedwfRz/havWyNChCLtaG2fe6xrxva5F2lHkXXG8GlSI9fMqKXY+3vySSv3Woo9kk2emU22mYaDto27lMA+h+DzQy42PZgKL3uIaLC2H3w+QBAYHCHnvCBjxwcYkxJmhK34s2JEdY48SnaxidOsnS9OCA/QokMVm5pAFE4Bf7ZGaY7JoZwQB3MiHMiBQp+L4pwUSrJTKM5xUJyjUZKtUZyj4U8X5CHpHrE/pmn/tr57mCbdIrLe4IWTvVPzeNAR8lmtqvqtpqjxCgu+P9PkofW/48pD75ISqaLCkcTrPS7lF8PvZfUzTfj66tCUEVBhIxJAIkNJX3311Vx99dXN5hMSFYBkj2cV1/Q4n72kc1/Qgdf04TVzSDJ9eCM+69v04TazqZRuSkWEAseRRn3uhdoRTFG781xIgc/MiLphTg8NsN0xWeRFsuluZtMtkoXPzMBZR7n7fD6KKtv28pnADofGBpfGOreDdS4H2wudDP6lhxELnEgJ6/oK1o8xGXaRnwfGVXFZ9wCp1Qq/ERfLrqFOun1UW8MLwEyGcP+aCURNzQqtdrU4sMY+9wIkkr2OQla6V/Cpeytb3AXscAZB+BAyF5/ZH2H2oUjLZa8jjSKHm0ADjmifbRSqjcQALUJ6kquW0ciQx2UJbUU7cGiQ5JYkuaGRwZGUpvXgmCsNb8RPlebBG/FzzJnGkbRcGhhR3yaUEThJMYEtZLKbJPYTIiIEG4VGoWsfxdo2SkWEY8JJGW7KcFGGKyZWYQ3pUiPbdOOTyYwKZ5AX6UePSDd6RXqRH+5Dz3B3cs1MXAl+lMLANqfGOpeD9S7re4PLQYU9HjxtA0
z4lZer33UgTMGWfpK0a/z8aHIl5w4O4Ky2PS1o+A76phPz82AtP7yZAkd/n0zVNa1bJS4WgaBPJI8+VXlcU3U+AEdEOZ+7t7DSvZlV7v/whXsbfruJ0Tucx+nBMxkYPp08cxBO8ijQNMsd5RAccGisdglKHBHIqu1y9JqS7mZNK6KlrQpF52L+/xXjuv4wT/T9Lk/2vZ4f7JpPr1Ah8/8vfu4/5Q46Sfl4z3Lu3voIgUzJ/m/U7+DLiqSRa6bhM1PIkW4ypUYGJukESaGKJHEMr1ZCWCumSjtMuJGeSaeZXK914TVz8Jo+ksxcvJGcaGvDY2ah1TEWDTXD/cAWW9FbSt/BJpcWrRUnm5LhoQiDj5pEljipeDyFniscOCNQMizCGT8u54oplaQlte3ZFsckeeOPoh2LideSLihYnmHFfEkgQUKsd+1kpXszK92bWOneTJHjKAAZZgqjg4MZGxzC2OBQRgYHkoSHNF8OG0pLLOOgCQ45NA45hG0warZb0qroHjFrG4+ISWYcWhXKHdR2Ptni5vq/5xAICTwuyfz/K251p7ByB3VBLPUliK36/uvwLLqb2eRGMvG2sg4YFlX4tWL8WhFVWpH121FUa7vcsZ8i15f4tWKkaMBfKQUeM8s2Cpah8Dr6syNjALvFQHY48tnu8LHDkUrYVlgZpmREKMKNFUFOC0UY6jc5sMHF6x8lsX2hl9O3auSGQBsT4tLfHGXE2PaPmJDpgkObMoHjrxzcuBgVOpVRoVP5YcU3kEh2OQ6xwr2JVe7NrHRv5v10K9aiSzoZEerHeY4zOc3Vl7HBIYwzMxu+J6BUiFpG4ZBtMGq3Kuq3BlWromM5Z3CQHY8dTNizqIzASYoAtDq+j7GhIW3OzymTSI3kkxppfkUjiUlQHKXKYRmN6s9hRwWbnMmsdmaz3dmD3Vp/CumLTLEUTwqF9ORTzmEtPVlDvtxET7OCJOHj8IGxvPvhN7jvw/PovSaTsdsl+QFB5rmlTLn3IL2HJeM4CdWQQNAv0oN+VT24tuoiAEpEGavcm6NG4QntXwSyLePXN9wjOl9hbHAoA8O9EAgEkC0l2WHJsHDjEyIDQEG1odBiDYb1/YXbwSGHs9lWRV5E0iOmVTFUmHgEcWlVKOKLMgInKfmBIKOPVbAiw4vblAQ1gRY6iOnqkfBrCzSOiWzWO3yscw2LunT2OGtqmT3DJiP9Eca7XPQ9WsKgyGHSKSTg8OPXcqjSzuBQ+RCWfDqMD5eezb4dgxixN8LVO4J4j4Hr4v+S9vtf4xq/gv/ZebrNDKuFEe3wzsFr5pIUsVsd9icpkoNbZiCaGV5aqRXwsvNaztb+RrLZ+Bjy4022TGNyYCyTA9Z8kTRfOu8fXcEq92ZWuDex2LMKI/kDwHL7jQlZLqRxwaGcFhzQZCvQA/SJSPpEGg9I17ZWRRh6pKtWRSdE9QmcpOzZ+jFbP7wbLd3kEz2Fz9IFX6xYT9B7Jv60yfjTphJ2D2r3NEgJHNAE6+3ROetcGutdDg7FKIC+4Qinhaxx+NZYfJNse35G3SZuVRAWfenl1eXJfLjJgxkWTK4KM3StwDzsIH9cgAm/KsB3zl7LFeUoxq8dtt1SNS4qv1ZMlVZEQCuloXCTQjptI5FDUsQ2DtWGwu7X2J68gN3edxhc8V0mHJvVrnJKJHXLUCL5ynGAlZ5Ndt/CZnY4rffGLZ2cHhrAuMBQxgaHMCY0hGwzPssU1iUAFNqGoiIzna0VFbVaFU31VeTU6ZvokaC+imo6c59ANXGIHaTmCXQldm9fzbYPfkZSzkjOueoxnMHteMvew1v2Lm7/FwCEXX3xp03FnzaVYNIoEE2PzTeB3Q4RrdlXj9SprvFpUjIobNpj7y2lPzwUIb2JR8zn83H4cBErvnLz6vIk3vw8iTK/Ro+MCNc4g6QvcVG+x0n3M4JMvKOMfucHWmW3TMIEtFLbSBzG7yi2+zBq+jWifRxaEWGtgYU6pGBIxY0MqLoaX2hksy2I401LlEORdoRV9iikFe5NrHPtIGT32wwM9bLdR5YLqV+kh+1ASryMzbUqCmy3VHFDfRVS1mpFNNS5ndfCVoUyAq1A13VhrwPQEcTNCMSjx70xErmewJIlS3jwwQcJh8NkZmbyyCOP0KdPn7oisHPHdrYvvRdnj2lceEntGcNa6BDe8sV4y97DU7EUQYiIIwd/6mT8aZMJpEwkrCXxlVOL1u432Iq/zB6S6ZKSwaGaWbanhUyGhSO0ZkDOzkIH76z3Mf8DyZ5iJ8kek6+N9HNhJEzRC0kUbXbhGxJi4i/LGDTFf1xit4REJX6tiJXpM9nrXYQUEWsqL1a1MymSR2//xfTxT6VH4BwctH3IaLxoi3KoIsCX7q9YYY9A+ty9JRp3KSeSXssonBbqj9uK+nJcZYwltlVxsIG+iva2Kgbm5FBcdPLOuo63EXjEMIyft0mS9hMXI/DJFjc3zMmmKlhTu0hym3GLx1FaWsqmTZtqrSdw5MiRWusJrFy5kocffpgZM2YwadKkaHRR0zRZsmRJrfUE3nnnHQzD4MiRI5x77rm88cYbDBgwgNdee42FCxfywgsv1JNh586dbN26FafTyaRJkxqVVUTKEBUfsiOykQ2OKtakDmZ12ijWpp5GlcNScF5TMizWpROMcGq46bDbjXGkUvDm50m8ujyZVTvcCCGZODjANeOqGF5msuKRNA6tdZPdP8w5vzjG0Mv9x30pwkqtgNfyJhARNdP1HdLDmKO/5ZDnU/Z7/kdYq8BpptArcAF9/FPI90/CIxsemZNo4lGLNTHZ5twXdR+tdG9it7MAAK90c0ZwYNQwjAkOIVO2bpnz41HTbqpVcSims7vhVgW2Uajdud09YtKjla2KRNFpjEAH06wRuOeVdDbua7rW8tl2d4NxO4SQnDWwcSMwLD/EzGn1Y900x1tvvcVzzz3HggULmD17NuvXr+exxx7D7Xbj9/uZMWMG48aNY/r06fXO/fLLL7nlllv45JNPWLNmDT//+c/54AOr06+0tJQRI0awbt06srOza523fedOvmrACFQJ2Ois8d2vcznY4tII2TWotEiIMyp3Mfrox4w5uoyRZevpJzMJp03GnzqFiLs3rSUUgQ82eHh1eTKL13kJhgWn9ggxbXwV35vi5fCnx/joz2nsW+4hPT/MObeVMeKaKrQOGrbwafpdbEt5GTMmLoQm3Qyq+BYTjs0ijJ9Dnk/Y432Pvd7FVDkKEdJB9+BZ9PZPoY9/SotGUcWLRCnYAq2EVe4t0eGp6107CQurw3hwqDdjbKMwLjiUPpG8Jl1Incnd0lCr4mhqMjsDgU7XVxFLooxAs6+Zruv/NAzj+/ZvATxlGMbNbZKkk9BY4KaWBHRqLfFcT6B///4UFhayZs0aRo4cyeuvvw7A/v376xmBgw6N/2gmg1waX6W4o0p/u1PDtB/urIjJ6SGTH5QHbZdOhFMiEo08ENfg8g7CG3oPd/l7pBTcS0bBvYQ8w/CnTaUqbQphz/BGO5alhPV7XbyyPIl/rUyiuNxBTmqE6ydWMG18FSN6h
zj4hYu3r09l2399pHaPMHnWEU7/ViWODh4aUuj+vJYBADBFkEL3KgCceMkPTCI/MAl51KTItcY2CItYkXEPKzLuISs0jD7+qfSpmkJ2eHjcfezHgzwzm6/5J/A1/wTAWohnjWt71Cj8O+kTXkhZDEC3SCZjg0OjrYXhoX4Jn0XeVjxA74ikd8wIKF9SOkVHjkS3Y1sVsUNmD8a0Kta4mu6ryGugr6K1rYrYpXATtXZcS/6l/tU/DMOQuq4PSJAscaElNfX+t/YgEKr/Unpckld/Hl+/YDzXE0hPT2fOnDncd999BAIBLrzwQjIyMnA66/+NElgjJMuT3ezP8JIbMTkjFOFr/rDlxw9G6GnKxlWTEISSziCUdAZl3e7AEdxldyy/R2rRo6QVPUzY2cvuWJ5MMHk8CBcHj2i8viKZV5cnseWgC7dTMvl0P98cX8kFwwK4HFC4wcnCm7LZvthLik9y4T1HOfO7FbhaF2w1YVxRtCj6u7nal0AjNzSK3NAoRpfdxTHHDvZ4F7HH+x5rUx9hbdrDpIR70ds/mT7+yXQPTkBrp3+9o0iWXs4OjuDs4AjACia4xbnXDnlhdTi/lfQpAEmmhzNDg6JGYTJnN5V1p6P2vAqot4KZTXN9FWuamFfRXKsiLyJrLYV7gxnmR5qI+7rozbqDdF1/BXgPWAZMAC41DOObcZWi5ZwQfQLVzJw5k02bNjFv3jw8ntZ1IM6dO5eXXnqJBQsW1AsTXc3hw4cZP34869evJzk5udaxj/bs5tdbNxDIzODA5VMZH4zwWnEDI1/agBYuxlO+BG/Zu3grPqIyqPHazu/w7Nb/44NdpyOlYOyAAN8cX8XXR1WRmWw9Y8XbnSx9KI3NbybhyTAZ/6NyLvmll/JA53ATNER7muBVWhH7PEvY432PA56PiGh+3GYG+f6L6O2fTK/Ahbhl+0e1dSZXy0GtuFbIiw2uXZjCREjBkHCfqPtobHAIvSK5naaFlMgylMARQb2+iYMt6KuIxSHBiWzTuujt6hPQdT0Za6H3QcAW4GnDMOKjTVrPCTE6CBKzngBYawp069YN0zT55S9/SUpKCjNnzqyXx0d7dnPP1g2EMzM476KL4r52gmnCsm1uXvvMzVtfJFERdNEvfRffHTKPbw8x6N2rJ/7UqfjTLqFkf3c+eSSNDa8l4UqSjL65gnE/LMebITuVAmuIeMkXFlUc8HxkuY08iwk4StCkm+6Bs+njn0If/2SSzfpLRB5PGRNBuahitWsrGzP38GF4NZ+7t1Ch+QHoHsmOGoSxwSEMDfWtF2X2eNEZyrChVsXvMrz10gkpW12pa68RcADTgFxgDnCmYRgrW3z1+HJCzBNI5HoCv/jFL1i5ciWhUIjzzjuP++67L3qNWD7es5vXtmzkAqeHKy+8KG73tv2Qk1eWJ7FwRRIHSp2keU0uH13FtPFVjO1fiadqJd5yy21UfiDCey/8ls/e+T6aUzL6uwWMm+EkOacmbEFnePmaIhHymUQ47F7FHu977PG+R5lzl3Wt4EjbbTSFzPDgFteSO3sZQo2MYSJsdu625ivYk9kOOiwXbIq9TOfY4BDGBoYyKnRqq5fpbK98nY3aS+FaoWDashpie43AS8CHwHcNwzhb1/UlhmFc3OKrx5cTwgh0BhobHdQWSso13lhlzeJds9uNQ5OcPzTAN8dXMvl0vx0PvYaKwxqfPZ7KF88lgSkZ/41XmKrfTobvICH3IPxpU/CnTSHkHYkvt1unfPmqSbRykEiOOLey1/see7yLKHJbE/nSwn3p459Mb/8UugXHojVRQ+6sCiyWpmTc7zgcna+w0r2ZTc7dSCHRpMaw0CmMCw6NjkTqaSame7SzlmGvnunRpXBvMB386PDRNrXo2xtFNNcwjCd0XddbfWVFh9HeRnUgBP9d7+XV5Un8d72XsCkYnh/i3muOcuWYKrpl1A9CVlUqWPFEKp//M4VwUDBiWiXn/KycjN7n4w+9gSxbRFLZu6QWP0Fa8eNEnHlw5HI8rgsIJJ8NWsdPvDreCARZ4cFklQ/m9PKfUqkdYq93MXu877EpZR4bUp/EE8mid+ASevun0CtwPs7jVDs+XvSK5HJVVS5XVZ0HwDFRwWr31ujs5peS/8szqW8DkB/Ojc5VGBccyuBw7zavQnciMDwYiS6FOyw7h6IELIfbEiNQqOv6tUCSrutXEa/lbBSdDilh9S4Xr36WzL8/T+JIpUa39Ag3X1TBNeMqGZbf8HJ2gTLBqqdTWPGPVILlgmFXVnHOz8vIHlAzoiLi6kVl9k1UZt+EiBzBW/6+Ndro8MvkRJ7G1FIJpFxojTZKvRDpyDhet92pSDa7M7jyegZXXk9IlLPf80F0tNH2ZAOH9NIzMJHe/in09l9CUoJqxh1JukzhgsCZXBA4E4AQYTa6dkVbC8s863k9+WMA0sxkRgdPjQ5PPTM0iGRZ3z16orKoqJHl7+JIS4zA94CbgdVAPlB/RlMD6Lo+FXgMq1L6tGEYD9Q53gd4Fsi009xpGMbbLRddES/2Fjt4bYU1i3dnoROvy+TSkX6+Ob6q9qpcdQhVCVbPS2b57FSqSh2cemkV595eRu7Qptc+lY5MqjKupirjanzZqZTtecMefrqIpLI3kTgJpJyNP3UK/rTJmK6eCbjrzo9LptLXfzl9/ZdjEuKQ+zP22gZhb+ZikIJuwTEM0a4ix3EuGZFOPXq7zbhwckZoIGeEBjK94vLoMp01LqRNPJj+EgBO6WBEqF/UfTQ2OIQ8M7uZK3RtWmIEhhiG8biu692AG4G+wOamTrA7k2cDlwD7gJW6rv/bMIyNMcl+AxiGYczRdX0Y8LadtyIOOA4exPGHP5CWlETS0aNU1VmXuKxK8PYaL698lsyn2yw3zIRBAWZMKeOykf4mV+UKB2DtCyl8+tdUKg476Hehn4m/LKHHGfVXMGsWzUsgdRKB1Ekc7f4ArqrVeMsX4S17l8yCX0PBrwl6z7Ain6ZOJewZ3O7IpyciGi56BifSMziRccdmUuLcYBmEpHf5yHk35EFGaBB9/FPo7Z9MbujMThfoLl7ELtP5zaoLgJplOqsnsj2fsoinU63BFKeE86Luo7HBIQwK56OdpGXTFlpiBP4CTAJmYnUQz8WaL9AU44DthmHsANB1/WXgCiDWCEiguus7A6jf46toM7vdvfjdqfcgZQpP/fEpRgFlV1zN0i0eXl2exDtrvPhDGv27hbnj8mNcM66K/JzGY8gDREKw/pVkPnkklbIDTnpPCHDlk6Xkj4vT0FqhEUoeQyh5DGXd7sYZsCOflr9L+uEHST/8oB35dLId+XRMs5FPT0YEgpzwCHLKRzCy/DZcvgq+rFrAHu97rE+dw7q0x0mKdKO3/xJ6+yfTI3AuTk4eF0lDZMpUJgVGMykwGqhZprPaKHzoWcNryR9aac3UmGU6h3BGcGBHit7htMQIJOu67gE8hmG8pOv6j1pwTi9gb8z2PmB8nTT3AYt0XZ8BpAANjjjSdf0HWPMUMAwDn6++D7SqqqoFInUtqucCBzUXXx/1OpcbS1j6
eQ8OlgqyUiQ3XGjynfNDjB0oEcILTSgJMwJfLNBYfL+D4h2CPuNMvvXPEAMvFAjRvlj0Tqezwf/UwgecBdxLMHgQrfgttOJ/k1I6j9SSJ5FOH2bO1zBzLkdmTgJHciP5JEq+zoHT2Z2JSXcAd+APlbJDe5ft4k12Jr/B1pQXcMkU+srJDDQvp795KUkcf/dIR5RjT3pEZyrLiGR7ZB+fauv5VKzjE8+X/Nf7OWAt0zmawZyVN4KzzdM5W55GLh0TDLApElWGLTECzwNvAPfquu4FdrbgnIba63X9C98C5hmG8Rdd1ycA83VdH2EYRq1hJ4ZhPAk8WZ1HQ8O4wuGmfdB1eXxkHhWH69cgU3Ij/GRNQavyaoj2hJIGuOuuu1i5ciVCCFwuF3fddRcTJ04ErFnCP/3pT9m7dy9er5c///nPjBo1qkl5/I4kXsn6OpdmFHL/NBcXDffjsSMXFDcRJUOasPUdLx8/lEbxVifdhoW4Zt4xBlxsxfRv6tyW0vKheS5wXQndr0TkluOp+MDuWH4dV8GzmMJLIOUCKxR26iWYzvgous46dDCWujJ24xK6cQnjCXDQs4w93nfZ613MNufrCOkgLzjedhtNIS3S+oCA8ZCxI8giicsYy2VYK7JVL9O50r2JNSlf8XftNR51LACgX7hHrYlsA+xlOjuSOASQa5BmjYBhGLOx/PvV3NiCa+4DYp+ufOq7e74PTLWv8altYHxAYQvybxcNGYCm9rcWIQS33HJLrVDSs2bNqhVKeu3atSxbtowZM2bg8XhqBZa76667SE+3atgbNmzg2muvZd26dQgh+OMf/8j48eN56aWXWLFiBTNmzGDp0qWI5vzkQvD2Y90JfjKG8u9/H/+ll4Kr4Rg2UsKO9z18/Oc0Cta7yR4Y4oonShj8teMf1rlB+Ryp+NMvx59+OcgQ7spP7U7ld0kqfxeJRjB56kvP+AAAIABJREFUvL0+whQi7lM6WuQOwYGH/MCF5AcuRB79I0WutdH5CCsy7mVFxr1khYbaM5ankh0a0eGK7ngSu0ynz+NjX9EB1rm/sjqbXZtZ5FnJguT3rbSRdMYEBzM2ZBmF04MD8ZygMaDqkqgwfyuBQbqu9wP2A9cB/69Omj1YfQ3zdF0fiuWPONzeCy+5J53CjW3/c178Zk6jx7oNC3HxzOYD1GVlZUUNAMCoUaN47rnnAJg9ezZfffUV8+fPx+128+KLLzJjxgyeeuqpaCjpagMA1kS4WAX/5ptvsnz5cgDGjRuHx+Nh7dq1jBw5skmZvCLE0fvuI2XuXLJvuYVI9+5U3HADld/5DmZMBNLdS9189Od0DnzuJvOUMF97rJRhV1WhdVbXu3ARTDmPYMp5HMu7H5d/Pd7yd/GWvUdG4e/IKPwdIc9Qa4Ja6lRC3hFdsmPZCnR3JrmhMxlVdifHHDujkU+/TP0ra9MeJSXck94Ba8ZyXuAsHF1spV8vbnuo6VDAXqbTuT86X2GlezOLkqxgCR7p4vTggOhEtjHBIWTHIQ5UR9CoEdB1PcswjNK2ZGoYRljX9Z9gBZ5zAM8YhrFB1/WZwCrDMP4N3A48pev6z7FcRTd24IplCaOtoaQffPBBXn/9dY4ePcpTTz2FEIKSkhKklLXCRvfq1YsDBw40aQSStBDPzjhKxeDpVHzve3jef5+UZ54h/U9/Iu3RR6m86iq2jPsZ/3ttJLs/8ZDWI8KUPx3htGsrcZxIlR0hCCWdRijpNMpyf4kjuNvuWH6P1KK/klb0KGFnT3vG8mSCyRNAnEg3GD/SI/0YUfEjRlT8CL9WzF470N22pJfZnDIPl5lOvv8i+vgn0ytwUVwC3Z1oCAQDw/kMDOfzrUqry7J6mc5qo/Bk6pvMFlZI90Gh/P/f3n2HR1Xm//9/npnJZErKECChIwqKKxakyIoKKE0FYsE7iGLFspbP6ioqLrqrroJ11d+6uH4pFlzhFsEgIsWCXQFFXSsiIM0kQEibksnMOb8/zhADJJCEmcwhuR/X5SVJTmZenJB5z7nPfb/v6oVs/cI9E7JNZyLU2TZCCPEs0Ar4GVgOfCKlbNjge/zFpW3Ewx3rHh+7c1t8JyndfffdFBQUMGPGDGy2ho+lfPTRR0ydOpWFCxdSUVFB//79Wb9+ffXXJ0yYwMUXX8w555yz1/d9+MVm7p6+Dt3h45FbT621OZ5j3TpKHn2Hd5cO5Ad9JGkpxZx2wY8cd38nHGlN0wu+qcaKbZFiUitW4CpfTqp/JTYjhG7LJJR2VmxLzSEY9v13y7LCWPbBxDNjRAuy3fkhW2Ib5oTsu7AZKbFGd8PpHBqOV2/4ug2rn8fG5gtSydfO9dUtL9Y4f6TUZi7wahPN3Gt1c6+qboe0TWeTbyojpbweQAjRA3O+/7WxTWU+BxZIKbc2Kk0Lcv/997Nx40aef/75RhUAgNNOO43y8nJ+/PFHTjjhBMC88bznamDbtm213vTplBVlykVRHI7SWgvAjp8cfPTYKaxbMhhXRpRhfZcz5Meb8M77mchHHQlccQX+iy/GqKON9eFGd2QR9OUR9OWh6UFS/R+YeyxXLMdTtgBDc1LpOY1Q+gi8u2eRUvlT9ffuObvh1OPYeeTy2p+gmXAYbrpUDqdL5XD00ig7nF/Eho2W8Znvr3zGX2kdPpEu1Y3ueh4W73YTxU0qA8LHMSB8HLD3Np17pqe+5TaHb12Gk5PC3atXN/cJH9PgbToToT43hn/GvBr4d2wR2ClAe8ybv4clb9tonbOD4mXatGl88803vPTSSw3aS8AwDH755Re6dzfnLn/99dfs2rWrejP5UaNG8eKLL3LLLbewatUqQqFQdXGoj90b7Xz0RDrfL3Tj9BoMvK2MfhP9pGb0ojT6DpUrVuCdOZOMBx8k7fHHCV54If6rriLSs2fDToCFGTZ3dRM7jAjO4JrqDXN8BXdiYE6x1WpMaDNwmusSWhAbdnLC/ckJ96dv2RRKHevZ7FrKZtcy1mY8ytqMR0mPdK3ufGo2urPmbmJNxYaNYyJdOCbShUsD5hBwoa14ryuFf6ctJBrbY+GYyN7bdHaOZh+wqBbainky7VW+cP7E8p1PxCVzs9tj2AoOpZW0ruuMHTuWkpIS7HY7LpeLSZMmccYZZnOtoqIibr75ZrZu3Yrb7Wbq1Kn069dvv8f56oPNzPnrOuyVPrK2jcGdFaXHyBD/m+fBnmLQ52o/p1xfgTur9p+/44cf8M6ahWfBArRQiMqBA/FffTWhoUPBHr+7xJYaJjAMHJU/4S6dT1rxs/sUAQdFR75LNNV6rRmScQ4DtkK2uFawxbWM7akfoWthUqOt6FR5Fl1CI+lQOYgU4/d1G5b6OdeiKfMFtBBrU36ubnnxhXMd5TZzb4CcaKu9VjcfV9UNB3YKbcU82/YNXrC9hYFOWIuwbfuCej9ni9poXjHtWwQA7E6Dkyb4GXBTBWnZ+3cBrY1WXIz3lVfwPP88ju3biXTpgv+KKwiMG4eReehN3qz64pDx22S8Ja+gUYWB+dtjaC6C6ecQ8OUR9pyKJebLkvxzaDa6W8lm1zK
+       [several base64-encoded "image/png" matplotlib figure outputs (notebook "display_data" cells) elided]
oFhnH+KoH2O/8xFpM9n9szXoFV7SQvsFLrMVk4xEqKXCz9DC6MKPuQmbUXdisn2Bi6JR6P0E3o6D1Qk8QvBIuDOpcGNSRlbAuLpT0T9lO/pDjoosVSjrFCiXNk+YrZ8dar5wt6iTZ22L5vwAsAO4EHgWu8Pv916dBFmX5t5GdW1awZuFdfOvS0wmc9FtE9DBZlfPJqngBR2g9hvAQzL2UuvxvE/acxhGl7zxKOssijBJij3sx290L2O1eiK4FyIr2qA8d7RI5BYGgTivlk27/zZkH/kKWkVlTZjE626pO9BPsth8AGvsJzsgfzaGyjI0BATqvHsuF4AO3GUr6vttGhaZhs0JJP3PZsUuJDZhh2LjlYCXFR7DqON3TPm7ge8AwYANmnH/wSC7WCkr5t5Ht27aw9v1fctG3f4fN07Nhh5Q4gl+RVfECnqrX0IxaIs6B5mggbzqGvelL49NFZysugIioZbf7HbZ7XmOPazGGiJCr96Nf4DIqbdvY4XmTIbXXc0ZVZjrLM6EOY0gk6+07699jHPMTDJAlTKkdk9HrCTKhHqPACoeN96xRwVpHQz1pEhxIrqoNc2dNuF2dQLqV/2i/37/S5/N1A24EXvf7/RuO5GKtoJR/G2lLPn9h1OKuehNvxQs4A8uROAjmTKMu/9uEvGeDSO9DmgkPXDwhUcFOz9ts97zGPufHIMx2r0kHlx/4gNxo8siqziTT6jCevVoZ77iX837uShaLFUQsP8H5odO4IDiOc0KjyZJpTCXbDjKxHkt65jbZJqRkQjjKq4fq2lxOuuf8fw9MAR7EdPzOxIz3V2QwUvMSyL+KQP5V2EObyKp4EU/ly3iq30K3l1CXiCs51gAAIABJREFUfzWB/KuIOpq+het4xCXzGVx3DYPrruHDvDvZljUPKaIYIsL8bucwtPZ6BtddR4E+pLNFPSboaRQxo+5C7sq6ju2HdtX7CRa6l/Jy1vvKT9AOnBK0OMu/o2iL8s/y+XwuwOX3+1/0+Xy3pFsoRWrRXYOpKv4lVV3vwV2zkKyKF8kpe5ycsscJec+lLv8agjlTQTg7W9S0U6eVsiPrdaSIf6lNlA3e2azP/hfFoQkMrruOPoGL2xU2eiLT0nqCd/O/ANR6gmQ4pUSjYc6/PeGhqaAtyn8OpsP3l9b8//b0inTk/HV0MbUHm05neLtGuW1l6VGXn86UzosWLeJ3v/sduq6Tn5/PH/7wB3r37n3UMjdCcxHMvZRg7qXYwrvJqpxLVsVLFO75PlFbFwJ5082QUdfA1F43g/gq+49IGj9kAjv9675Jvj6Yjd7ZfFhwO0tz72dg4CqG1F6rXlbfDhLzDq2376x/j3FD3qEeXFC/nmAItgz0E6SbEeEop1nRPsMLu1DWCSmmj6uXufxvSc9m9/1sz95m97WVdKV0rqioYOLEiSxYsIABAwbw6quvMm/ePJ5//vmkchzJC9ybRUZx1X5AVsWLuKsXItAJecabo4HcS5HakeWBycR5VoAFRdM47FzbZHtBeASXlS1EYrDP+TEbvbPZ5f4PUuj0CJ3NkNrr6R2c1qEvosnUOoynPTLG/ATm+wnWdJifINPrMWMXeXUgLSr/RffncmBdyw/e7k+bT7dw0hmhZvd1Gx7h/AdbTxyXyFtvvcVzzz3H3LlzeeKJJ1izZg1/+tOfcDqdBINBbr/9dsaPH8/NN9/c5NxVq1Zx66238vHHH7Ny5Up+/OMf8/777wNmJzNy5EhWr15NYWHT3CspVf5xaPoBsipfIaviBezh7RhaDoHcy6nL/zYRzyntKivTHzhoXcY6rZTNWS+xKet5au178ES7MajuGgbXXUt2NP2+kuOhDpsjfj3Bu+4vqNRq0+YnyPR6zLhFXj6fr8Dv9x8+IolOAFKZ0rl///4cOHCAlStXMnr0aObPnw/Anj17kir/dGHYu1HT5YfUFN6KM/A5WRUvkFX5Mt6K2YRdI62Q0W8ibXkdJlNnkmUUM6rmDk6uuY09rvfZ6J3Nquw/szr7L5SEJjOk9jpKQpM79W1kxyqJfoLPnetZ6F7Kf9xLeTf/C4QUnBoZVJ9uQvkJUk9Lc/6/9fl8BcBmYCHwid/v1ztGrKa0xTJvadrn269kbkrn3NxcnnzySR544AFCoRDnnXceeXl52O3tftdOahCCcNbphLNOp7L4ITxVr5F1+AXyS39B3oGHCOT+l7WAbEKnLCDraDRsnBQ6n5NC51Nj+5pNWS+wOesl3u1yI169hMF132ZQ3TVkGcWdLeoxiQM7E8MnMzHBT7BQ+QnSSlvi/AcBU4EzMYcXnwPz/H7/1ymWJePn/GM8+OCDrF+/nlmzZrU7s+fMmTN58cUXmTt3bqPMnvEcPHiQCRMmsGbNGrKysprsT9e0T2s4AqutBWTz0YxqdGc/6vK/TV3edAx710bHZvpQG45ORoMIu93vsDFrNnvdSxDSTu/gNIbUXk+P8MSUpJM43uuwLaTCT5Dp9Zhx0z4x/H7/Zkzr/29Whs8JQA+gWeVvRQUtAVzWNV7x+/2//P/t3Xl81MX9x/HXd6/skWNDLg7lkqNQbwURgSqCeFC1iIMI1NuWX9UqioBV61HxptoWpfUGBB0RBc8KKvVGBbwAERBQQHJvNsnuZq/v74/dQAghJGE3+yWZ5+PBA0iyu+98A7Mz8535TEsCNocrL7Lf1T6JkoySzgBFRUXk5+cTjUa57777mDhxYoMNfyqFHEdR4bgXb8Ht2L2v4/QsJLPoHjKK7ieQMRKfezw1rlOTvoHMCExY6RY4m26Bs/Gat/CD63k2Ol5gm+NNMsLd6Vs9iV5+gV3Vyz8otfsJ6tcdqt1PYNdtDKk5mjMCA9R+gmZKyg3f+HGPLilllRDCCnwE/FlK+VkjDzP8Dt9klnS+6aab+OKLLwiFQgwbNow77rhj92vUl6qef0PMNZtweV7AUSExR0qJWDrhc48jrcdkSqqMXWc/0T3CMAF+crzFBuc8CtNWYtJtdPePpq9vEvnBAc2eszZ6jxVSl7H+fYLtluLd9wlq9xP0CndBQzP8dWyzq32EEE5ijf9kKeXKRr7U8I2/URip8d9ND2KvXI7Ts5C06tiqpRrX0NiS0fRRYGreKKk1JLNRKLds4AfnfDY5FxEyeXGH+tK3ehI9/WNI05t2w9zojRYYI2Nt3aHa+wTf7D6fIHafQKSNpHdxR8PeJzBs4y+EOFNK+bYQohdwA/CilPKDAz1xfIpoFdALmC2lnHaAh6jGv4kM2fjXYQ7tICf4Oux8Ckt4BxFzNv6ssfENZMYpn9AaDVdY87PFvpQNrnmU2NZgjtrp6T+fvr5J5ISOaXQ0YISG9UCMmDFV+wlaysiN/3Ip5QghxNPAE8A/pJQDmvoCQgg38ApwrZTyu3qfu5rYEZFIKU8IBveua/Hzzz/vPudW2WPbtm18++23TJgwwZCNP4DFYiEcqkHzvIdp1zOYSpei6SGiGYOIdryMaN5YMKd2WshisRAOt94CtkJtDV+bnmC96QVCWjUF0eM4JnoVv4qOw8a+16K187WE0TN6qWa55UuW6P
/jbdNneLQq7LqN4fqJ/DY6hHOigykgtfdlDuYa2mw2SGLjv5LYMY63SSmvEkKskFKe2pwXEUL8FaiWUj7UyJepnn8TGb3nD/v2ZkzhUhwVi3B6FmINbiRqcsU3kI0nZD+2XZ05ENQq+dGxmA2ueZRb12ONpnOE/wL6VE+kQ7h/yvM1x6GUsTn3CVKRryUOpufflPVo9wJ/Ax6Mr+Jp7KYtAEKIvHiPHyGEAxhB7CwAJQHMv/yCedYs8ocMwbF4carjNEnUkkN1zh8o7vk+xd1eJZAxGkfFYvK2jiZvy0hcZU+hRdrHnkKbnsGvfJdwbvEyzi5eQtfAmWx0vsjS/JG8mXsemx2LCONPdcw2p3Y/wV3eK/isaA7LimZxY+U4wkS4N3M+p+Zfx9D8a7gr81lW2tYRIXGrBI2oKT3/2nn+Ji+UF0IcDTwHmIm9wUgp5V0HeJjq+TfRTx98wLe33cYlmzdjsdupeOAB/GPGpDrWXprSm9EilTi8S3B6FmILfIWupeHPOBufezxB58mgJffoRSP1Wmu0cjY5F7HBOQ+vdTO2qJuj9N/TteQCsiLGLbRnpGu4P03J2NB9gg6RTEbUnMAZSb5PYOQ5//OITftkAa8RW7Nf1pIXOwDV+DdR3cbfpuuEu3Sh6PPPUx1rL839B20JrMXpeQFnxcuYohWErd3xuS+KbSCzdjRExtago7PL9mm8sNxbRLUQHWsGxwvLnYkZY5XdNuI1rK+5GSs1H++nreEd++e8Z19Nhal6936CUYEBjAwMIC/qTlm+ulplqacQogMwBzgT+C/wqJTyo5a86H6oxr+J6jf+uqbxy/ZEb7g+OC3+Bx3146h8C6dnAWm+T9ExE0g/PbaBLH04aIkreWH0hsuRG2Fl4HF+cM6nyvIz9kguvX0X0cc3gYxIgst9t5DRryEcXMb93Sc4PtRnd92hg71PYNg5fyHEWUKI54BngWVAV2IrdIx58GlEx/W4n4IjPbjm+CGSuH0MZWVlTJo0iaFDhzJixAiuvPJKSkv3rhk0c+ZMunXrxqxZs/Z5/IwZMxgxYgQjR47k7LPP5sMPP9z9ueLiYsaPH8+QIUMYMWIEq1evbnowiwXLDz+0+PsyFJMDf9YYSrstovCID6nKmYzN/xU52y+jYNNAMoruwxzcmuqUrcJFAUdXXcMFRZ8wonQ+ecET+C79MV7OH8yyDpP4Ke0dohh3pU1bUP8+wTtFD3Nj5ThChPe6T3B35nOH3H2Cpkz73Aw8L6XcUe/jfaWUGxKY5aB7/uYfI2T/sRrLjxFMfog6INzTTPkcF5GeB7/B42Dq+QN4vV4yM2Nnd65du5Zx48bx7bffomkaU6ZMoWvXrlx//fV8/vnn3HDDDXz00UdoDayCqdvzt1os6FYrWjhM5XXXUXXNNWCAFUAJ7RHqIdKq3sfleZ60qvfQiFLjPAWf+2L8GWeCqWVzsUbvtTaUr9q0kx9cC9noXIDPvAtnpBN9qifQ23cRrmgnQ2Q0mmRl3GkqiR9o/wWf1LtPMCowkGE1xzTpPoFhp32EEN2AGUA6cAlwmZTyyZa82AE02vhn3u7Duq7xd1XbF2EI730ldAALBAfsf7og1N+M967m19E5mHr+n332GVdddRXffPMNmqbRu3dvVq5cubuE8/Dhw5k1axbHHnvsPo/96ZNP+PaWW5jo9xOYNo2aYcPIuu02HEuXEurXD8+sWYSObl79/URL1n84U+gXnBUSp+cFLKGfiJrc+LIuiG0gs/czRMZEaSxflDDb7e+ywTmPHWkr0DBxeGAkfX2T6FwzLCGF5Q42o1G0RsaDuU9g2MJuwFPANcBjUsqIEGI8kIzG/6DpTjDVq/ysAdEk1EdraT3/Bx98kFdeeYWKigqeeOIJNE2jrKwMXdf3qt3fpUsXdu7c2WDjH+nUiciUKRTVWedf/vjj+M8/n6wZM8gdPZqqP/6RyhtugDa2SS5q7URV7p+pyrkWm+9jnJ6FuDzzSC9/iqD9uNiZA5nnoad4A1mymbDQNTCKroFRVJq3xctML+Qnx9tkhLvRxzeBXr5xOKK5qY7aLmToTs4NnMK5gVP2uU+w3P0lmj6H40N9GBUYwBkN3CcoNJXxSPpLrLJt4J2SfaeMk6Epjb9ZSvm9EKL2763TpainKT1zx8s1ZM3wYare87GoCyr+5sR/QWJry7S0nv/UqVOZOnUqH330Effcc8/ug1sSITBqFDWDBpF5991kzJ6N48038Tz8MMGTTkrYaxiGZiLoGkrQNZSKcBlO72KcngW4d91MZuEd+DPPjW0gc5zQ5s8cyIh044TKGRxbOYWf7G+zwTWPVZkzWZPxIN38Z9PXN4mC4CB1GEorqX8+wTrL1t3TQzMz5zMzcz49wp0YFRjIgJpf8Zl5PXML3kYnSlBrvXs4TWn83xNCzAE6CyEeJXbT15ACI21k3eqHugd0mzUCIxO7PO6uu+5iy5YtPPvss5hMLXsvHDJkCJWVlXz//fccHZ+iKSsr293737FjR+2Qrln0rCwqHnoI/3nn4b75ZnLHjKH6kkvw3nILenrb7A3rlg5Ud7iS6uwrsAbW4PQsxFHxKq6KFwjZ+sRPIBtL1NKB3B/PwFaz5wzf2iscTPs1JT3fSc03kCBm0ugROI8egfPwWDbxg3Mem5wvscW5hKxQL/r6JnGEbyxpeuKWKSqN09D4dbgHv67qwQ1VYvd9gtfsn/Bv11LmpC+JNVcpeF8+YMslpbwb+BdwG7EpoMeSHaql9EyNXevd7NyRvfvXrvVu9MzEXdnaev5PP/10s+r567rOpk2bdv/966+/prS0lK5dY0v2Ro8ezdy5cwH4/PPPCQQCu98UWiI4dCjF775L1RVX4Jw7l7zhw0mLnxHcZmkaIcfxVHR6kMLea/B0egjdnE5W0Z0UbDqB7O1/JGrphF5vrbyOjaDjxBSFTg53uBcDvXcidq1iSPnfsemZfJ71V2THE/jQfT1F1lXoGOb87najczSXS31ngVbn2qdoQLbfG75CiJeACVLKYJ2P9QXmSSkHJiGL4df5H0w9/2g0ytixY/F4PJjNZux2O1OnTmXYsGFA7DCXa6+9lu3bt+NwOLj33nsZMKDh+nnNre1j/fJL3DfdhHXjRnxjx1Jxxx3o+zlFLFGMdCPQEvgeZ8VCnBWLMEU8+3S0dM1OYa9PiVryUxWxQYm+hmWWtWxwzWez42XCpmqyQ/3pWz2JI/xjsOotGxUa6ee8P0bMWGQq55H0l3jR9R5RdIJaaPfnduxsesmWpKz2EUJcAEwGxkopPUKIUcDdwO+llMmo02P4xt8oWlTYraaGjEcfJX32bKJuNxX33ENg9OikZTTifziiNdir3iaz8G4s4V8A0NGocZ1K2eHPgJb6JbJ1JesahrQqfnS8ygbXXMqsa7FEXfT0/46+1ZPICR9piIyJZOSMRaZy5uS9xnOmN4nG5/xT3vgDCCEGAw8S29E7GLhISulpyQs1gWr8m+hgqnpavvsO9003Yfv2W/xnn03F3/5GtCDxB48b+T+cKVRIweZBaHpw9yggYs7D5
xb43BcRsfVMdUQg+ddQR6fE+hUbXPPYYl9CxBQgN3gcfasn0SNwLhb9wCvFjPxzrmX0jLm5uawr28gj6S/xpe37Zq32ScoOXyHE3cRKOewApgBrgClCiAMVaFMMLHzkkZS8/jreW27B/u675J92Go4XX4Qkn+hmJFFrAdVZF6Fjoto9idLDniPoOIH00jkUbB5KzraxOCpehmjbrqypoZEXOo4hnlmIwlUMrLiLkFbFx9lTkAUnsDLzdjyWjamO2S7kR7OZ6b261ZZ5QuOrfZbHf38XmN0KWZTWYrFQ9ac/4R81CvfUqWRPmYJjyRIq7r+fyOGHpzpdq6jKvR5n9Eeq8qYQteRTkzECU6gQZ8VLOD0Lyd55HVmmW/FnjaHaPZ6wvXnTIYeaNN1N/+or6Fd9OYW2lWxwzmODax7r05+ioOZk+vom0s1/FmaMdxyn0jJJP8O3GdS0TxMl9DCXaBTn3LlkzpwJuo73llvwXXIJtHAJay2jD7WhkYy6js33aWzJaOUbaHoNQfvR8Q1k56ObM1Obr5UETKVsckg2uOZTadlKWqTD7sJymZHuhsjYFEbPaNjCbkobZzLhu/RSit97j+DAgbhvvZWcMWMw11mW2u5oGkHXYDxd/smu3qvxFPwNTQ/h3jWDgo3H4d55PTbf521+qswezeHI6smMKfqQM0oX0jE4iLXp/2ZxwSm80+FittnfUoXlDmGq8VcAiBx2GGXz51P+979j3biR/DPOIP1f/4JQ6MAPbsN0sxtfh8so7rGM4u5v4s8ai73yLXK3/Y68H0/FVToHU9i4vcpE0DDRuWYYp5U/wYWFn3Oc9yY81h94v8OV/MfamzUZD1Ft2nHgJ1IMpU02/qbCYnIuuApTUWL/UyazpPOcOXMYOnQohx12GMuWpWgTtabhF4Ki998ncPrpZN57L7mjR2P57rvU5DESTSPkOIaKTvdT2HsN5Z1mETVnk1V0NwUbTyR7+9WkVa0A/dAp6dsSzmhHjqm6gbGFn3F66TPk6UfzdfojLCoYxLvZl7E97V2ih1BZ4/asTTb+6Y88iW3lGtL/vm9htYOhaRqTJ0/mww8/ZPny5XTr1o2ZM/cca/DQQw/x9ddf88knn/DRRx8xe/be98lnzJjB8uXLWbZsGQ8++CCTJ0+m9p7LoEGDmDt3LoMGDUpo5paI5udT/sQTlP3nP5gLC8k75xwy7r8fAoFURzME3eTE7x5HafdXKeq5guoOl2PzfUrOzxPI33Qy6cWzMIfadk/YhIXDa87ggvASLij6lKOqrqHYtoblOb/n5fzBfJP+D3ymolTHVBqRuGORkizz9oewrmv8wBLbZ6vR6szDps9dRPrcReiaRnDQ8ft9XKh/H7x33XTADNnZ2btr+QMcf/zxu0syzJ49m82bNzNv3jxsNhsLFizg2muv5Yknnthd0rm2lj/EbmbXrdXfUPXOVAuccw41gweTdeedZPzjH9jffBPPQw8R2s/O4/YonNYbb8HtePOnY698B6dnIRkls8gomUWN61R87vEEMkaCZqzjFxMpI3I4x1dO45jKG/jZ/g4bXPNYnXk/azIeplvgTPpWT6Jj8BRVWM5gDpnGvymCxx+FZdvPmMo8aFEd3aQR7eAm3C3xyxcTWdLZyPTsbDyPPBIrF33zzeT+7ndUX345ldOmobtcqY5nHJqNQOZoApmjMYe2x84j9rxAhx1XEzHn4M+6MHbmQJpxD2M/WGZsdA+MpntgNBXmzfzgep5NzhfZ6nidzHBP+lZP5Ajfhdj1Dgd+MiXp2txSz6zpM3HOXww2KwRDVE+6AO+9MxKZE4BbbrmFXbt28eSTT7aosudHH33EvffeyyuvvILNtqdXOHbsWP7whz8wcuTI/T42oUs9m0GrqiLz3ntxPfss4cMPx/PAAwTjtYnqM/ryOmiFjHqEtOr/4fQsxF75DhphahwDY6OBzNHopsbLlLeFaxgmwDbHG2xwzqMo7QtMeho9/KPpWz2JvNCJrTIaMPp1VEs9E8RUUkb178dS/NpzVP9+LObi0gM/qJlqSzo//vjjCSnpfKjQ09OpuOceShYvBouF3PHjybrxRrSKilRHMybNTE36cMoPe4LC3l9SkX8r5kgJ2b/cQMHG48n6ZTpW/zdtesmoBTtH+C/g7NJXOa9oOX184/nJ/g5v5p3P0ryRfO98lqDmPfATKQnX5hr/8icfwjtzOuFf98E7czrlTz6U0OdPVknnQ0nwpJMoWraMymuuwfnSS+Sfdhr2t99OdSxDi1ryqM6ZTFHPDyjptphAxigcFS+Rt/Us8raMwln2LFokWWWzjCE73I9BFfcgClcz2PMQJt3GZ+6/IAuO5+OsqZRYv0l1xHalzU37JFMySzo//vjjPPnkk5SVleFyuUhLS2PFihUNfv+pmvZpiPWbb3DfeCPWdevw//a3sUJxubmGH2pD6qcDtEgFDu+rOD0LsQW+Rdfs+DPOxue+mKBzELl5eW3+GpZYv2aDcz5bHK8QNvnJCR5DX98kevjPw6on5vzVVP+cD8SwB7i3IsM3/kZhpMYfgFCI9NmzyXj0UXSnk4q77sJ19dWUlCZ+yi2RjNQoWP3f4qxYiKPiFUxRL2FbD+h8JSXWsw13zkBdibqGQc3LZsdiNrjm4bF+jzWawRG+sfT1TSQ7/CtDZEwWNeevHLqsVqquv57i//6XcM+eZF93HZbzz8e0o22vdU+kkOMoKjrOpLD3aso7P0rEUoBl618o2DiA7O1Xklb1bpveQGbTM+nnu5TzipdzVsmrHB4YyQ+uBSzJP503c37HZsdiwqh9JomUlJ6/EOJwYC7QEYgC/5FSPnqAh6mefxMZrudfVySC65lnyLz/fnSTCe9f/oJv4sSDLhSXDIbvETpLqdk6B6dHYo6UELF0xOe+CF/WRURsxqi+msxrGNDK2Ox8iQ2ueXgtW0iLZNPLP46+1RPIjDT9zAXD/5zbWM8/DNwopewHDAL+JITon6TXancMNFW3L7OZ6iuvJLRqFaFjj8U9YwY5QmD+8cdUJzv0OPtSmf8XCnt/QVmXJwil9Se95FHyN59Mh5/GY/cuhWhNqlMmjV3vwK+r/8Dvij5kVMmLdAqewjrXkywuGMp/c8ax1f46Udp37amDkZTGX0r5i5RydfzPlcB6oEtzn0fXdWM3dCmg6zqRyCEw/O/Zk9IXXsDz0ENY164lf+RIXHPmQFhVgWw2zUYg82zKus6jsNdKKnNvxFKzmQ47JlOw6QQyC+/AUrMh1SmTRkOjU3AIp5b/mwsLv+B47zS85q2s6PAHXioYyOqM+6kyb091zENO0m/4CiG6Ax8AR0opvfU+dzVwNYCU8oRgMLjXY6uqqigrK8PpTMxd/7bA4/Gwdu1aysvLmThxIhaLMTdpWywWwrUN/c6dWK69FtPrrxM98UQi//43+pGpPxxlr4wG1Gg+PYLmeQ/TrmcwlS5F00NEMwYR7XgZ0byxYG7ZgewJzZhEUSJs1d7ha/MT/Ki9hY5OT/1MjolcRQ/9TEyYU56xqQ4mX3yDqPFW+wgh0oH/AfdI
KQ90KvE+c/4QewMoLCxs8WaqljCZTESj0VZ7vabSdZ2ysjK2bdtGdnY2w4cPT3Wk/dpnHlPXsS9dStZtt2Hyeqm69loqr70WbKmredNW5oJN4VIcFYtwehZiDW4kanLhzzwfn3s8IfuxkMQSIka4hlXmHWx0LuAH50L85kJc4c708U2gt288zmiBITI2ps0t9RRCWIHXgf9KKZtyMGWDjT/Eersff/wx9UcGyeJ0OvH5fK3yWi3hcDgYM2aMoTPu7x+0qayMzL/+FefixYT69sXz8MOEjjsuBQmN0XA1ptn5dB2r/0tcnoXYvUsx6X5Caf3wucfjyxqDbs5OfcYkihLiZ/syNjjns9P+PzTdTNfAKAZY/oSr+Gg0gy5ubFONvxBCA54DyqSU1zfxYftt/Fubkf5B74/RMx4oX9ry5binTcNUVET1VVdROXUqusPRigkP/WvYGC1SicO7JL6B7Ct0LS2+gWw8QefJoCWmITTqNfSat/CDcwEbnS9QYy4jI9ydvtUT6eUX2KM5qY63l7bW+A8BPgS+JbbUE+AWKeWbjTxMNf7NYPSMTcmneb1k3nMPrvnzCXfvjufBBwnWKZmdbG3hGjaFJbA2VmW04mVM0QrC1u7xJaMXErV2NETGZIlQQ1n+R3wZeYzCtM8w6Ta6+8+hr28S+cGBhigz3aYa/xZSjX8zGD1jc/LZPvkE99SpWLZupXrCBLy33oqemfxD0tvSNWySqB9H5Vs4PQtI832KjplA+un43OOpSR8OWvMXDxj9GsKejB7LD2xwzmeT8yVCJi/uUB/6+ibR03cBaXpWyvO1hBHX+StKkwUHD6Z4+XKq/vAHnAsXkn/aaaQtX57qWG2PyYE/awyl3RZReMSHVOVMxub/ipztl1GwaSAZRfdhDm5NdcqkcYf7cJL3LsYVruaU8llYdBcrs26LF5a7kRLrV+gYpjOcdKrxVwxBdzjw3n47JUuXEnW7ybnkEtzXXouprCzV0dqkiK0nlfkzKOz9OWWHPU3IfhTppbMp2HwKOdsEjopXIdo2yylYdAe9/eMYXfI6vy36L0f4x7LFsZTX887h9dyz2OCcT0irTnXMpFONv2IooeOOo/itt6icMgXHa6+R95vfYF8IjvyyAAAVBklEQVSypE3XvE8pzUogYxRlhz9HYa/P8ebdjDn0M9k7/0THjSeQuet2LIH1qU6ZNDnhIxlccT+icDWDPPcS1cJ86p6GLDieT7NmUGZZl+qISaMaf8V4bDYqb7yR4rfeItK1Kx3+7//IvvxyTLt2pTpZmxa1dqIq988UHfExJV0XUpM+DJdnHvlbRpC7ZTTO8ufRIlWpjpkUNj2DX/l+z7nFyzi7eCldA2exySlZmj+SN3LPZZPjJcL4Ux0zoVTjrxhWuF8/SpYsoeK227B/8AH5p52Gc8ECNQpINs1E0DWM8i6PU9hrFRUFd6DpPty7bqZg43Fk7bwJq39Vm/w5aGjkh05gqOcRxK4vGVBxB0Gtgo+yr0d2PJHPM++gwrzpwE90CFCrfRpwKK1gMKpE5zNv2YJ76lTSPv2UmiFD8DzwAJFu3Q7qOdvbNTwouo41sBpn+UIc3iWYdB+htL5oXa6kxHwmUYtxD2U/2Ouoo1No+4wNznlsc7xJVAvRsWYwfasn0jVwFmYObpe6WuqpGv9mMXrGpOSLRnE+/zyZf/sbRCJUTptG9eWXg9l84Me2VsYEMmo+LVKFw7sUp2cBtsAadM1GIONMqt3jCTqHJGwDWaIk8jr6TSVscr7IBud8qiw/YY/k0tt3EX18E8iItOxIVtX4q8a/WYyeMZn5TDt24J4+Hft77xE8/ng8Dz9MuE+fZj9Pe76GiZLn2EXNlsdwel/GFPEQth4e30AmiFo7pzoekJzrqBNlZ9oHbHDO42f7MnSidKk5lb6+SRwWOB0TTd8zodb5K0oTRbt0oWzuXMr/+U/MW7aQN2oU6Y88AiFV27216a4j8Xa8i129VlHW+TEi1m5kFj9IwaaT6PDz77FXvg162/u5aJjoUnMqw8ufYmzhZxxbOYVy63re63A5iwpO4qv0WVSbfkl1zEapxl85NGka/jFjKF6xgsCZZ5L54IPknXUW1m++SXWy9slkJ5B1HqXdXqTwiE+oyrkGa2AtHbZfQcHGAWQUzcQcbJsH+riinTm2agpjC1cyvOxpskP9+CpjFosKTuK97CvYnvY+OsarEqwaf+WQFs3Npfzxxyl7+mlMZWXkjh5NxsyZ4G9by/IOJRFbNyrzp1HYayWlhz1D0HEc6aVzKNg8lJxtY3FUvAzRtvfzMWGha2AUI8vmc0HRJxxZNZki25csz5nIy/mn8E36v/CbjDONpxp/pU0IjBpF0fvv4xOCjNmzyT/jDGwrV6Y6VvumWajJOIPyw5+JbyCbjjn0C9k7r4tvILsVS2BtqlMmRUakKydUzuDCwi/4TdnjpEcOY3XmvbxUcCIrsifzi+0TdHR8pkJesJyOz1TU6hnVDd8GHAo32oyeMZX5bB98gPvmm7H8/DPVl16Kd8YM9PR9T7ZS1/DgNf/MgSg236c4PQtxVL6JptcQtB+Dzz0ef+b56OaM1GdMEo9lEz/EC8sFTR4yQ0eQFs2m2LaKvtW/52TvzGY/p7rhqyh1BIcNo/i996i64gqczz1H3vDhpK1YkepYCsQ3kJ2Cp8u/2NV7FRUFd6PpQdy7plOw8TjcO2/A5vuiTW4gc4d7MdB7B2LXlwwpfwSL7qI47UvQdDa5Xmz13r9q/JU2SXc68d51FyWvvILucJAzYQLu669HKy/HsXgx+QMHYrXbyR84EMfiA50wqiSDbs6musPlFPdYRnH3N/BnjcFe+Qa5284n78dTcZXOwRQuTXXMhLPgoJf/QvKCx6LpsSWhOlG+Tn+kVXOoaZ8GGGWY2BijZzRUvkCAjEcfJf2xx9AdDrSaGrQ6R4JGHQ4qHngA/5gxKQy5L0Ndw/1IdEYtWo3d+zouzwJs/i/RsRLIOAOf+2JqXENBa/6GPiNeR5+pkJcLTiai1ez+mFm3c0Hhpzij+U1+HjXtoyiNsdupnDaN4jfeQAsE9mr4AUx+Pxn33ZeicEpdusmF3z2Oku5LKOr5PtUdLsPm+4ScnyeQv+lk0otnYQ7tSHXMg/Z1+iP7nB3Q2r1/1fgr7Ub4yCMhHG7wc+YdO0j/xz9IW7YM044dbXLO+VATTuuDt+CvFPZaRVmXxwmnHUFmycPkbzqJDj9NxO59A/TggZ/IgIpsq4hqe2ePakGKbF+2Wobmn9umKIewSOfOWHY00HM0m8m8//7df4263YT69SPUrx/h/v1jf+7bF1r5kHkFMKURyDyXQOa5mIM/4ax4EafnBTrsuJqIOQd/1oX43OMJp/VKddImO6/knd1/TtW0lGr8lXalcvp0sm6+GVOdTWC1c/6BkSOxfv89lrVrsa5fj3XdOpwvvIDJ5wNAN5kI9+xJuF8/QrVvCP37E+3cGbTUHwTeHkRsXanMm0pl7hTSqlfg9CzEVfYk6WVzqHEMxOceTyDzt+gm9SZ9IKrxV9qV2pu
6Gffdh3nnTiKdO1M5ffrujwcHDCA4YMCeB0SjmLdt2/1mYFm3DuvXX+N47bU9X6JGCa1PM1OTfjo16adjChfhrFiE07OA7F9uIFp4O/7M8/G5LybkODrVSQ1LrfZpgBFXB9Rn9IxGzwcHl1GrrNxnlGD5/vuEjhLa+jVMOF3H5l+J07MAh/cNND1AMO1ITIddRbF5JLo5K9UJG5Sqqp6q568oLaBnZOx/lLBuHdb16xsfJfTvv+eNoU8fNUpIBE0j6BxE0DmIioK7cVS8gsuzAMvmP9NRs+PPPAef+2KCjpPUNB2q8VeUxDGZiPToQaRHDwLnnLP7ww2NEpwLFx5wlEBOTqq+k0Oebs7C1+FSfB0uJc/+M8Etj+HwvoKz4mXCth743Bfjy7qQqCUv1VFTRjX+ipJkLR0l6NnZ5PzqV2qUcJD09OOo6HQv3oLbsXtfw+lZSGbRPWQU3U8gYyQ+93hqXKe2aAPZoUw1/oqSCk0YJWRs2YK2enWTRglqxdGB6SYHfrfA7xZYajbFistVvISj8i0ilk743OPwZV1ExHZ4qqO2iqQ0/kKIp4HRQJGU8shkvIaitEV1RwnO2huB6l5CwoXTeuEtuA1v/jTslctwehaSXvIo6SWPUuMaGlsymj4KTGmpjpo0yer5Pwv8C5ibpOdXlPZjf6MErxfrhg2xewnxN4ZGRwnxkYIaJdSh2QhknkMg8xzMoR04PLUbyCYTMWfjzxqLz30x4bTmnxFtdElp/KWUHwghuifjuRVFidEzM9WKowSKWLtQlTeFqtw/k1b9IU7PAlxlz5Je9gRBxwlUuycQyByNbnKlOmpCJG2df7zxf72xaR8hxNXA1QBSyhOCQWPU6bBYLIT3UwPGKIye0ej5wPgZE5qvogJt7Vq0b76J/fruu9iv6mogNkqgd2+iRx2FfvTR6EcdhX7UUXDYYY2OEox+DeEgMwaLMBU9j3nXM2j+DejmDKJ5gmjHy9HTT0jICOpg8tlsNmjhOv+UNv71qE1ezWD0jEbPB8bPmPR8DY0S1q/H8tNPe77kAKMEo19DSFBGXcfm/xKnZwF271JMeoBQWr/4ktHfoZuzU5JPbfJSFKX5EnAvwTRgAGndurX9ewmaRtA5gKBzAFrBnTi8S3B6FpBVeBuZRX/Dn3E2Pvd4gs6TQTs0iiWrxl9RlL00+V7CV19hee01areitZd7Cbo5E1/2JHzZk7AEvsPlWYijYjFO7yuErd3xuS/ClyWIWgtSHbVRSZn2EUIsBE4FcoFC4K9SyqcO8DA17dMMRs9o9Hxg/IxGzweQa7Xi/fjj2JtB7RtDQzWO6uxJaO1RQqtcx6gfR+WbOD0LSfN9io6ZQPrpsQ1k6cNB238/u01N+0gpxyfjeRVFMZisLIIDBxIcOHDPxxoaJaxZg2Pp0j1f0tZGCSYH/qwL8GddgLlmc/zMAYmj6h0ilgJ8WQKf+yIitu6pTrqbmvZRFCWxGruX8P33e40SGryXkMJRQiJE0o6gMv8WKvOmYq96F6dnAemls8ko/Sc1zlOodk8gvfSf2GrW735M5/jvwbRfU9LznYafOMFU468oSqvQMzMPepSwe6RwKIwSNCuBjDMJZJyJKbQTZ4WMbSDb+X/o2NAxoRHd/eU6NoKOE1stnmr8FUVJnXYySohaO1OVez1VOddh832Eq+wZ7FX1eviaiaq861stk2r8FUUxnP2OErZu3XN4zvr1TRolMHhwCr6D/dBMBF3DCLqGkbXjBpzel9GIoGOjOmscUUt+q0VRjb+iKIcGk4lIz55EevZs9ighz4CjhMr86Tgrl4AeafVeP6jGX1GUQ9yBRglZW7cSXrXKcPcSotYCqrPG4fLMb/VeP6jGX1GUtqjOKCE9N5fy+Dr6Ft1L6N+faKdOSRklVOVejzP6Y6v3+kE1/oqitCOJvJeQiFFC1FpA+Jh3iaZgM59q/BVFad+aei9h3TqcCxZg8vuBeqOEuqeqJWmUkGiq8VcURWlAskcJjsWLybjvPsw7d5LfuTOV06fjHzOmtb491fgriqI0WYJGCVp5Oa7nnsMUCABg2bGDrJtvBmi1NwDV+CuKohyklo4S6jL5/WTcd59q/BVFUQ5pjYwSOvbvj9ZARWVzK1Y2PjROHVAURWkj9MxMIp07N/i5/X08GVTjryiK0soqp08nWu8GcNThoHL69FbLoKZ9FEVRWlntvH7tap+IWu2jKIrSPvjHjME/ZkzKTmxT0z6KoijtkGr8FUVR2iHV+CuKorRDqvFXFEVph1TjryiK0g6pxl9RFKUdUo2/oihKO6Qaf0VRlHZINf6KoijtUNJ2+AohzgQeBczAk1LK+5L1WoqiKErzJKXnL4QwA7OBs4D+wHghRP9kvJaiKIrSfMma9hkIbJJS/iilDAIvAOcl6bUURVGUZkrWtE8X4Oc6f98OnFT/i4QQVwNXA0gp6dyKtawPxEhZ9sfoGY2eD4yf0ej5QGVMhFTkS1bPv6Gj6/c5tkZK+R8p5YlSyhPjjzHELyHEqlRnONQzGj3foZDR6PlURsPka5FkNf7bgcPr/P0woPXOJ1MURVEalaxpny+A3kKIHsAO4CLg4iS9lqIoitJMSen5SynDwDXAf4H1sQ/Jtcl4rST5T6oDNIHRMxo9Hxg/o9HzgcqYCCnJp+kNnCCvKIqitG1qh6+iKEo7pBp/RVGUdkgd4F6HEOJwYC7QEYgC/5FSPpraVPuK76D+EtghpRyd6jz1CSHcwJPAkcSW+F4upfw0tan2EELcAFxJLNu3wGVSykCKMz0NjAaKpJRHxj/WAXgR6A5sBYSUstxgGR8EfgsEgc3ErqXHKPnqfO4m4EEgT0rZ+qel78nRYEYhxLXE7pOGgTeklDcnO4vq+e8tDNwopewHDAL+ZNCyFH8mdiPdqB4F3pZS/go4BgNlFUJ0Aa4DToz/5zMTW42Was8CZ9b72HTgXSllb+Dd+N9T6Vn2zbgMOFJKeTTwAzCjtUPV8Sz75qvt1I0EfmrtQA14lnoZhRCnEauAcLSU8tfAQ60RRDX+dUgpf5FSro7/uZJYo9Ultan2JoQ4DDiHWM/acIQQmcAw4CkAKWUwVT3BRlgAhxDCAjgxwB4UKeUHQFm9D58HPBf/83PA+a0aqp6GMkop34mv7gP4jNienpTYzzUE+DtwMw1sNG1t+8k4GbhPSlkT/5qi1siiGv/9EEJ0B44DVqY4Sn2PEPuHHE11kP3oCRQDzwgh1gghnhRCuFIdqpaUcgexntVPwC9AhZTyndSm2q8CKeUvEOuYAPkpznMglwNvpTpEXUKIc4lNj36d6iyN6AMMFUKsFEL8TwgxoDVeVDX+DRBCpAMvA9dLKb2pzlNLCFE7V7gq1VkaYQGOBx6XUh4HVJP66YrdhBDZxHrUPYDOgEsIMTG1qQ59Qoi/EJs2fT7VWWoJIZzAX4DbU53lACxANrGp5qmAFEK0uGxDU6nGvx4hhJVYw/+8lHJxqvPUcwpwrhBiK7FKqcOFEPNTG2kf24HtUsraEdMiYm8GRjEC2CKlLJZShoDFwOAUZ9qfQi
FEJ4D4760yHdBcQohLiN3EnCClTPnUSh1HEHuT/zr+f+YwYLUQomNKU+1rO7BYSqlLKT8nNqrPTfaLqtU+dcTfbZ8C1kspZ6U6T31SyhnEb6gJIU4FbpJSGqrXKqXcJYT4WQjRV0q5ATgdWJfqXHX8BAyK9wr9xPJ9mdpI+7UUuAS4L/77ktTG2Vf80KZpwG+klL5U56lLSvktdabK4m8AJ6Zytc9+vAoMB1YIIfoANiDpGdUO3zqEEEOAD4kt/6udU79FSvlm6lI1rE7jb8SlnscSuyFtA34ktvwvZUsU6xNC3AmMIzZNsQa4svZmWwozLQROJdbjKwT+SqxRkEBXYm9aF0opG7qhmcqMM4A0oDT+ZZ9JKf9olHxSyqfqfH4rKW7893MN5wFPA8cSWzJ7k5TyvWRnUY2/oihKO6Tm/BVFUdoh1fgriqK0Q6rxVxRFaYdU468oitIOqcZfURSlHVLr/JU2RQjxG2LL50xABLhNSvmJEKICWA1YiZUh6AyMkFLeGn/cHcAKKeWKOs/lJFZOo0/8cf+RUj5HC8WrnQ434OZBpR1SPX+lzRBC5AJ3AudLKU8lVgjNH//0t1LK04AbidVGaoq/Av+LP9cQYMtBRnQDYw7yORQlIVTPX2lLzgbm19ZjildmXVPva76i6ZUnB0spp8WfSwc+ABBC/IPYhhwvMIFYAcARUspbhRCXxh+7gtjGnTJiJQbOA64GRgohVhDbsFXc/G9RURJDNf5KW9KZ2O5shBAXA/9HbMfpTXW+ZhiwoaUvEK+46JJSDosXhPsj+6/8mk2sltB44AJiB3V3NVpJDqV9UtM+SlvyC7E3AKSUC4CJ7CmQdZQQ4n1ibwj3AQFiZQlq2dkzRdSYI4jdO4BYTaBe7F0nvm41xnVSyiiwg9iUj6IYhur5K23Jm8AiIYSUUlaw97/v2jl/AIQQQeA4IURtB+h44IF6z/eJEGKClPL5eNG/U4jVKjoj/vkTiR1dWAF0in/sKOCb+J/rvymEiJ0cpigpp3r+SpsRn0O/E1gihHgPeIzYmcwNfW0psdLdHxAr5reogaJpdwK/ic/RfwwcES+56xdCfAhcDMwh1th3FkK8CeQ1EnEX0EEIsSh+Pq+ipIwq7KYoitIOqZ6/oihKO6Qaf0VRlHZINf6KoijtkGr8FUVR2iHV+CuKorRDqvFXFEVph1TjryiK0g79P/5pHXSXZbd8AAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEkCAYAAAAhJPoXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOy9eXxURbr//67Ta1aSkD0RkB1UUBHXcVd0xm1k9KijMy7jNqNevTpfBncHFR1/6tXrqDMu44LrUfG6jgLjLuqIjuAGAgICISGBkLWTXk79/jinO92drRO6kybU+/XKK93n1Kl6+iyfqnqq6jlCSolCoVAohi7aYBugUCgUitSihF6hUCiGOEroFQqFYoijhF6hUCiGOEroFQqFYoijhF6hUCiGOGkn9EKIm4QQqwbbjsFECLFWCHHdYNvRHfH2CSHeE0I8MkBlD1hZOxNCiMeFEIuSkM9hQggphKhMhl2K3rHP91k9pXEOlDGDiRAiD7gJOBoYBTQBHwPXSimXD55lQ4aZQDBdy7IrhrFSysNSYlEKsBs7T0kpbxqgIi8nDRt+iuSws1zYMmBX4AZgb+B4IBt4RwiRP5iGDQWklFullI1DrayuEEK4B6vsVCKlbJBS1g+2HYoUIaUctD/AAzwINAD19ufbgFVRaTRgLlALNAPPAVcAQXu/AN4APgdcUccswmq1O7spezgggRN6sO8j4Jao73+2jzkqatv7wB1R34+2y/UBG4HHgOFR+/cG/glstn/P58CxceWuBa6L+n6UfY6uSuCcuoC7gQ1AO7AJeC4uzWnAF0AbsMW2Jz/K/veArXaZ7wP79mLfe8Aj8d+B64FqO6/HgaxEr2sPv69PZWH15GTc3zn2vmzgXvs6tQL/AWZG5T3KTn8m8CbQAtyZ4Dk+HfjKPsdr7fRZcWkuAb6z89gMvBj1m+JtHtXN+Xgc616/EFgHNAKvAEVx6c6OKmsDcAtRz0Y4n37ke5mdXyvwNvBb297KqDTTgAX2da4F5gMjt+f57eKeeNT+TZuBbcCtdj43ADV2ubfGHfdr4DOs+7zOtmN8F9dfB16zf+OPwG/i8rncvtbNWPfgc0BZXJqjgK/t+2EZcKid91lRaUrs815Lh9fhkLh8DrePD+dzeHw+XZ6j3k5iKv+A/7EvzEnARKyHqJFYob/SPoG/AcbZ37cSJQhAEdbDeqf9/Vo7zYgeyh5tn6ADekgzB/gk6vuHtr232d8zsB6cY+3vR9g3w2W2rdOBd4EPAGGnOQzroZsMjLdvTn/cDbYWW0ixRKYZ+HWC5/RKrAfvMGCEbcMVUfvPBQJYwjgZmGLfqIX2/pOBU23bdsMS0a3EVlYR+6IetHjx3WZf34nAsfb3P/fluvbwUCdcFpaYPw0sBkrtvwwsgXnXPv5n9v1woX0tjox70DcAZ9lpdk3gHJ+D1XD5jX3MIVgP5byoNH+2f/+l9rneO+qaFwBrsJ6HsM2Obs7H41hC9SywO3AgljA/EZXmOCAEXG2XdZpt381x+cQLfW/5noTlRrvSzvd3WKIaEXqse6zZ/r0TgT2AF4AfAG9/n98u7okG4C+2HefZNrwJ3GFvO9ve9vO4Z+F4YAywF/AqsBJwx13/H7HEfixwu/2bx0XlczmWkO8KHIB1r70ftb8CSxcesc/HkVgNrYhAY92T3wEvAfvYZV2LpS+T7DTlWI2Nx+x8jsa6r9JX6IEsrFrpgrjtS4gV+o3RN6S97TniBAGrZgsCN2IJ2cweynYAbwH/BrQe0h1m55kLZNon/Srg3/b+o7GEIdx6fA+4PS6PEfaF2LOHcpZijRfECCnwR/sGProP5/Ve4B3siqWL/T8Bf+1DfhqWKJwZb1/cgxYvvsvi8vkbsZVmQte1m4e6r2U9ArzXxbVtA4bFbf8H8H/251H2tbu+j+d4LXBx3LZD7Lzy7XvfB/yxh9+5CrgpgevzOFYL0BO1bTawKer7h4ARd9zltg3uqHzihb63fD8Cno7L905ihf5xOvd2PFjC98uobQk/v93cE1/FbfsW+LqL5+zOHvIpsG0/KO76XxmVxolVcV3UQz572cdV2N9vte8JR1SaY4kV+nOwGg/OuLzeAe6xP9+CVdlG98SOJwGhH0wf/RisC744bvtH4Q9CiFysWuzTuDSfxGcmpXwXuAurq/6IlHJ+V4UKIRzAk1i1/EwppdmDjZ9gCfkhwMFYJ/lJYC8hxDCsFvy/pZQtdvrpwBVCiObwH1YtDVarFSFEkRDiASHEciHENjvNbsDIuLIvxLqwR0gpF/ZgYzyPYbWaVgkh/iaE+FXYryyEKAZ2wepGd4kQYlchxDwhxCohRCNWD2tYF/b1xldx3zdidU37dF23t6wemA64gY1x1+ss7GsVxb/jvvd0jouwztXdcfn+0z52LNb19tLDdegj30sp26O+x//+3bB6ldG8b9swZjvynUwPz6/NdODkuHOxxS47cp4TfX57YGnc92qs1m78tuLwFyHEnkKIl4UQa4QQTViNIOh8r0fuLyllEKvXEjkP9kyjt4UQ6+18wucgnM9k4HMpZSgqz/h7fTpWz21b3Lk6mI7zNBlLb6InI8Sf7y4ZzFk3wv4vtzONldAS8IOwuqhjhRBC2lVeVBo3Vld0KnCYlHJDT3lKKduFEIuxulp+4B0pZa0QYjlWi/AILL9kGA2r+zivi+yq7f+PY7XyZ2F1z31YLdn4Qb5PsFo5vxNCfBn/W3qw+SshxK5YvY3DsVqfNwsh9o9O1kMWr2P5Ky8B1mP97o+6sK83/PGm0TH4n/B1TUJZ3aFh9ZamJ5BfS/SXXs5xuNzLsVxD8WzAcpeF7UwGXf1+0cW2aBK5Bv3JNx4N63m4vYt9WyLGJPD89kKgC7u62qbZ5WViVbQfYbl6ws/nt3S+17u9v4QQI7BcRPOwXL11QCXWGIM77pj4PKLRgO+xXKfxtNr/RQL5dMlgtuhXYZ3Ag+K2Hxj+IKVsAKqw/F7R7E9nbsJqpR+ENfjzp+id9oV9FatWPERK+VN8Bt3wDpagHwH8K2rbyXY570SlXQLsJqVc1cVfs53mEOABKeWrUsqvsQbyRndR7tdYIjITeEgIEf+AdYuUsllK+bKU8r+w/H2TgEOllJuxhOaYro4TQgzHOj+3SynfllJ+h+XeKO4qfX/p43VNBn4sd100S4A8LD9x/LXq9d7o4RzXYFWQE7q5D9qwenltdHMderC5v3yLNfgXzSFYjYwftyPf7+j8/MZ/X4JVsa3u4lxEz/K5iR6e3xQwCWts4Fop5btSyu+x3GoJP2c207H861dIKT+WUq6gc2/yO2C6XZmFib/3l2DpQGMX56nKTvMtsF9cPj9LxMhBE3rb3fE34BYhxIlCiAlCiDuwBmyiuQvLHXKmEGKcEOIKYAZR
NZkQ4lCsgaazpZSfARcAc8KtWCFEDlbLewLWQJQphCi1/zJ6MfUdrG76nnS00N7BGiQNENsFuwE4SQjxP3a3cIwQ4lghxKNR5awAzhRC7CGE2BOrh9HlAy2lDD+gvwAeE0L0er2EEP/PPle72a3O87BaST/YSf4MXCSEuF4IMclOd6kQohDLF18LXCCEGC+EOMC2z9dbuf2g1+uaRNYAE+3fWiiE8GBdw0XAfCHEyUKI0UKIaUKIy4QQF/SUWQLn+Frgv4QQ1wkhdrfv7V8KIf4OViVh//6bhBCX2Od6qhDi6jibDxJCjLBt3p5n9TbgV0KI2XZZOpaw3iWljG+t9oW7gNOEEJfb1/BcrAHoaOZiiepTQoh9bdfg4UKIe4UQo6H35zdFrMMac7vMfk6PxOqZ9fX+W2kfc5X9236JpQPRPIAl/g/az9zhWH57osp7GuuavyGEmCGEGCWE2E8IcbWdJ1izEouwGn6TbJtvJRESHfBIxR9WTfh3rC50A/AQXU+vvA2rSxSehncN0CQ7BlDWY9200Xn/3T5xw7DcLLKbv3N6sdFh27Y0alse1sDRoi7SH4wlIE1YXf7vgXuwB1CwKo3FWOK5FviDnf7xqDzWEjvYORbLf/g03cy+iEp7EdaIfiMd0zdPiktzJpZPsx2r+/wGkGfvO9Te14ZVKf2KuIHBLux7jy6mPMaVeR2wNtHr2sPv609ZBVjd64boa451/91u3yd+rO77W1jjItAxGPezfpzjX2I1AlrtdF8BN0TtF1junRV22TXAC1H797HL8JHA9Mq4bWcBMm7b2Vj3oh/L134rCUyvTCDfy+38fFj38dl0nl65B9bUzHo73SqsZ72ABJ7fBHSkq3sg5pmyt72FtQgt/P0ULKFuw5paeyjWcx2+P7q7/vHPQ9jN6cNyBYUHWg+LSnMU8A3WM7csKs2votIMxxLzjVHX6WVgr6g0R2L19tvt/I4ggcHY8JS/HQohxD+AqVLKaYNtiyJ5qOuq2FkQQhyCNSA+RVou3JSS9iEQhBDlWP7wd7G6xydgLcq4dDDtUmwf6roqdiaEEL/H6ilXYY2D/Q/w2UCIPJD+LXohRAnwPNaAjher23SflPLhQTVskBBCfEv3Ux2fklJePJD29Bd1XRWJIIQ4E8uN0x2TZeITKwYNIcTtWCtxS7BchAuBP0kpt/R4YLLKT3ehV8QihBiJtQS/KxqlNbNGoRgS2BMpeloTsVbGzitXdIESeoVCoRji7CzRKxUKhWKnRQm9QqFQDHGU0CsUCsUQRwm9QqFQDHHSah69ruv/wAq7udkwjN17STsCeAJrlaoDmG0Yxpupt1KhUCh2LNKtRf841tLgRLgOMAzD2AvrbT4PpMoohUKh2JFJqxa9YRgf6Lo+KnqbrutjgPuxgvm0AhcYhrEcK75Drp1sGNaKM4VCoVDEkW4t+q54CLjMMIxpWG9cCrfcbwLO0nV9A1bAqssGxzyFQqFIb9Ja6HVdz8aKT/+CrutfYS2FLrN3nwE8bhhGJVYY33m6rqf171EoFIrBIK1cN12gAdsMw9izi32/w/bnG4bxia7rXqAQ6+XdCoVCobBJ6xawYRiNwBpd108F0HVd6Lo+1d79E1ZsZnRdn4QVGKt2UAxVKBSKNCatYt3ouv4s1ktCCrFexHAj1puAHsRy2biA5wzDmKPr+mTgYSAba2B2lmEYyXrZskKhUAwZ0kroFQqFQpF80tp1o1AoFIrtJ50GY1XXQqFQKPqO6C1BOgk9VVXpseapsLCQurq6wTajR9LdxnS3D9LfxnS3D5SNyWB77CsvL08oXcqEXtf1POARYHes1vp5hmF8kqryFAqFQtE1qfTR3wu8ZRjGRGAq8H2yC8iYP5/iffelrLKS4n33JWP+/GQXoVAoFDs8KWnR67qeCxwCnANgGIYf8CezjIz58xk2axaazweAc+NGhs2aBYBv5sxkFqVQKBQ7NCmZXqnr+p5YMWq+w2rNfwFcbhhGS1y6C4ELAQzDmOb3J14XuMaNQ/zU+eXvcsQIAitX9t94wOl0Egym9/uG093GdLcP0t/GdLcPlI3JYHvsc7vdkMBgbKqEfh/gU+AgwzA+03X9XqDRMIzrezhMxg/GSilpaWmhKxvdn3Tv7vcfcEC/7A6TrjeGlBKv14vb7R7SA0wDRbrbmO72gbIxGSRhMHbQZt1sADYYhvGZ/f1FYHZfM2lpacHj8eByuTrvnDGj2+M8fS1oB0FKSVNTE01NTRQWFg62OQqFYgchJYOxhmFUA+t1XZ9gbzoSy43TJ6SUXYv8TooQgtzcXOrr61m1atVgm6NQKHYQUjnr5jLgaV3XlwF7AnNTWNZOhcPhYMmSJYNthkKh2EFI2Tx6wzC+AvZJVf47O+k4hqBQKNKTIRfrJpVz67du3cpvfvMbDj74YI466ijOP/98tmzZEpNm7ty5jBw5krvvvjtmu2maXHDBBZFjTz/9dNauXRvZf95553HUUUcxY8YMTj75ZL755puk2a1QKHZuhpTQh+fWOzduREgZmVufLLEXQvD73/+eDz/8kEWLFjFy5Ejmzu3wSN15550sXbqUxYsX89FHH3H//ffHHH/qqafy/vvvs2jRIo455hhm2fP+Ae655x4WLVrEggULuPjii7nqqquSYrNCoVCkVaybnsi94QZc3/U8nuv+4gtE3Fx8zecj76qryHzmmW6PC0yeTOOcOb3akJ+fz4EHHhj5vvfee/Pkk08CcP/997N69WrmzZuH2+3mmWee4bLLLuPhhx/mggsuQNM0ZkTNFJo2bRqPPPJIx+/LzY18bmxsRNOGVB2sUCgGkR1G6BOiuwVXfViIlSimafLkk09GxPuSSy6J2e/1enn44Ye7Pf6xxx7j6KOPjtn2xz/+kffffx8pJU8//XTSbVYoFDsnO4zQJ9LiLt53X5wbN3baHqqoYMuLLybVnuuuu46srCzOPffcPh/74IMPsnLlSl544YWY7XfeeScAL774Irfccgvz5s1Liq0KhWLnZkj5B5pmz8bMyIjZZmZk0DS7z2u1emTOnDmsWbOGBx98sM8ulscee4yXX36ZefPmkRFna5hTTjmFxYsXs3Xr1mSYq1AodnKGlND7Zs6k4Y47CFZUIIUgWFFBwx13JDXI2e23386yZcv4xz/+gcfTtzW4Tz31FE899RTPPvss+fn5ke0tLS1sjOqJLFiwgLy8vJg0CoVC0V92GNdNovhmzkxZ9MoVK1Zw3333MXr0aE488UQARowYwaOPPtrrsc3NzcyePZvKykpOP/10ADweD6+//jqtra1cdNFF+Hw+NE0jLy+Pxx9/HCF6DWGhUCgUvTLkhD6VTJgwIabl3Reys7PZsGFDl/uKiop4/fXXt8c0hUKh6JYh5bpRKBQKRWeU0CsUCsUQRwm9QqFQDHGU0CsUCsUQRwm9QqFQDHGU0CsUCsUQRwm9QqFQDHGGlNC/UjiDx8srOv29Utj9+2X7Qirj0Ye5++67qaioYPny5UmxWaFQKIaU0Bf7p6FJd8w2Tbop9ifnRVepjEcP8PXXX/P
ll19SUVGRFHsVCoUCdqCVsZ/l3sBWV8/x6EP4MQnEbDMJssX1Df8cfkq3xxUEJrNf4+DGo29vb+eaa67h/vvv59RTT+3VFoVCoUiUHUboE8GBm4xQMT7HZhASpCAjVIQDd+8H95Fkx6O/8847+dWvfsWIESOSbqtCodi52WGEPpEWN0CrVsNLJQcQoh0HHk6oe4tMszjp9iQzHv2SJUv46quvuOaaa5JtpkKhUAwtHz1AplnC2JbTQArGtpyWEpFPdjz6Tz/9lNWrV7P//vuz3377sWnTJs4880zef//9pNuuUCh2PnaYFn1fmNp8BdtcPzC1+Yqk5x2ORz9v3rx+x6M3DCMm1vyll17KpZdeGvm+33778cQTTzBx4sSk2a1QKHZehqTQZ5ol/HzLS0nPN1Xx6BUKhSKVDEmhTxWpikcfz2effdavMhQKhaIrhpyPXqFQKBSxKKFXKBSKIU7KXDe6rq8FmoAQEDQMIznLUxUKhULRJ1Ltoz/cMIy6FJehUCgUih5QrhuFQqEY4qSyRS+BBbquS+DvhmE8FJ9A1/ULgQsBDMOgsLAwZr/P50uheTsumqahaVqn85VOOJ3OtLYP0t/GdLcPlI3JYCDsS6XQH2QYRpWu68XAQl3XlxuG8UF0Alv8wxWArKuL9fIEg8F+FVyjCf6Qn8GD9T6KTdmvPLpi69atXH755axduxaPx8OoUaP4y1/+wvDhwyNp5s6dy9///ncuv/xyrrzyysh20zS56KKLWL58OR6Ph8LCQm6//XZGjRoFWIukPB5PZBHWtddey2GHHdalHaZpYpom8ecrnSgsLExr+yD9bUx3+0DZmAy2x77y8vKE0qXMdWMYRpX9fzPwMrBvqsqK555sN5+5HdyTndxgZqkOU/zQQw+xcOFCFi5c2K3IKxSKHZ/CGWeQe/VtaDW1A1JeSlr0uq5nAZphGE325xlAYlHJuuGGXA/fuRy9pvMDX7odSCGYl+XmG1fvsSsnB0LMaWzvNe9UhilWKBQ7D+5vf8C1cg1Zz7+GefapaBefhVlSlLLyUuW6KQFe1nU9XMYzhmG8laKyYtjg6OikSPv76JCZ9HKSHaYYiMS7mT59OrNnz2bYsGFJtloRpkYTnOYMcJ8mkureUyi6Q7T60DZtxrGpxvrut96doT3yDCVPvEDLaSfQfMX5KRH8lAi9YRg/AlOTmWciLe4aTXBASTZSCACkEDQ44IG65PrqIblhigHmz59PRUUF7e3t3HjjjVx33XXcd999yTRZEcU92W4WC8k92W7mJnBvKRTdIiWioQnHphoc1bXW/02b0ao347CF3bFpM1pDU5eHC9OE9nay5r2E64cf2fJS9w3E/jKkYt3ck+0mXs5Ne3syH+ZwmOLHH3+832GKn3/++UiYYiDy+kCPx8PZZ5/drwpE0TvtwGKPg2ey3JgCnslyMyEYosSEDCnJkF3/90g1F3mnxDTRttRHBNtqkUcJeHUt2qYaNF9bzGFSCMzi4YRKiwmO2oX2A/bBLCsmVFpMqKyYwlMv6kjrdoMmaDntRJqvOD8lP2NICf0Xbid+uzUfxi8ES9xOrEd8+0lFmOLW1laCwSC5ublIKXnllVfYbbfdkmLvzkwAWOHUWOZ2sNTlYJnLwfcujUDUPRIQgmvyMhPKzysl3u4qA7Or7bHbuj02Kq0bEL1aokgKgQCOzXVomzpa4dHi7di0GUdNLSIQO/tPOh0RwQ7sNp7QUT8jVFZCyBZys7yYUHEhuFw9Fi/dLtA0zLN1ai8+C7M4dVMsh5TQL6hrSWn+qQpTXFtbywUXXIBpmoRCIcaNGxczm0fRO0FgpVNjmcvBUrcl6t+5NNptUc81JVMCIc5s8fN0ljtG7D1S8viWVjIk+AS0CYFPgC/8XxMdn7v43ypgi1OLfA8f3y76LtlalPhnaX7cRVndVgo9VxxdVzbh/0Pqwe8KX1vEjaI1t5K98seIfzyyffMWhIz1AZheD6Yt2v799rbEu6zYao3b283h+dDHnnw8/t3G499nKs1XnE/B5ImYKZ7+OeSvdzJJVZjikSNHsmDBgu0xbaciBKx2anYr3Wqxf+N00KZZwpptSvYIhDinxc9Uf4gpgRCjQhIBXJ3r6dRilsBbXmfSffUhoK2byqHz9s5ppNdLfTAQ+d6gCarj0rYKCPWjQnF1UQF4I5VH4hVIiTAJuB1dpvGmwt0lJaKp2WptV2/uEO+YVvlmtG0NMYflAuawnEirOzBpbETQw63zUFkJclgO9ON89pW6Bc/GfG+u0fj4nmyqvnBz7oLki74SekVaYwI/OjS+dmsR98vXLgettqhnmpLdAyHOavUzNRBiit9kdMjsVmAGwr0XxgFkSciKtBr7NiGg0JVNXf22XtMFoIveRmzF0amX0uV263OdpnWZVnYpgEEozOrWtp7cXZ22myaFdfWUbKimaEM1BZs2k7+xhmFVNWRX1ZC1aTMZVTU4W1o7lRMqLCBUVkxwl3L8++4ZJd7F5E6awBavC5mVmIsuVUgJZgBCAUHIb/1v2uRg4TwHy+aXICWE/KmpZJTQK9IGCaxzCEvQbb/61y4Hzbaoe6Vkt0CI01v9TAmYTA2EGBM06X11RQfR7r10XzGZKC7AJSG3nxVKIkisajC+UvDk57GpoSGmwuiqt9IeCuLdvIWsqhqGbdpM3sZqCqpqGF5VQ/GGGko3VlO2sQaPPeUwTNDhoKq8mDUVJWyYOoENxx3KhspSNlaUsKG8lE0lpWzOL8KFh0y/JLMdMtshw2/9edshp96JozmAxy9w+yVuv8Dll7jbBS4/uALg9IOzXeDwgzMAmt/+Cwg0P4iAQAYsIY6IdcD+7BeEAmAGOwQ8ki4oMO1tZrBrEZdIRIpHZpTQKwYFa42DiLTSl7otUW+wRd0tJbsFTH7lCzDVH2KPQIjxQTPpN2yqu8w7ClKCDNEhXoFo0bJFLPw5IHAEICMgyPI6kNUmzrpa3HU1uLfU4K6vwVNfg7ehBm9DNRnNNXhbatFk7HqWoOamxVtGk6eUJvc0lhWV0uAoY5uzjG2UUy9KaDCLCQadhNaCudISTRkQlASgJNQXccyI+RYCfPZfV4TckpAbTJf12XSD6QbplkgXVu3qlggPiCyJcEs0l8ThlmgucDglTjc4XBKXS+JxSVxOcLskbpek9vEM2tY5QIqUizwooVcMABKo0kRMK32pS6PeXtzmkpJJAZMTfAGmBEJM9VuintwAFhavFM6g3v0toU2ltFxxPb7HzgVTgD+5pUkJZtAWzm4EM0ZUo9MFLDHN8Gg0bM2MbSkGwPQLQsEO0Q2FW5rBjtZkOI/I/7htkVZnsMM2ZGfB8dBMHlUMo5o8NsV9tr5ns7XTcT5yaKCUWsrYxiQaHWU0OUtpdJXR7LbE3e/JQ3ODwwUOt8ThkmjO8GfIdEly3CEczhCavc3hsoXUFlWHC0tgw/vC25wShy20+YW5tPgaYraZLknADQG3den9Hmh3S9rcgjaXoE3remwl2t3V1s3YSvT4SdfuLkHmRe1Mv9nF5MdciBA4U+SyCaOEXpF0qj
XBJ8Lk4xwPS13WTJg6W9QdUjIxaHJsW9AS9UCIiQGTvk1U7T+5PxzDmhvn0P7a8WBqEOp4BBZdnxsrpoFuxDEQK8jxgmkJeLIe3LxOWzSnjBEtLSyUTmyh6xBApwccOWaMQFotTpNMWU92ezXZ7ZvIaqshq7WarJZqMprtVnhjDa625k7lB3Ly8ReWECgsxqycyqa8PGtaYWkxoYpizIpitPwsNKck2w25XfrWAsDAxHkpLJTU1fkTSyyx4qgkgXBWXY6bOOHvd7l56joXe9/iZrfHXDhCIJSPXpGO1GqCZfbsl/C0xhqHBgTRnG7GB02OaA/as19MJgVCcZ3o1BDER4O5gfU/NFD1NdR9lcu2r8rxffX/0d1ckK9elmiukC2UAqdL4HRpOF1OHE4Nh1vizjK7bUXGtjbDrcrodFaXvrv0Vrkd2wpL8mlo3mofQ0TARW9TWUIhtNqtOKqjVmXas1Si54qLttjBZ6lp1iKfsmJCE0fSXjYdX9SMlFBZMaGSIvB2VMuFhYU019Wh2We1Q1BUWAkBeACPhDwpiT4nNZpgscdJexm8f7+fz68PcMAcFwd/oHFT9UYAACAASURBVIReMchs1YQl6FF+9U12S11Iydigyc/ag0wNmBySmU1F3VYyU/i8+0UTTY51NDnXso2f2LwyQN3SXLZ9VYHvi0kEl+4H7V7Lvvx6MvdeTsnvl9O8uoCWD6bZLhtvJL+xNfvQ4qgmoDV2KsthDiMzVEpmqIysUCmZZpn9vZSsUBmZZhkeMz+p/tb8QgjVxcVp8gdw1NT2uErTUVOHCIViDpNulzUTpbQI/9TdMI8tjppaaM8PLx4OTiUJA0H8Kv7WUsmH97dT2eKHzrffdjOkr2qyB9pSGY++ra2Nm266iQ8//BCv18u0adO44447ttvm/rJNYLXUo/zq650dTcnRwRD7t4eYEvAzJRBi90CI7Kg7tzBDo247RV4iadfqaXKspdG5libHWpqca2kQP1G/2kHzf8YSWLIPwSX7EPjPWeCzps85clrJ3bOKogu+p3yKycjdcygekYUmRgIjadVqMOREGm+Zhe+xc9FCXky/xi9r3wUgIFpo1TbR6qimxWH9b3VU02Jvq3d9h0+rBRH7AzXpsSoBu0LINO1KIKqCyDBL0Lp57ESrD62qJtLy1hqbGbZ6bdRc8c046jr7w83MDHtRTwn+g6bHzAsPlVsuFbMgb7sX+SiSx0BO84UhKvRhgf/m+aykzk0Nx6MPhyq++eabmTt3LnfddRcQG4/+sssuw+PxxES1PPXUUznqqKPQNI3HHnuMWbNmYRgGALfeeisej4ePPvoIIQS1tQPjvwRoFPB1eJDUdr+sjRL1UUGTvQIhzm6xRH2PQIjcJLXUJRKfVhMj5OHPjc51+EUjodVjLDH/fDrmkpn4v9wTs9kW9cwAhXs0U3FWO+VT2imd4qdgdAihZQJdz5vONEuYkHsYK/76X0y98gfab76VjUs6BmNdMothobEMC43t1m6TAD5tc6QiiFQIdmVQ5/4PLY5NmMIPErzbBDnVDnLWOxi+IY/89dnkrfeQs1EjsyqId2Mbroa2TuVoecMi88EDUyZZgh7nTpE52QOyyEeRPAZ6mu8OI/SLbshl83c9x44I+aFhg4OWzfboT9QsgmdOGd7NUVA8OcBRc3rvL6UqHn1LSwsvvvgiS5YsQdgPbFFRamJTtwj4xuWIcr9o/OjsGC3bJWiyRyDEGa1+ptjTGvO3U9RNgrQ4qiwRD4u5Yy1NznU0OdYS0iyBkxLMtaNx/vsYzM9Px//FVFr+M5pAg+XVd3hMiicHKTslQOmUekqnBhg+LojWl4n0NlObr6Al80emZ51L5ty+95U1XGSZFWQFy9DqtnYsq49bqSmqN+HcVIvmix3hk6KF1pJWmipDVI3303R4iKZKk8ZK67+vLAOtohyHs9h2DUX1FEKlZJnlSXcVKYYuO4zQJ8KWH1y0NwkGIixUMuPRr127lvz8fO6++24WL15MVlYWs2bNYt99t++lXD4B3zjD7hdr9ssqpxaZ8lUWMpnqD3FKa8BaVRowKehnOOcQ7TQ510da5QFHDZsLltPkXEOzYwOm6FgI45BesgMj8K6bhnvJZbR/MYXm/4xm63+KaNtqVeaaS1I8KcCoEwKUTt1G6RQ/hROCOHqu63ulcMYZ+KdNQbvifE4v+Bd1Zg8tqXDQq6rYGCkxIWi7C3pVUmQtsd9tMv6jYpfZxwe98opWTG0TwrEJp6Maj6OaVscmghlbqQ+uo961HJ+2uRdXUce4QdhdlBUqI8MsRmM7T5pih2eHEfpEWtzNmzUW35PN189nIc1Yl82vX9ySVHuSGY8+FAqxbt06dt99d66//nq+/PJLzjnnHD7++GNycnI6HR8C5mkme0e9NKMN+C5q9svXLgcrnBqmLerFIWsl6Um+AFMCJlMCIYr6KOoB0RoR8ibHOhqda+zW+TpaHBtjhMgtc8jWRlIQ2I2RvuNwbZpM+5I9aP7Prmz5qoCNy9yRnpdwSArHBxk3w0/ZlBZKpwYomhjA6e3Okv7T8WafVzF/+XM8Rx6E1tZutcarY6MYarVbew56te9ehMpt8Y6KmWIWFvTJH+6SmQwLjWFYaEzM9kJXR5e+w1VkVQIRd5FWbbuKvqLF8RamiPPvSkGGWRTVE4gbRLYrCJfsPoyBYsdnhxH6RMguNpkxt5EDr2juVvCTQbLj0VdWVuJ0OvnlL38JWC6hgoICfvzxR6ZO7fz+lkZNsB7J7/MzGB00Weq2RD1oi/pwW9SPbQsyxQ7qVZqgqLeLbRHxbnKusf3l1swWn2NzTFpPqIDc0ChK/PuSExpFbnAkOcFRuGrHYq4Zyw8ftlG9zMV3S100Vdm3mpAMHxdk10PaKZ0aoHSKn+LdgrgyBmA6nj2dMPxmH8fzrzD8+Vciu83c7IjfOzB5XCdfeKi0GJmXOyj+8IiryKywpqB3gUTSLupjKwJ73KDFUU2Tcx01jk/xaw2djnWZuXbvIH4Q2R5INsvwmAXKVbSDMqSEPky84EcPtG0vqYhHX1BQwIEHHsgHH3zAoYceyurVq6mrq4vMyImnWQikgE89Tr53muwVMDmiLRzUK0S5Kbt9HK3Bz1rbPx4r5E3OtbRrsUG0MkOl5ARHUdl2BDkhS8hzQ7uSExyJW+bS3iioXuaiepmbr5e6qF7mouGn8G3lJn/XIJX7+imd0kLZ1ADFuwfwZA/gHGt/AM+Hn5Hx6gK8b7/XZRIpBP59prDl//4xcHalAIHAKwvwBgsoCHb/PoOAaI0MGsf2DKxt21wraNVqunQVZYZKonoCpRRrY5HeHLuCKCMzpFxF6YiQMm0WNsiqqqqYDU1NTV26LgaLFStWcMQRRzB69Gi8Xsuv0Jd49BMnTqSysjLym8Lx6AHWrVvHVVddRX19PU6nkz/96U8cccQRXeb1wU9ruXbFt6w7eybTeIJz/P/gpLqOMMcSkxbHpk5C3uhcQ5NjHUGtI/qfkBpZoUpyg6M6CXlOaCRO2bG8yd8iqPnGRbUt6NVL3Wz9s
aOtMGyXIKVTA5RNDTDh4Ay8I+rwDhuE+ysYxL34CzJefZuMf76Ltq0Rc1gOvp8fQdZzHS34+Df7pPLFD/1hMIOumQRjZhW1RnoH1TEzjbp3FYV7AnGDyHaFMJCuonQPXrc99pWXl0MCg5JK6HdAPvhpHdeu+IYfzz4Dp/Rxu/83jA5URmaxNDl/sqb12WjSTU5wRIeQB3clJzSS3OAoskKVOLqIKhPwweZvrZZ69VIXm5a52LLSGZnJlFMWonSqn9IpAdsFEyCzoGNxz4A/XKaJ+7P/WC33N/6FY0s9ZnYWbTMOxXfi0bQfegC4XZRXTBvQN/tsD+kuUBJJdqHG+m3fdjluEK4Q/FrnUMsuMydmjCBmzCDJrqJ0P48DIfRD0nWzMyGF4AXPDGa6ricnNJK84Hh2aZsREfKc0K5khkrRegjmG2yH2uWxLfXaFU6kHR0wqyhE6dQAE0/wUTYlQMmUANnFZrf5DRhS4vriazJeXUDGG4twVNdiej20H30IvhNn0Hb4gZARO6I70G/2GcoIBBkMpyA4mYLg5G7TBYUvMk4QqRC0jp7CNtcP+LTNSBF7T2nSHTWjKHY2kbWtXLmKEkQJ/Q5OCC+t3x/EOT+9BC430uUCjwfpckV9DoFLA2EF4qpb4Yy01KuXudj8vSsShCsj3xL1MUe1RQZLc8rM9FmPIyWur5dbLfdXF+DcWI30uGk7/EB8Jx5D+9EHIzO7j6ZTt+BZMubPp/D4X+CoqqK4vJym2bPxzZw5gD9i58IpM8gNjSY3NLrbNGFXUewgslURtDiq2eJaxnrvAkIiblGZFHjNwo5KwIwdRM40y8glBdO3djCU0O+A5Mt1HMHtXNR6IX8YDdk1Xacz0ahhIhvYh5/Yh5/EdKrkVAJ2WDGvaKDSs5RJOd9Smb2cypwV5OVstYJwf+tCrvIgX3WBy2X5s91WRZLIZzF8OJ72dquycbuR9vaePuPoptchJc7lq8h4dSEZry7AuXY90umg/ZD9aZr1B9qOOdRaHZoAGfPnM2zWLDSfFYncuXEjw2bNAlBiP4hoOMkyy8kyyynqYVaRX2yLGTdoiaoQmp3rqXH8u2tXUWlO1BhB3JiBvc1rFiCS//LDtEAJ/Q6IuxkwYfw9pbxZcz1rHAdz/t+XsHVjBlU/FlC1ppCqn4qorirBb8dZd7vaKS/6iX2Hf0hl/o9UDFvJcPd6tKAfEQgg2tshEEAEQDQ325+jt8d99vcey7X7tchdIx2OjgrA5QLNBX4HWquJCEgkIHM9BMYWESrNRzqb8C58Fc/7b0UqI+l29/g557bbIiIfRvP5yLn9diX0aY5A4JH5eIL5CbiKOnoG5DRS17Y60lvo2VVUEjXFtPMgckaouMsxrXRHDcbugKz5+GMefehT8hbdhomGxIk7x8TfZLVGnF5J8W4ByvbsGCwtGN2/UAHdImVE9Glvt8Q/EAC/H+H3k5+VxbbNmxF+f8x27Eqiu89aXT3OH37CuXojjvpmJGAWZBMqGYZZkInAtPIKVzbRn+O3xUVw7PHnAK3nnENg7FiC9p9ZWjqoMWTSfRARdlwbLVdRbcdsoqhxA6uCsAaVw+E5ovGGimLWHMSPG2SFSnHJnnuY4RfgxJPv3y1mBl1vqMHYIUxDaBfafmwlFNWy2O1kX2QWTOH4IFqqr6wQEdcLWVmdoo/LwkICCQqAtrGajNcW4l24APfS7wDwT5tC84lH4zv+aMzSfsb9CYU6VQpFxx+Po6YLX5fLRcZLL5HV1BTZZGZnW6I/ZgzBceOsz+PGERw5MhK+QLFjYrmKrNk9PS1A84ttncYNwp+bnevZ7Ph3p7UnYM0qih9Ejg5xXRCYTINrZafZccX+fVLye1MqB7quO4AlwEbDMI5PZVnQEcek+YrzMUuSHxQsVWGK169fz3nnnRdJ29jYSHNzM99+27nG744Zt3Ve7ZjOaDW1ZLy+iIxXF+JeshQA/5RJNFx/OW3HH02osmz7C3E4ICMDmZERqYgar7suxkcPYGZk0HDHHfhOPhlt82acK1fiXLUK5+rVuFauxLN4MZkvvRRJL51OgqNGRVr+kQpgzBik6oEOGaJdRfnBSd2mC+KLXXwWFcW0xbGJKs8HXbqK4ltHAo2pzVek4JekvkV/OfA9kJvicoDoOCav0XLaCUkX/FSFKd5ll11YuHBhJN0NN9xAqAe3gxBWfJgdDW1LPd43/kXGqwtwf/olQkoCk8bS+KdL8J1wNKFdd0m5DWE/fM7tt+OoqiIUN+vGLCnBX1KC/2c/izlONDfjXL26oxKw/7yLFiGCHUHNQqWlBMeOJRDuAdi9AbOkRIUSHqI4ySA3tCu5oV27TdPhKqqODCSvzHiera7vQJho0s3YltPINItTZGOK0HW9EjgOuBW4spfkvZJ7w524vvuh13ThOCZZ814ia95LhIqHE6osB3f3Xe3A5PE0zvljr3mnKkxxNH6/n5dffplnnnmmWzuyy0KU7ykx18uUxPJJJmJbI9633iXj1QV4PvocEQoRGDOS5v++AN+JMwiO6/7hSBW+mTPxzZzZJ/+yzM4mMHUqgfjYQ4EAjnXrcIXFf+VKnKtXk/nCC2jNHe9bNXNyOrmBAmPHElJuoJ2CrlxFI33H8VLJAYRoT2lrHlLbor8HmAV025fVdf1C4EIAwzAoLIxdoeiLmx3RF8JRBx01dWitbQR2n9DvvLoimWGKo1mwYAGlpaXsscce3R7rdGns9gvB8XcGWDTXwbpPRadzN6g0NuF87hVKn38FsfADRCCA3HUE5lUXY556PHKPSXiEGLAXgneH0+lMznkrK4P994/ZFJQSNm1CrFiBWL4csWIFzuXLcX3yCeLFFyPppMsFY8YgJ0xATpzY8X/8+OTZl0KUjdtDIbubZ7NUe4TdzbMZUdD9TKLtJSVCr+v68cBmwzC+0HX9sO7SGYbxEPCQ/VXGt66CUV3iRFrc5RXTIp/Dy9xTFcckmWGKo3n++ec5/fTTezzeNE1M08TvrOOQG6xtgz3xQbT68Cz60FrI9M7HiHY/wfIS2s47Dd+JMwhMndzhutiS3JDR/SXlM0bcbthjD+svCtHU1NkN9O23ON94I8YNJCsrkbvuGjMTKDhuHGZxcdq4gXbUWTfpwgTtYrYUf8eE2ot7fjdCN9izbnolVS36g4ATdV3/BeAFcnVdf8owjLNSVF6EVAs8JD9McZjq6mo++eQT7r333mSamzra2vG++zEZry7Es/ADNF8boeLhtJx5Mp7f6tSOGaHeU9oFMieHwJ57Ethzz9gdgQDOdesiLqCsDRsQ33zT2Q2Um2u5gMKDwNFuIPVy7x2KTLOE04O9vAAnCaTkrjAM42rgagC7Rf/HgRD56DgmqQpUlYowxWEMw+DII4+koKAgWeYmH38AzwefkvHqQrxvv4fW3EKoIA/fr47Dd9IM/PvtBQ6H1VVO01ZU2uJyRVruHHss3nBLVEq06upI699lVwSeDz8kM6pXKF0ugrvu2nlK6NixyCz1YpGdmSFV/dcteDal+a9YsYL77ruP0aNHc+KJJwJ9C1M8
e/ZsKisrI66Z6DDFYAn9zTffnBrjt4dgEM/iJXhfXRAb9vf4o2g78WjaD5quWpKpRAjMsjL8ZWX4Dz44dldjY4cbaPVqqzJYsQLv22/HLBgLlZV1zASKdgMVFaWNG0iROlL+dBqG8R7wXqrLGQgmTJjAxo0b+3VsdnY2GzZs6DHNRx991K+8U0IohPvfX5Hxytt433yn27C/isFF5uYS2GsvAnvtFbvD749xA4V7A5nPP4/W0hJJZubmxgh/wO4NKDfQ0EJdSUUHkbC/b5Px+iIcNXW9hv1VpClut+W6GTcOfv7zju1Som3a1OECCruB3n+fTMPoSBbtBooaCwiOGaPcQDsgSuh3dqTEtex7a7bMawv7HPZXsYMhBGZ5Of7ycvyHHBK7q7ExZjGYc9UqXMuXd3IDBcvLI8Kv7bkn7tJSyw1UWKjcQGmKEvodkO0ORCclzu9XWS/seG0BzrUb+h32VzF0kLm5BPbem8Dee8fu8Ptxrl3b2Q307LNojz5KeNqDOWxYpwVhwbFjCY0YodxAg0xan30pJVJKhGolRJBS4k8gRHBXOFetwfvqQjJeeRvXqrVITaP9Z9NpvvRcfMcejswflmRrFUMCt5vg+PEEx4+P3S4lhW1tNH3+eUwl4Hn3XTKff74jmdvdvRsoM3OAf8zOSVoLvdfrpbW1lSzlEwQskd+4cSOrV6/GmWALybF2feSFHa7vVyKFwL//3mw773TajjsSszCNp3Iq0hshYJddaM/IoD3eDdTQ0NkN9N13eP/5T4TZEdwrWFERCQgXXQkoN1BySWuhd7vd+P1+Vq9e3eeFSdKE5s0OpAnZxaGEwvbWa4IWIZBYAZ6zpCTf3D43SXuToK1BQ3NA5nATh7t/+YVb8qtXr6a1tZURI0Z0m9axcRPe1xZakSGjwv42/Pmq7Qv7q1AkiBw2jMC0aQSmTYvd0d4e6wayp4ZmfvZZbETRvDyCY8Z0mhIaGjGi+zeRKbolrYUerGmJwWCQjz/+uM8uC1+9YOmzWbizTKae7sPh6V5kW4TgkWw3wahtLuB3zX6y+uETD/gEK//ppX6Nk4LxQcYd3YazZvsqjcx5L6GVlVB49CHMmDGDhoaO0MQDEvZXodhePB6CEyYQnBAXe8o00TZtig0Ot2oV3n/9C8dzz0WSSbeb4OjRnaaEhsaMQWaoSQPd0Weh13VdGIYxoDFy8/LyOO644/p17EET3Bi/Hk4os52ZT27ttmV/da6HdVlu/FHdRbeUfNfiZ25je5/K/OkTN69dmo9nq8avb2xgr7Nbk9ILLb/0FqR7FfzzE8ylP9Jy2gnWXPdBDPurUCQFTcOsqKC9ooL2Qw+N2SW2bYt5P4Bz1Spc33yD9803Y91AlZWd3xEwdiwM7+tLLYce/WnR3w38d7INSRUjD/JzzF+28c+r8ll0/TCOntvQpeh+4XbGiDyAXwiWuJ1AYkJvhuCT/83m47tzyBsZ4pTXainZPdj7gX0gHIZZe/hpih56CgEERlUOathfhSKVyLw8AvvsQ2CffYiJZ9vejnPNmk5jAfFuIFlQQOHo0Z3dQLvsstO4gfos9IZh7DAiH2bK6T62rnby2QM5FIwJss/5LZ3SLKjr2NafaHfNNRqvXZrPT4s9TJ7ZyozbGvBkp67jEw7DLIXALCmi6aqLUlaWQpGWeDwEJ04kOHFi7HbTxLFpU8T9k71+PfLbb/EuXIjj2Y4wKdLjiXEDBexB4aHoBupV6HVdf9QwjN/ZnwXwsGEY56fcsiRz6NVN1K9x8s6fc8kfFWTMUX1zx/TEj+95eOO/8vC3CH5+dz176L7kThgIBsl+6OmYTfFROhUKhY2mEaqoIFRRQfthh5FRWMgWu+Em6us7u4G+/hrvG29E3EBSCELduIHMHdQNlEiLfnT4g2EYUtf1MSm0J2UIDY773208M3M4r/4hnzP/r47iydvnVgkF4KM7c/j0rzkUTgxwxov1FI5PrqvGuXwVeVfNwf2V9f5Y6XKCw4F5tk7txWelLEqnQjEUkfn5BKZPJzB9eqwbqK3Nmg0U96rIzE8+QWtriyQL5efHvCM4XAGEKivT2g2UiNDX6bp+PrAYOABIj7dG9AN3puRXj2/lyeOKePHsAn77eh3ZJWbvB3ZB40YHr/w+n6ov3Ew9s4Uj/9yIKyOJrppAgOz7nyDnnocxc7LZ+uBtZP/1sUgY5oLJEzFVGGCFIjl4vd27gaqqOioAe0qo9+23cUS9QEd6vR2LwsLB4caOJTh6NHTjBsqYPz/y7uLiuHcXJ5tEhP5srNf9XQKsAH6bEksGiJxSk1Oe2MLTvyxk/nkFnPHilj4L9Mq3vbx5ZR5mEE54YCuTT2rr/aA+4PxmOflXzsH17QpaTzqGxpv/H+bwfNpOnNH7wQqFInloGqHKSkKVlbQffnjMLrF1K65waGi7EujSDbTLLp3cQM4VK8j9858jg8bOjRsZNmsWQErEPhGhbweqgRDwILAX8HnSLRlASnYPcsID9cw/r4A3Ls/jpL/VIxJYjxVsh/duzeWLR7Mp2cPPSQ/Wk79rqPcDE6XdT869j5B9/+OY+XlsffRO2o49vPfjFArFgCMLCvAXFOCfPj12R1ubNRsoakGYa9Uq3IsXx7iB4tF8PnJuv33QhP4p4H3gDMMw7tN1/TbgqKRbMsCMm9HO4dc18u7Nw/jwjiCHzG7qMX39GstVU/O1m2m/a+awaxtxJvHt1q6vviXvyj/jWrGa1lOOo+Gmq1TsGYViR8TrJThpEsFJk2K3myaOjRtxrlxJwW9+Q1fzNRxVVSkxKZG4AkWGYfwNSK5/Ig2YflELU89s4ZP7cvja6H461XeveHn82CIa1js5+dGtHDUniSLvayPn1v+l8IRz0Bqa2PLkvWy7d44SeYViqKFphHbZhfYjjiBUUdFlklCCL/vuc9EJpNms6/ppQIau6ycDm1JiySAgBBx9awMjf9bOW7PyWP+pO2Z/wCd4a9YwXvtDAUUTg5y7oJbxxyavvnN9vpSiY35NzgNP0HrGSWx+16D9yJ8lLX+FQpGeNM2ejRk3SGtmZNA0e3ZKyktE6M8DioAvgUrggpRYMkg4XPDLv28lb0SQ+b8roH6NNUXqp0/c3D+tmKVPZ7H/pU2c8WIduRXJ8ceLVh+5N9xJ4cm/Q7T7qXv2ARruuA6Zm5OU/BUKRXrjmzmThjvuIFhRgRSCYEUFDXfcMaizbiYahvFXXdeLgXOAUcDylFgzSHjzJKc8uZV5xxdi/Ho4ebtorP24Y2HEoVf37L/vC+7FS8j74xyc6zbSco5O4zWXIbNUTG6FYmfDN3Mmvpkz+7USv68k0qK/y/4/B1gPPJY6cxKncMYZ5F59G1pNbVLyc2VIKvb1s+0nB2s/1rACFSdveatobmHY1bdReOpFIAR1Lz1Mw61/UiKvUChSTiJCn6nrugfwGIbxLNC/1xslGfe3P5D13CuUHHBSUgT/1T/ks2qBl2SKexjP+59QdIRO5ryXaL7wTGoXPYd//717P1ChUCiSQCJC/xTwCvA3Xde9wJrUmpQ4wh9AtLe
T9fR8Sg44cbsE/8QH69nrty04vbLfLwfpZF9DE8OumsPwX1+KzPBS93//oPHGK4dcwCSFQpHe9OqjNwzjfuD+qE3npMyafiJCJoT8ZM17CdcPP7LlpYf7nEd2scmMuY0ceEUzX/6tiM+f0JAmhPz9a+F7Fn5A3uy5aLVbabr0HJr++0LwJnHivUKhUCRI2r9hKhGk0wHBEEhJcMwoRKsPmdm/VnN2scnM/w2x98W1LL4nm41L3L0fFIWob2DYDXeSOf9NApPGsvUfdxOYOrlftigUCkUy6NZ1o+t6/kAa0h+k24X0emg5cyabP5xP629PIevp+RQdeRruD/+9XXmHW/jnLkh8NNz75jsUH34qGa++TdOVF1L75lNK5BUKxaDTU4v+NlvsVwILgMWGYSQUg9f25X8AeOwyXjQM48btNTYa/27jI5Ecw6F6G267Gt9Jx5D3x5spPP33tJx+Eo3XX4HMy01m0Z3Q6rYy7Lo7yHhtIf7dJ7Dl6b8S3G18SstUKBSKRBGylxdf67o+DjgaOBBrSspnwHzDMDb0cIwAsgzDaNZ13QV8BFxuGManPRQlq5IV58HXRs49D5P94DzM4Xk03Dqbtl8ckfDhCc9rlRLvqwsYdt0daM0tNF15Ic0X/wZcru0wPsk2DhLpbh+kv43pbh8oG5PB9thXboVM6HUgMZHB2JVYrfoHdF13APsBZUC3Qm+/PLzZ/uqy/wbuheIZXpquvgzfCUeTf+UcCi74f/h+cSQNt85K2os6tJpahl19Gxlvv49/r93ZcveNBMeP7v1AhUKhGGD6NBhrGEYI6wUkvWJXCl8AY4H7DcP4rO/mbR/B3SdS+8YTZP/9KXLu8iERgAAADipJREFUfgjPx/+m4cYr8ekn0O93/UlJxguvM+ymuxDtfhquv4KWC36d1m+XUSgUOze9um62F13X84CXgcsMw/gmbt+FWC81wTCMaX5/CtdirViN8w+z0T76N+aRBxO8fy7sOqLLpE6nk2Cwi+GI9VU4L7ka7e33MA+aTvBvd8AgteK7tTFNSHf7IP1tTHf7QNmYDLbHPrfbDQm4bhLx0R9rGMZbuq6PBf4beN4wjA/6Yoyu6zcCLYZh3NlDsuT56LvDNMmc9xK5c++DUIimP11Cy3mndWqNd/KZSUnmMy+TO+ce67hrLqPlHB20RNabpYah7HccKNLdxnS3D5SNyWAgfPSJKNUf7f/XYK2SvauHtADoul5kt+TRdT0D60Ulgx8ITdNoPftUNr9j4D9wH4bddBeFJ52Hc8Xqbg9xrK9i+Ol/IG/WrQSmTqb2X8/Tct7pgyryCoVC0RcSUascXddHACHDMD4BWhI4pgx4V9f1ZVivHVxoGMbr22FnUjErStn6xD3U//UWHOs2UHTMrynZcwa5f7q1I4SCaZL5uEHRETqur75l21+uZcvzDxIaWTm4xisUCkUfSWQw9jbgFuAWe358T1MkATAMYxnWu2XTFyHwnfxz2g/Zn9wb7yTz5bfIemo+WcZrmCf/guErf8Tz5de0HXYADXdcS6iibLAtVigUin6RiNDvCsw2DCPsQE/NK1AGCXN4Ptv+eiuZL79lObr8AbTnX8ENtB24D9vuuhGztGiQrVQoFIr+k4jQ/wjcoev6MOA1rFWuW1Nr1uASHtnwfPIF+Zdc068gaQqFQpEu9OqjNwzjFcMwzgLOxhpUXavr+gu6rg/Zl5tKt9uKofPbU6h/8LbBNkehUCi2i15b9Lqu/xw4HcjHatFfiNXofQU4JKXWDTDS7QJNwzxbp/bis5K2ilahUCgGk0RcN3sA1xiGsTF6o67rQ+ol4dFB0gomT8RM43m3CoVC0RcSEfrnget1Xc/Gct+caxjGI4ZhrEitaQNL3YJnB9sEhUKhSAmJzKN/FLgHKLdj3ZyRWpMUCoVCkUwSEXqHYRjRq1rVklCFQqHYgUhEtN/Rdf1vQLmu6/cCC1Nsk0KhUCiSSCLTK28G/gpcj+XGeSDVRikUCoUiefT0ztgXdF13AxiG8Y1hGC8A7VivFVQoFArFDkJPLfrngDejolAeA8wDfjsQhikUCoUiOXQr9IZhvATcALyh6/oNWLHoZ8QNzCoUCoUizenJdXMzcCywEbgS+A9wpa7rcwbINoVCoVAkgZ4WTC2y//8LuH8AbFEoFApFCuhW6A3DeH8gDVEoFApFalCLnxQKhWKIo4ReoVAohjhK6BUKhWKIo4ReoVAohjhK6BUKhWKIo4ReoVAohjhK6BUKhWKIo4ReoVAohjhK6BUKhWKIo4ReoVAohjhK6BUKhWKI01NQs36j6/ouwJNAKWACDxmGcW8qylIoFApFz6SqRR8ErjIMYxKwP3CJruuTU1SWQqFQKHogJUJvGMYmwzC+tD83Ad8DFakoS6FQKBQ9I6SUKS1A1/VRwAfA7oZhNMbtuxC4EMAwjGl+vz+ltiSK0+kkGAwOthk9ku42prt9kP42prt9oGxMBttjn9vtBhC9pUup0Ou6ng28D9xqGMb8XpLLqqqqlNnSFwoLC6mrqxtsM3ok3W1Md/sg/W1Md/tA2ZgMtse+8vJySEDoUzbrRtd1F/AS8HQCIq9QKBSKFJESodd1XQCPAt8bhnF3KspQKBQKRWKkZHolcBDwG+BrXde/srddYxjGmykq7/9v715j7KrKMI7/kQEVjAEyKAyXgAgoFgRSlQByEwwqAtH4KpcERdLgXUMFERT7rRGjYqKYBrAoiL6pRExExYClKBflIhIhGAUCreVS0GIULEj9sPbA6XTOdAqzu3dX/r+kmZkzZ/Z+Opl5zpp19lpHkjREK0Wfmb9lGvNGkqT2uTJWkipn0UtS5Sx6SaqcRS9JlbPoJalyFr0kVc6il6TKWfSSVDmLXpIqZ9FLUuUsekmqnEUvSZWz6CWpcha9JFXOopekyln0klQ5i16SKmfRS1LlLHpJqpxFL0mVs+glqXIWvSRVzqKXpMpZ9JJUOYtekipn0UtS5UbaOGhEXAIcAzyambPaOIckaXraGtEvBI5u6diSpPXQStFn5hLgiTaOLUlaP61M3UxXRMwB5gBkJqOjo13Ged7IyEhvsgzT94x9zwf9z9j3fGDGmbAh8nVa9Jm5AFjQfLh6xYoVXcZ53ujoKH3JMkzfM/Y9H/Q/Y9/zgRlnwkvJNzY2Nq37edWNJFXOopekyrVS9BFxBXATsGdELI2Ij7ZxHknSurUyR5+ZJ7RxXEnS+nPqRpIqZ9FLUuUsekmqnEUvSZWz6CWpcha9JFXOopekyln0klQ5i16SKmfRS1LlLHpJqpxFL0mVs+glqXIWvSRVzqKXpMpZ9JJUOYtekipn0UtS5Sx6SaqcRS9JlbPoJalyFr0kVc6il6TKWfSSVDmLXpIqZ9FLUuUsekmq3EhbB46Io4ELgE2BizJzflvnkiQN18qIPiI2Bb4NvAvYCzghIvZq41ySpKm1NXXzVuCvmXlfZq4CfgQc19K5JElTaGvqZgfgoYGPlwJvm3iniJgDzAHITMbGxlqKs/76lGWYvmfsez7of8a+5wMzzoS287U1ot9kkt
tWT7whMxdk5uzMnN18TS/+RcRtXWfY2DP2Pd/GkLHv+czYm3zr1FbRLwV2Gvh4R+DvLZ1LkjSFtqZu/gDsHhG7AsuADwEntnQuSdIUWhnRZ+azwCeBXwH3lJvyz22cqyULug4wDX3P2Pd80P+Mfc8HZpwJrefbZPXqtabOJUkVcWWsJFXOopekyrW2BcLGKCJ2Ar4PbAc8ByzIzAu6TbW2ZuXxrcCyzDym6zwTRcRWwEXALMpltadm5k3dpnpBRHwOOI2S7S7gI5n5dMeZLgGOAR7NzFnNbdsAPwZ2AR4AIjP/0bOM5wPvBVYBf6N8L//Zl3wDn5sLnA9sm5krusjX5Jg0Y0R8ivK85rPAzzPzzJk8ryP6NT0LnJGZbwQOAD7R060bPkN5kruvLgB+mZlvAN5Mj7JGxA7Ap4HZzS/appSrwrq2EDh6wm1fAK7NzN2Ba5uPu7SQtTP+GpiVmfsAfwHO3tChBixk7XzjA7ijgAc3dKBJLGRCxog4nLJzwD6Z+SbgazN9Uot+QGYuz8zbm/f/RSmoHbpNtaaI2BF4D2XE3DsR8WrgEOBigMxc1dUIbwojwCsjYgTYgh6s8cjMJcATE24+Dri0ef9S4PgNGmqCyTJm5jXNVXYAN1PWzHRiyPcQ4BvAmUyyaHNDG5LxY8D8zPxvc59HZ/q8Fv0QEbELsB9wS8dRJvom5Yf2ua6DDPE64DHgexFxR0RcFBFbdh1qXGYuo4yYHgSWAysz85puUw312sxcDmUQArym4zzrcirwi65DDIqIYylTnHd2nWUKewBvj4hbIuL6iHjLTJ/Aop9ERLwK+Anw2cx8sus84yJifG7vtq6zTGEE2B+4MDP3A/5N91MOz4uIrSkj5V2BMWDLiDi521Qbv4g4hzL1eXnXWcZFxBbAOcCXu86yDiPA1pTp4s8DGRHT2tpguiz6CSJiM0rJX56ZV3adZ4KDgGMj4gHKjqBHRMRl3UZay1JgaWaO/yW0iFL8fXEkcH9mPpaZzwBXAgd2nGmYRyJie4Dm7Yz/ST8TIuIUyhOMJ2Vm59MjA3ajPKDf2fzO7AjcHhHbdZpqbUuBKzNzdWb+nvLX+uhMnsCrbgY0j6IXA/dk5te7zjNRZp5N82RXRBwGzM3MXo1GM/PhiHgoIvbMzHuBdwB3d51rwIPAAc1o7ylKvlu7jTTUz4BTgPnN26u6jbO25gWGzgIOzcz/dJ1nUGbexcB0V1P2s7u86maInwJHAIsjYg9gc2BGM7oydkBEHAzcQLnkbnwO/IuZeXV3qSY3UPR9vLxyX8qTxZsD91EuuevsssCJImIe8EHKVMMdwGnjT4R1mOkK4DDKSO4R4DxKASSwM+UB6gOZOdmTjV1mPBt4OfB4c7ebM/P0vuTLzIsHPv8AHRf9kO/hD4BLgH0pl6nOzczrZvK8Fr0kVc45ekmqnEUvSZWz6CWpcha9JFXOopekynkdvaoSEYdSLll7GfA/4EuZeWNErARuBzajLNUfA47MzHObr/sKsDgzFw8cawvKlhN7NF+3IDMv5UVqdvU8oocL8VQ5R/SqRkSMAvOA4zPzMMomYE81n74rMw8HzqDsFTQd5wHXN8c6GLj/JUbcCnjfSzyGtN4c0asm7wYuG9+fqNmB9I4J9/kj099h8cDMPKs51mpgCUBEfIuyuOVJ4CTK5ndHZua5EfHh5msXUxbBPEFZhn8cMAc4KiIWUxY/Pbb+/0Vp/Vn0qskYZVUzEXEi8HHKSs25A/c5BLj3xZ6g2Vlwy8w8pNkM7XSG73C6NWVvnROA91NeBHrnvm1bofo5daOaLKeUPZn5Q+BkXtgcau+I+A2l/OcDT1OW7o97BS9M80xlN8pcP5Q9cl7PmvucD+46eHdmPgcso0zbSJ1wRK+aXA0siojMzJWs+fM9PkcPQESsAvaLiPHBzv7AVycc78aIOCkzL282vDuIsnfPO5vPz6a8fN5KYPvmtr2BPzXvT3wAeIbyilbSBuWIXtVo5rznAVdFxHXAdyivATzZfR+nbEe9hLKR3aJJNgybBxzazKn/Dtit2Ub2qYi4ATgR+C6l2Mci4mpg2ykiPgxsExGLmteDlTYINzWTpMo5opekyln0klQ5i16SKmfRS1LlLHpJqpxFL0mVs+glqXL/B5rQoGyf8akaAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "print(\"Unmanaged Memory Plots\")\n", + "system = \"dgx2\"\n", + "for exp_type in EXP_TYPES:\n", + " for exp in EXPERIMENTS:\n", + " if exp == \"duplicate_keys\":\n", + " continue\n", + " plotThroughput(system, exp, exp_type, \"noindex_nomanaged\")\n", + " # plotSpeedup(system, exp, exp_type, \"index_nomanaged\")\n", + " \n", + "print(\"Managed Memory Plots\")\n", + "for exp_type in EXP_TYPES:\n", + " for exp in EXPERIMENTS:\n", + " if exp == \"duplicate_keys\":\n", + " continue\n", + " plotThroughput(system, exp, exp_type, \"noindex_managed\")\n", + " \n", + " # plotSpeedup(system, exp, exp_type, \"index_managed\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unmanaged Memory Plots\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAEkCAYAAADNfV1EAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOydeZgUxf3/X9Vz7X2xgALCgii3F4JcGg8kxuSriUdLEo2aiMQkBKP5EeI3MXk0QUyUqEi8g4oX7RW/iZoAJh4cHqiInAKyqKBcC+zJHN31+6N7ZmdmZ3dnd2fYmd16Pc88M1NdXV3VR7276vOpKiGlRKFQKBTdG62zM6BQKBSKzkeJgUKhUCiUGCgUCoVCiYFCoVAoUGKgUCgUCpQYKBQKhQIlBjEIISqEEFIIMamz89IdSHS+nf+Xd8axO5BWpRDiN1H/XxdCPNzKPr8XQmzt6LGTIVXnVAjxqBBiWSrypDhyJHuvuY9EZrKIz4Gjgf0AQoh+TthZUsrX25uoU+G8BQyUUlZ2PJtdmqOBg52diTYyBqjv7Ey0QDaeU8URRolBFFJKE/iqs44vhPAAIdmNRwJKKTvt/LcXKeXezs5DS2TjOVUcedLaTSSEmCSEWCGEqHE+Hwkhvu5sS9hMF0JsFUL8Puq/FELMEEIsFkLUCSE+E0JcIoQoFkI86aT7qRDi4qh9wml/TwjxbyFEvRBikxDia0KIvkKIV5y0NgghTk+wXzhPnzvf/3XCK1so64VCiA+dYx0UQrwrhDhZCFGB3SoA2O6k87qzz6NCiGVO+SoBP5AvhPAIIeYKIXYKIQJOPr8XdzwphPiJEGKRcw4+F0LMiovTQwjxrFPW3UKIW4UQjyXb1BdC9BNCPC+E2CeEaHDO8/+L2u4WQtwshNgmhPA7+Z0ftX2mEGKNEKJWCPGVEOIZIcTRrRwzpkvjCJRzoBDiNad824UQ349KN9l7NKabKEGZfEKI+4QQh4QQB4QQ9wG+1jKWZNkLhRAPCCH2CiEOCyFWCyGmJEinree0VDQ+c7uFEH8ARII8znCercNCiC1CiP8VQridbYOFENVCiF9ExR/mpHldEuUPn39dCPEP59n6VAhxRVy8o51766BzHV8XQpwatf1MJ53zhRCrnDjvCyFGOJ/lTtrvCiGGx52DJ4Rd5zQIITYLIW4UQoioOOFn+FohxA6nvC8JIXpGxRkohHhBCLHLOc7HCcqQK4R4MOoe+asQ4jYR170jhJgq7GfqsHPfzRNC5Edtb9e9BoCUMi0fwAVUAfOA45zPd4DTne0VgAQmxe23Ffh91H+J/bZ+JTAY+Ct2k/xV4ConbD5QB/SIS3sb8G3geOBFYBewzMnH8cDz2BW+J1GegJOd/xcBRwE9mynrUUAAmAUMBIYB3wNGOefhAiedMU7cMme/R4FqJ28nOfHdwJ+xu6oudfJ5E2AB58Sdl93ANOBY4OdO2FlRcf4P+AQ4CxgBLAQOAcuSvIb/55yvk5xzcxbw3ajtjwF7gCucPIwDfhG1fSYw2Tkn44GVwBtR25vcA87/y9Ndzqhj7wK+DwwB/uCc51PbeI9WAr+J+v868HDU/7845+lCYChwh3Pdt7Zy/pMp+7PO8b+Ofd/djX0vDu3gOX3RKefZzjl9wsnzsqg4vwd2YD9PA4Hzgc+AW6PifB/7JecUIAdYCzyf5P0XPv+fAjr2sz4XCAHHOXEE8A6wBpiE/QwtBg4A5U6cM510PnTKMxxY5eTlTeAc59wtB96Je65/5eR9IHA5UAtcHRXnUex77WlgJDDBOSePRcUZBfwUOME53zOcMkSf73uca3IB9r14m5Pu1qg4VznlugIYBJzhlGFRR+81KWVaxaDUuQBntnKhkxGDu6L+93TC5ic41rfi0r4+Ks4YJ+zGqLBwZT8yUZ6Afi2VIUE6Fc1sn5Rou3MjHQQKosLysB+en8TFfRH4T9x5uScuzibgNuf3cU6caAHxYItfsmLwUfS1iNs22En/kjbcE+Hz1Le5e4DEFVfKyxl17FvjwlcCT7TxHq2kGTEA8oHDwLS4NFaTnBi0VPbwNTg/Ls4HwN86cE7D6Z4btd0L7AyfU+c+rQfOi0vnB8DBuLCF2GK90DlXJUneL+Hzf0NUmBu7Qp7u/D/HiTM8Ko4P+BK42fl/phPn21FxLnXCLo4K+44TVtBCnu4GlsY9w3sBX1TYbODLVsr2EvBQ1D3iB34UF+dtYsWgEvhxXJwznDyXduRek1Kmr5tISnkAeBj4txDiVSHEbCHEkHYm91FUunsBE1sRo48VAHo1tx+NtoC1CcLi92sra4F/A+uEEC8Ku3vkmCT33SilrI36Pxj7wXszLt4b2G9o0ayJ+78T6O38Djd33w5vlFIGsW+MZLkLuEkI8Y4Q4nYhxBlR205xvpc0t7PTPP+30w1Rg/3mBTCgDXmA9JZzVdz/FVFppoJjsSunlXHhyxPETUQyZY+/V96k6b3SnnQjeZZSBoD3ouKPAHKB54XdDVgrhKgFHgCKo7tJgJ9hV+I/wG5ZttWYHcmrlDKE/QYdzusIYL+UckNUHD9
2ayH+HLSpPhBCaE69tUbYXaW1wI9pev9udI4ZJvpcIoTIE3a373ohRJWTzvlR6YSf+beJZVVUGj2d+PPizverUWl06F5Lq81ASjkNGA0sBb6GXVlOdzZbznd8P6QnQVLBJMIkTcsTjNveXFiHzoO0Dc/fwG6CvgdcDHwihPhWErvXNZds3H+RICyQYJ/4ssTvkzRSyoXYN+D92B4prwohnkhmXyFEf+AV7LeZqcCp2E1gsG/8tpDWcsYRfT+25R5tLb325i+Zsic6ZmvHayndJraBBITjXordjRj+jMJurVVFxR0M9HGOMTiJtNuS1/D/eBKdg7bWBzcCv8buhj4Xu3wP0/T+TZS/6HP4Z+wupluwuzJPwn424tNp6ZqF8zST2PN9Ivb5/pgO3mtpH2cgpVwnpZwnpfwG8AhwrbMp7IHRJxxXCNEL6JvuPLWB8EV2tRZR2rwrpZwjpTwD+03+6ramg90F4ccWz2jOANYnsX+Y8JvS+HCAY9gb3YY0kFJ+KaVcKKX8AfAj4PtCiCLsrgiAKc3sOgb7zfF6KeUKKeVmot6WUkhHyzku7v94YKPzOxX36Fbs6z8xLnxCG9JojvD9cEZc+Om07V5pLt1IHoUQXuxrGh3nMDBISrk1wcd09ssDngGeA34B/FUIcVwH8pYor+Vxhl8fMJaOnQOwz+u/pJSPSCk/lFJuxa5425POk1LKxVLKj7BtIMdHbQ/fI+Pj9ovcm1LK3dhdn0OaOd+H6eC9ljbXUiHEYGwD1T+wC9EH+yb9AEBK2SCEWAHMEkJscvLyR+yKMFPYh90/OUUIsR7wO11SMQghJmD3XS7B7qs8DttY9IgTZQf2W+b5QojFTjqHEh1QSlkvhLgHuFUIsRe7iXwptkHo3GQzLqXcIoT4B7DAaY3txX7TKSLJNwchxL3YbzCbsY1/F2FfyxopZbUQ4knshzsHu0lbBkyQUt4NbHGOc6MT70Tg5mTznywpKOePnPtvNfbb23jgeiftDt+jUso6IcT9wB+EELuxz+WPsI17e5JNp5m0twkhnsW+BtOx77PrsA2Z32tx55bT3SqE+D8az+lu7H7wwqg4tUKIOcAcx7lmKfb5GQWcLKX8lRN1vhN+nZSyRghxLvCMEGK80/XUUf4DvAs8JYT4KbbR9bfY9+t9HUx7M3CFEOIs7K6fHwCnYRtx25rOhUKI57Hrkxuw68PdELlHHqDxHvkE22FmGI0vJAD/CzwihDgI/B27VTMM+IaUcnpH77V0tgzqsCvFZ7AL9zx2X9bPouL8EPvkrHTiPYhdmWYEUkoL2wtAx64EP2wm6iHsSuQl7Erwb8CTwK1OOruxm5uzscv3UiuH/l/gIew++/XYldTlUsrX2liEq4F12P2Kr2Pf0Eux3+iSQTh5WIfdD52PfeOFK9mrsfuI/4D9Nv0ittcFUsq12F4T07Hf3n+JU8mmgY6UczZ2a3Ut9sN+pZQyum88FffobOyHdxF2xVUCLGhjGs1xDba96gnsPvGJ2I4UmzqY7g+xX0T+id3K3Yl9fSNIKW/Fftu/xjn2cud/JYAQQse+d6dKKWuc3a7G9tKZ28H8hfMgsT0GNwEvY3fTHoVt/N7XweRvxS77S9gvO6XYXj9t5RfYQv1f4DXsc/lcXJxfYb84P4V9j5RiG6cj97CUchF2XfRNJ8572B5dO6PSafe9Jhqfa0VXRwjhwn5o/k9KeWNn5ydddJdyKro2Qoj/AAeklBe3GjkFqBHIXRjH+6cXdoumEPsNpQL7jaPL0F3Kqei6CCFGYXvorcI2LF+BbWw+/0jlQYlB18YF/AbbgyOI3ZVylpTyY8fbZ0ML+06XUj55BPKYCpotZ6fmStEiQohXse2IiXjLcTrpLkhse8892N33m4DvSClfbXGvFKK6ibopjsdNRQtRdkf18yoUKUcI0Rfb4ywRDVLKnc1sU6QBJQYKhUKhUOsZKBQKhUKJgUKhUChQYqBQKBQKlBgoFAqFggxzLdV1/W/At4A9hmGMbCVuf+z59EuwXQtnG4bxSvpzqVAoFF2PTGsZPAqcl2Tc3wCGYRgnY8+K+dd0ZUqhUCi6OhnVMjAM401d1yuiw3RdPxZ7bo2e2ItpTDMMYxP2II0iJ1ox9opVCoVCoWgHmdYySMSDwAzDMEZjT3YWbgH8Hrhc1/UvsGfWnNE52VMoFIrsJ6PFQNf1Auy5uJ/VdX0N9gyZ4QXVvws8ahhGP+z5Oxbpup7R5VEoFIpMJaO6iRKgAQcNwzgpwbYf4dgXDMNYpet6DlBOB+eIVygUiu5IRr9JG4ZRDWzXdf1SAF3Xha7rJzqbP8NeUAZd14dhL2axN2FCCoVCoWiRjJqbSNf1p4Ezsd/wdwO/w17J6D7s7iEP8IxhGLfouj4cewGYAmxj8izDMJpdnF2hUCgUzZNRYqBQKBSKziGju4kUCoVCcWTIJAOyaqIoFApF2xGpSCSTxIBdu9o3bqy8vJx9+zq69nV2ocrc9elu5QVV5rbSp0+flOVDdRMpFAqFQomBQqFQKJQYKBQKhQIlBgqFQqEgwwzI8Ugpqauro7WxEA0NDYRCoSOUq85DSklOTg5er7ezs6JQKDpA75Om4Nq7P/I/bAY2e/Zg95rOGTub0WJQV1eHz+fD4/F0dlYyAiklNTU11NTUUF5e3tnZUSgU7SRaCJIJPxJkdDeRlFIJQRRCCIqKijhw4ABbt27t7OwoFIouREaLgSIxLpeL1atXd3Y2FApFF0KJQZbSHWwkCoXiyNHlxCD3hRfoNXYsR/frR6+xY8l94YWUpV1VVcUVV1zB6aefzuTJk7nmmmvYvz+2j2/OnDkMGDCAefPmxYRblsW0adMi+06dOpXKysrI9h/+8IdMnjyZKVOm8J3vfId169alLN8KhULRGl1KDHJfeIHiWbNw79yJkBL3zp0Uz5qVMkEQQnDdddfx1ltvsWzZMgYMGMCcOXMi2++44w4++ugjVq5cyfLly1mwYEHM/pdeeilvvPEGy5Yt4+tf/zqzZs2KbLvrrrtYtmwZS5Ys4cc//jE33nhjSvKsUCgyD7NnjzaFHwky2psomqKbb8azYUOLcbzvv48IBGLCtIYGSm68kbynnmp2v+Dw4VTfckureSgtLWXChAmR/6eccgqPP/44AAsWLGDbtm0sWrQIr9fLU089xYwZM3jooYeYNm0amqYxZcqUyL6jR4/m4YcfbixfUVHkd3V1NZrWpXRaoVBEEe0+minzMWWNGCRFnBC0Gt4BLMvi8ccfj1TwP/3pT2O25+Tk8NBDDzW7/8KFCzn33HNjwn75y1/yxhtvIKXkySefTHmeFdmBWOHFfUUPqv0Cj+9oQov2Iyem/h5WKKLJGjFI5s2919ixuHfubBJu9u3L/ueeS2l+fvOb35Cfn8/VV1/d5n3vu+8+tmzZwrPPPhsTfscddwDw3HPP8Yc//IFFixalJK+K7EGs8OK+sgzht2clFn6B+8oyQo9VKUFQpJUu1RdRM3s2Vm5uTJiVm0vN7NkpPc4tt9zC9u3bue+++9rcnbNw4UJefPFFFi1aRG
[base64-encoded PNG image data omitted: several matplotlib figure outputs from the added Jupyter notebook — "output_type": "display_data" cells carrying "image/png" payloads, "text/plain" "<Figure ...>" placeholders, and "metadata": {"needs_background": "light"}; the binary payload is not meaningful in the diff]
UZfWoNslQkAVSrg6aefTru9AwJ+1TV0SHNguSjXSV3ZV4mpvDt9gmgdAtItQUAmR+xa2VexdXroTvsmBIyPuoyPuqi5C2CfgNWeu21l0McrOX6e9Fxuua7kqGiN1TMh6lDkSG2FdmCy5Tp7BLgHeLSBbRZblvWtLH1+p8cBXjJclnbNoTQ/mDQHlgPsMVKzr2rEJDagcJch4llIiXSPWRqu5PCIHXdb9UmIf/RxJLm19tR0FnpIOCnscFJYxXsksNknWBVU43tWB308kh/krwVemr6TbPWMizp01Q8gHYasCI1lWe+YpjkkG21r0mOHz2CVkESFiIvMGb3z2ekJSF1zY/X0hKKPIznCdlT2lTeQMJaJVehIclr6YDTtHgEMdiSDq2y+W6XiPRFgbcCIu9tWBXws8rLchJQMs90kq6ck6tL4CDlNW6Q1kwGmmqb5EbAduN6yrE9bsS8dDolyfSXSy5WMjDreCPQaUYkJSANjVTWajBMExkZdxkZdLqpULrcDAj5KsHreDPn5Z566MnOkZLQnPMcbDsN8goHa5dYuaC2hWQkMtizroGmaZwL/AobXtaFpmj8FfgpgWRa9e/eutU1VVVUWu9o+EVBrjMPLRh4dfa4Rv99f5zXSkelIx9wbOBz4vvdeOpKvHVhmuCwTkmV+gwUBPw8KB/p2oVDCZCmY7BpMloJJUtCjg0pPe/6es5Z15rnOXqgrGaCObTcBkyzLaiylol1knbUFNmzcyL1frOOD/n345pQTiQjBtu0dc+BmIjobqeMTBUoLe/JGRXl8cOkGvxGf1Xqo7STNajAy6nYIa11nnTUR0zT7AaWWZUnTNI9GPWfvaY2+dFT8wBmuwall1SypiLA82CmHTGk6IAFgnDQorozyY8/lVibgo4RZDRaH/DzjudxCUjIqZS63wdrl1qJkK735H8CJQG/TNLcCv0VdH1iWdT9wFnCFaZo2aiLccy3L0jkmWaCbpME5sDSajkBXCdMjDtMjNVlu2w2RNJfbE3lBHvJmkujhuHHRGRd1GRdx6Nm+xhS2K5osNKZpisZEwbKs8xpZfw8q/bnVEO8G8V/QCxEWyJDEXrAHOS2SkbazVY9my5YtXHLJJfFty8rKOHjwIJ9+qvMoNJpEBDDAlQyotvlWtcpys4H1fiNpLrc3u/jjLrchtsP4SI0AjYo6NG1OD019HIpFMxf470x3pCUR7wbxX9gTEVYXmAgL/Bf2xJ6/NyNik616NAMHDuTVV1+Nb3fzzTfjNFRKU6PRxPEDo2yXUbbL+d7A0oMCPk6Yy+39kI9n81QSdUBKRkWTp9QZ6rja5XYINFloLMtq0yLju7kr4rOGs+3F0iBCJl8uosrAf04v5JT6hUaOjOLc0nr1aBKJRCI8++yzPPHEE432R6PR1E2BhGMjDsdGah7YdhgibvWsDPqw8gI8bKh4T3dXxgeUxmI+vfREoo3SqNCYpvmQZVk/8f4XwIOWZV2a9Z5lkVSRSVye6Usmk/VoElm0aBH9+vVjzJgxme2wRtPJ6e9K+lfbnOG53Bxggz95Lre7QkFcz+U2KGUut9FRR8+KkUI6Fs3Q2D9eltjhWexPs0nH4ggM7R93myUiQxL76cwmv2WyHk0iTz31FOeee24muqjRaBrAB5TYLiW2y3mey63Sc7nF5nJbFvTxnOdy80vJyGjyrAZDO3m57HSEZrdpmpcC7wFT6QBpyPaCPSpGU1Xz1ctcF3v+3ox+Tqbr0cT45ptveP/997nzzjsz2V2NRpMmeRKmRBymJJTLLvVcbrFZDZ7NDfCoN5Fo18TaPZ71U9iJXG7pCM2FqJH5vwDWAz/Oao9aADktgj1/b3LWWYYSAWJkox5NDMuymDlzJj179qy3DQeHx41HOU+c3+S+azSaptPXlZxWbXNadU257C/8yXO53VsQjM8zWJwyl9uYqENuI9pTagjmFQQPaTb21iQdoQkD36BclfcB44Fl2exUSyCnRYh+tSMrba9fv567776boUOH8u1vfxtoWj2aWbNmUVxcHHeNJdajASU0s2fPbrCdcqOMrWILr+Ys4gzObMbRaDSaQ8EAjrBdjrBdzvHKZVcJVS57ZUKm2wveRKI+KSmJxXs8q2eYVy671BD8j89mft8CJCTNxt4eSEdoHgPeBs6zLOtu0zT/AJyc3W61b7JZjwZgyZIljbZTISqQSD4MvseTuY/Rzy0iX+aT5+aTJ3PJk/neKw+frvyu0bQIuRImRxwmJ7jcdhmC1QlWz/O5AR5LKJfdxZXs9gkEbp1lO9oD6QhNoWVZ95umaWa9N5qMIb38OUc4/LrHLxvcNsfNiYtOnswn3/urRCkvYblali/zyE1alke+zE9aliNzEHrEgUbTKIWu5JSwzSnhGpfbVz6V5faHriFKfQLaqcDESEdodpqmeQ6Qa5rm94Ds+Js0WSMog9y750FChKg0KqkUFVSICipFBVWiigqjImGZWl9pVLLHv5tKUVmzrZH+LNlCCiVCMp9cmZtgTdUIVX6CmNVsm0e+myBinpDFxC7YIaZH1GjqxwCGOS7DqlxOCNvMKwjyVH4QF0GknepNOkJzCXApamr/YuCyrPZIk3Ek8FbOG9xSNqdZ7bi4VImqJKFSwlWZLFSG+r9KVFIRFy617ICxnx1im9rWUNtHRPpJGAEZqCU+eQmi1dPXC6OrSLLA6rLKYvvkeuu0+1DTFunjSuaUhbnmYIT7C7sx33Bw6ZgxmhLLsu4xTbMPcBEwBFiXzU5pMktURFgZXN7sdgwM8r2bdWEG+hUjStQTJSVccRGLC1fissq4mClxU0JWanxDpb+SaqOa8rxyKkUFrkgt/VY/MfdhTHwS3Yex/xOtsNrLaqyymPi1hPtwp1HKBf5zmGvcTaHbJ6ufpWk9+riSuxw/P9t1gHkFwXY3G3s6vf0zMBO4BZUU8DBqPI2mDVPsDGSWcxMF+wuYtn1aa3enQQIECMhudJXdapcFbSKxmh0SSZhqKo0UC0xUUGFU1gibJ14VRkXCMs9CMyrY7d+dtO2hug/jVlWC+zA1DlbjKqxJ2Eh0H8ZEMNF9eE/BPJaK97i7YF6zLVZN2ydm4bS32djTEZo80zRDQMiyrH+YpvmzbHdKo2kuAkEOueS4ufSk/vFGTcXBibsPE+NZdVlcsWUVQglbbNl+Yx/bxdYEy+zQ3IchN8Qu306kkDyZ/ziTI0dzQvgkJdgaTRsi3fTm54DfmqaZA2zMbpeyT99xp+LbVXuCA6ewF6WrFzW7/WyVCQB49dVX+dOf/oSUEikl1157LWeeqcfJtBQ+fBTIAgpkQUbbjRCJW05VRlWyBZYgSHELTFSwJPQOu9gJgIPNNT3VPHrDo0cwPjKR8dGJjI9M5HB7GEanngBF09pkrZRzlshIKeeiARPrXbd924pD6lgi+/btY+3atUllAvbv359UJmDZsmXMnTuXq666ipkzZ8Yn23Rdl9deey2pTMBLL72EZVlIKRk1ahQLFy6kpKSEzz77jO9+97usW7eu1hQ3Gzdu5PPPP6egoIBp09q26yyTdJayxjuN
Uk7sO5WwqHGhBGSQSw5eyvrAOlYHV7Lf2A9AV7cbYyPjmeCJz7jIeLrIrq3V9YzQWb7nRHQp5zZE15vvIPDZ54e8f6+zflrvuujIIyi75fpG28hmmQAhBOXl5YAqfNanT58mz6Omaf/cUzAPt465xg+KCh7auwCJZKPvS1YGV7AquIJVwZXc1WUuUkiEFAy3RzA+MjEuPofZQ7XVo8ka9QqNaZo9LMva15Kd6YhkskyAEIL777+fiy++mLy8PCoqKpg/f372Oq9ps6wMriCaEtdJzC4UCIY6wxhaNYyzqs4BoFyU83FglSc+K3k59/94Kv9xALq53RkXmcD4yAQmRCZyVHQ8XWT6XgKNpiEasmj+YJpmD2ADsAh4z7Isu2W6deikY3E05Drb8/QDmexORssE2LbNPffcw8MPP8zkyZNZtmwZV1xxBW+99Rb5+fkZ7bembfPC7ppYYroulS6yC9MixzMtcjygxkV95f+SVYEVccvnnS5vxq2eEXaJivV4ls8QZ6ie7UFzSNQrNJZl/QzANM3hwCnAT73CZx8ACy3LanhCLk3GywR8+umnlJaWMnnyZAAmT55MXl4eGzZsYNy4cRnvv6ZjY2AwzB7OMHs4Z1epCVzLRRmrg6vi4vNC7vP8I/8xAHo4PRgXnRAXn7HR8eRL/YCjaZxGYzSWZW1AWTV/MU3TBxwD9AfardA4hb3qzTrLFNkoE9C/f3927NjBF198wbBhw9iwYQO7du1i8ODBGeu3pnPTRXZlevgEpodPAJTV86X/C1YFV7AysJxVwRW82fV1AAxpxK2eCZFJjI9MZLAzRFs9mlp0yqyzbLN+/XpmzJjB0KFDycnJAZpWJqCkpITi4uL4MSWWCVi4cCH33nsvwpuC4vrrr+f000+v1Y7OOus8tPQxHxD7ldUTXMGqwApWB1dx0FAJKj2dnvG06vGRiRwVHUeezMt4H/T33DRaO+tMC00HRQtN56G1j9nB4Qv/BmX1BJezKrCCrwJfAuCTPkqiR8bFZ0JkEgOdQc22elr7mFuD9iw0jbrOTNM83bKsl03THAb8N/CUZVnvZL9rGo2mPeDDxwi7hBF2CedWqoqu+8U+VgVXeqnVK3g292key1cZkr2c3kp0PPEZEx1Lrsxt6CM07Zx0xtFcD7wM3Ag8CNwFTM5mpzQaTfumu+zBSeGZnBSeCSirZ4N/fcK4nhW8lvsKAH7ppyQ6Mkl8ip2BOtbTgUhHaLqYpjkIcCzLet80zfZTqLoT085copoOjg8fJfZISuyR/LDyAgD2GntZnZBa/UzeUywwHgag0OlTk1odncjoyBhy0FZPeyUdofkDcCtwqzfX2dLsdqnpxOb9Eu2sRkO2kFKC3ICQt9I1v5I+Pb8hao9iX1nz53HTaDJFT7cnM8KnMCOsBiTb2HzuX+fFepT4LMp9CVBWz8jo6Lj4zORkcsnTVk87IR2hOQyYZVlWLAo/K4v9OSRycnIoKyujWzc9ay3AgQM7+Oab55BSVYCVMkjUntTa3dJoGsSPn5H2aEbaozm/8kIA9hh7WB1YERcfK+8J5heo7M0+ffsmDSgdHR1DiJzWPARNPaQjNF8BfzRNsxvwb+Bpy7L2ZrdbTSMYDLJr1y527dqFz1d3pUTDMHDdxoudRIG9PkEEQYGUdHNlnTNASSQHxUHKjTJcXPJkHl3dbvhbefo4KW3Ky17mYPknhMMGAwdWAAaVVde0ar80mkOhl9uLmeFTmRlWUzjZ2KwLrGVDj3UsjrzDquAKXsl9EVDlE0YlWD3jIxMpcge0Zvc1HmmnN5um2RO4HzgdeAW407KsJVnsW13Umd4cY82aNXz11Vc4jlNrXV5eHpWVlfU3DKwK+lgc8hOUklOrbQ63awuTi8tngTV8GHqfg+Igg+3DODZ8XBuobhjF59uGz9iI37eGQCDMEcMPMGH8PqQUOM5wbHeY+uscgeMMw3YOhw7m99Zpr52DxGPebexSGW6e5fNxYDXVRjUA/Zx+CVbPJEZGRxOiaQOo2wodPb35DOBcoAfKovkpqsPPAcdntXdNZPTo0YwePbrOdQ19SdsNwbU9clkc8nNydZQ79ldT6CYLsIvLizn/5n+7/plN/o2Mj0zkprI/cUyk9YqNGsbXhAJvEgy8TjDwHkJUI2UOkegkgoGlCGEjZYCq6h/h823H71tLKPAywitxLKXAdQdhO8OxneGeCKm/Ej0uSdM+6O0Wckr1aZxSfRqgSoOvDXwWH1C6KriCl3L/D4CgDDIqOkbNWu29+rn9W7P7nYJ0/DxjgBsty9qWuNA0zcuy06WW5bkcP7/qnksU+OP+Kn5YGU2SfYlkceht7ujyBz4NruGIaAl/3fMwM8OntEIgMkzA/wGh4BsEA2/g96lBcbYzhKrw+USiM4hEjwFyKcj7Fbk5j1EV/iEHq25NasPn24jf9zk+4wv8vs/x+74gGHgHkTAbsOP2w3GOSBGhI5Ayc9UqNZpsECDAUdGxHBUdy4VcAsAuY2dNanVgBY/lP8pDBWoC3f52EeOjE+PiMzI6Oqlctqb5NOo6M01zMPAroAC4ELjYsqy/NbhT9mjQddYQqRbNfgE3dcvl2bwAEyI2d+2r4jAn+VysCqzgT13/wAeh9ym2B3JN+fV8u+p7+Kg7DpQNDGMbwcAbhAJvEggsxhCVSBkiEp1KJHoSkegMHHdo7f1EKb16/Bd79t2NK9Nx69n4jM34fBvw+zbg8wTI59uAIWpcjq7bM8H1dkRchFzZj1a0zON0djdSZ6G5xxwhwrrAZ6wILo+Lz3a/epYOyhCjo2OYEJkUF58+bt9Mdf2Q6dCuM+Ah4ErgL5ZlOaZpnge0ltBkhCVBH9f0yGWnIbi+rJqrDkaSTsR6/zrmdvkjr+W+Qi+nN7/dfyvnVp7fQk85UQL+ZQQDbxIKvIHfvw4AxymmOnyWZ7VMAxqeP8qVfZHG67gy3QvTj+MOxXGHEomeltgShrHDs4BiIrSBUPAFcr0KjgCu2wXHHeYJzxHYnhC5bjG0oDBrNOkQJMhR0XEcFR3HxRWXAlBqfJOUWv1o/sP8reB+AAbYxfHy2BMiEzkyOooAgdY8hHZFOkLjsyxrnWmasffttgzfZkNwSc9c1gb9HB51eH5vJeOiNQH/rb4tzOtyB//KfYZ8WcC1ZTdwUcWlWZ8K3RDfEAy8RTD4OkH/YgyjHCkDRO2jKa/8DZHITBx3GK3zQGLgugOIuAOAkxKWS4TY7bneNsQtoWDgLXwhq2YrmYPtHJ4U/7GdI3DcIaB/qJo2RF+3H6dX/wenV/8HAGHCrA18Ghef5aEPeSHvOQBCMocxkaPUrNWe+PR2C1uz+22adFxnvwEGACeiss1KLcuak/2u1ckhuc5KDcHvC7uy0HCR3qDOL3aUkesd+m5jF/cW3MU/8hdg4OPHFRdzefnP6ZG1eIRNwL/KC+K/QcD/KaDiIpHITMLRGUSjxyEpaNantJZLRYj9Sni8GJDPF/tbU1lCSj+Oe5gnPsM8d9zwZmfCaTdS56C1jnmHsT0+hc6q4Ao+Dawh4sU2B9qDalK
roxMpiR6ZUaunQ7vOLMuabZrmaOB1YD2wubF9TNP8O/AtYKdlWbXSwLwCancCZwKVwEWWZa1sYt8bpdQQzCsI8o/8IFG80YseuVIVeXqw4H4ezn+QsAhzduW5XFX+31nJQhFit5ch9gbBwDsYxn6k9BG1J3Ow8kbC0ZNwnCNpC3GO5iJld2x7MnatKfEq8fu+9OI/G7zXekKBVxDC8fYVuO7AhCQE5YZz3OFI2bXlD0ajSaC/W0T/6iLOrP5PQFk9nwY+iQvPh6H3eT7vWQBy3ByOio6LC8/4yER6u71bs/utRr1CY5rmP4HzLcuKWJa1BlhjmuYIVFnnoxtp9xHgHuDRetafAQz3XscA93l/M8rPe+TyQdAXt2ISeTD/fu7vcjf7jf2cWfWfXFv2/zjMOTyDn+7g933kZYi9ScC/Wi11CwlHTyMSOYmIfTxSdqbZDPKwnTHYzhjCScvD+Hyb8BuJ1s8G8gJLEKJmS5UJN7yOTLjMFazTaJpCiBATopOYEJ0E3iyQ241tqlxCcAWrgiv5e8EDREUUgEH2ECZEYlVKJzHCLmn1Qd4tQUNH+CTwommaZ1mWtd80zdOA2cCPG2vUsqx3TNMc0sAm3wEetSxLAktN0+xummZ/y7J2NKXzjXHfvirmFQR5Mj9ABBspaoL5t3WbzfTqE7m+fBajo2My8nlC7CUYeIdQ4HWCgbcwjL1IaWDb4zlY+f+IRGdiO6Nox2GuLBHCcUbgOCPU1AxxnDoz4XJCT2GImrldXbdHPBMOOZ6gvwjbGY4r+9MRLERN+6LIHUBR9QC+Vf0dAKqpYk1wTXxMz3uhd/lX3kIA8tw8xkTH1ozriU6ip1vbZb/TqR5v5QAAGXdJREFUKOUC/znMNe5uA4PDm069QmNZ1jOmae4A/s80zVeAY4FTLcvaX98+TWAAsCXh/VZvWS2hMU3zp6hBoliWRe/e6ZuevYEHAOH+igVGMY78kec+y+GF6CKmGydAcwwK6QIfAS8j5MvAhwhcJL2B03HF6SBOxhfqRV6osTyxzOP3+5t0vtomfalVlUJKXLYCa4F1CGMtAWM9gcBLCPk43T0Pmxp0WgKUIEWJ9/+RwBAQHScTrmN8z02jvR1zMQM5nTMAkI5ki7OZD40P+FC8z4eBD3gweD+2sAE4XA7jaHcKk+UxHC2nMFKOYo7vf1gq3uPBwvuZ69zVmodySDTkOpuNmpllG3Atyr11rWmaWJZ1czM/t67HzDqzEizLegClFwCyqcGwnUYpT/e9i4CwCcjbmVj9PPuNIzhy9yh20/TAmhAHCPrfIei5xHzGLgCi9jgi0WsIR2ZgO0dRk9Ir4RA+JxN07CBxLjDBe8WQ9O4lOXDgg5QkhFfxGQtqtpKhBjLh2t9AvY79PddNez/mPPI5kRmcyAwAqkQVawIfq9TqwApeDb7CP/yPAZDr5lItwkjh8rgxn8t2/azJVo2XDNBqNOQ6e837+zpwb4Y/dyswMOF9MXBoIzEb4Z6CefER/AH2UWL/hVvKmpI0J/H51hIKxGItyxDCwXW7E4meQEX0JMLRE5FSpza2PgJEIVF7KlE7eWogIQ54MyHEUrE/J+BfRSj4PEKoZxwpfQmZcIkiNIyONiecpm2RK3OZHDmGyREVqpZItvq2sCq4gvvy7+bzwHoAHFzuLpjXxHtY69OQ6+ztLH7u88CVpmk+iUoCOJDp+Ax41kz+U/FAXFREeCb/Ka46eE2DTwSCgwQCi1WWWPB1fMY3an97FJXVPycSnUnUHk96w5A0bQEpu2E7E7GdiSlrqpIy4WLxoFBgUUomXHGS9ROboLRzJXNoWgqBYKAziFA4xKzu18V9QOnew9oaWblTmqb5D9S4m96maW4Ffos3Os+yrPuBF1GpzV+g0psvzkY/7imYh5vikav7iUDiMzYQDL5BKPAGAf+HCBHFlQVEosdTEZlJJHqiN82KpmORi+2MxnZGp2TCRfAZm+IZcGo6ns/jk5fGcNy+KdZPYiacTkTQNI/072Ftm7TLBLQRmjRg87t9h/KJL1xr+RgnxL9K1xAMvOvNI/ZGfDChbZcQjs4gEp3hFQtrv6PX27sf+1DI/jE7+IwtCZlwNX8NcTC+let2TxiEOhzHVX9dt4hMC5D+njsu3+p9KmuDn9ZafmRkFC/sTr9ibpsfsNmeea38HHJDTybNSqz88L3w9RiNEGFcmUc0Op2K6iuJRGfg6kJJmgbx4bhDcNwhRKKnJCyXGGJHPAGhZk64F8k19sW3cmV+PO7jJExK6riD0HPCaVJJFJP2LK4dWmgqq64hN/RU0jLldw9SFf4x4cgMovYx0E4LIWnaEgJXFuHaRUTt5DJNQuxJGgfk931OMLAEX+jp+DYqE25ocmE69wgc5zDqyoTr0fXU+NRFuNDHG3oRtUexryz9J12NpiXo0ELjyr5Uhc8hN/SEVwTMR3XkO5RX3N3aXdN0IqTsRdTuRdSekrRciLI6MuE+IhT8d0om3JBaMyJE7bH4fRtSrPWg5+7VaNoWHVpoINGqsYEAFZW/ae0uaTQASNkV25mA7UxIWRPLhEuOAwUDryG8QX1q/9ptVlZdkd1OazSHQIcXmrhVk/MYVeFz0iwCptG0JvVlwkW9TDjlhssJPo3PtzE+V6wQEXp1n6bGAtkl2M6R3qsE1x2InvpI01p0eKEBz6rJ+YrKqmtauysaTTMI4Lgqg43omVSHz6NX96lAGClDlFfcgs+3A79vHQH/J+SEXojvqZIQjsC2R2I7JfGXLs2taQk6hdA0vdqkRtP2SbbWz6U68qOk9YIKfL71+P3r8PvW4fd95mXBPR7fxnH7KtGxj8RxYlbQMCCnhY9G05HpFELT2chduJAut92Gb/t2+hQVUT5rFlXf/35rd0uTBRqy1iX5dcSAJIbYid+3Dp9/LX7fWvy+deTlPBwvyRCfisc+0rN8tPtN0zy00HQwchcupNsNN2D8//buPjiu6j7j+Hdf9C5jY8tvejExCFsy8UBS15C6vAVCaEsgIe1JQtIpSWeYTJMmaZOhbpMOSWY6Q9KZtnSa0rqEFpoE8huXJKZhIEkdF1JoMUkoKchmjMHWyjgY2/hFlqXdvbd/3Lur3dVa2LKu9u35zOx49+ru6pyxrUfnd889Z2wMgOTICPNvvx1AYVOHzny0HsPzlzKRWQqZKwuOZ8LrP0PhCGiIpuRztLY8XPC9OshmVxcEkMpvcnoUNNUikyF2/Djx0VFio6PEjh8PXp84UfZ57MQJ4sePB+eOjuafJ/buJeZ5RR8dHxtj/uc/j9fZSWZwkGxvb9FuoyKQJOv1k/X6GU+/J3+0uPwWjIDKlt/C8FH5Tcqp+6CJpIzk+zA+XvyDf3S0KCSKAiP3tTLn5V+PT10qp+y3jsXwOzrwOzvx29vxOjvxOzrILl+O19lJ2yuvlH1f/OhRFn00WFLOmzePzOrVpAcHSQ8OkhkcJD0wgH+OtkqWYmdUfmsqLb+dH85+U/mt0dV10JQtI33uc8RTKdKXXnpGI4V8gOTOzWTe5LsH/EQCv7MTr6NjMiA6OsguWl
T02jvV8zBQ/M7O4NHaCvFT/0dt3r6d5MjIlOOZ7m4O3303TUNDNO3YQXJoiLYtW+j418l9WjK9vWQGBooCKHP++ZCs638mcsbepPyWfCGcfLBD5TcB6nxRzSXr15f9oTvtN2hpwSv8wV4wasj/4C8IjdIAKQ0MWlrmtExVGq4AXlsbR7761akjOd8nvm9fUfg0DQ2RfOmlfJD6LS1k+vuLRz+Dg3iLF1dl+a2W14OaqWrvc4zjJbPfghFQvGANuKy3LD/6OZ3yW7X3OQpn02ctqhmhxClCyQcOPvDAZEB0dgbh0tEBzbW3w2KhXJjkyoXZ6cqFsRheTw/jPT2MX3vt5PHxcZK7dhUFUMtPfkL75sm1ubILF+ZDJz/6WbUKv00bhEkxn84yewGF5bfkEImC8Hnz8tsgntdbmY7IjDXkiCbT08NrTz89m+2qSrP9W1/80KFg1FM4+tm5Mz968mMxsitXThn9ZPv6pi33zSb9plvrMiTiL5NMDuXLb8nEDhKJPfkzPL+DWOytnDzZ31DlN41oqtSxjRvLlpGObdxYwVbVLm/hQiY2bGBiw4bJg9ksiT17aNqxIwieoSGann+e1kceIRb+EuO1t0+59pMeGMBfsKBCPZHqlQxWP5i4kHFuzB8tLb+1tb5ES/P3S2a/lSu/XYhWZ6+8uh7RQPGss2nLSHWokr/pxkZHSe7cWTT6aRoaIv7GG/lzssuXTwmfzAUXnFX5sr5+uz89jdvnA8RjvySZ3FFUfgtWtT5V+W1NOPutl1qb/VbLI5q6D5qcxv3PWEV99n3i+/cXj36Ghkju2kUsnQ5OaWrKTz7IXwMaGMBbtuy0Jh9UXZ/ngPpcqlz5bYhEYm/+jKmz3wbD8tu5c9OBGajloKnr0plUmVgMb/lyxpcvZ/zqqyePT0yQ3L27KHxannqK9oceyp/iLVgwdfQzMIDf3l6Bjkh1O73yW3Dz6XTlt1wIqfx2thQ0UnnNzWTC4OB978sfjr3xxpTSW/uDDxI/cQIIJx+cd17R6Id3vAPOOQcS2hZZip169ltp+W2I9qYn85vKFZffCrdeqL3yW6UoaKRq+QsWMHHZZUxcVrAzpeeRGB4uGv00DQ3R+uij+ckHy9ra8isf5EY/6cFB/IX1PStJZiKG5y9jIr0M0lcVHC8tvw3RlPzfaW4+HSyY/Va95bdK0TWaOtZIfY6NjZF88UXOHR7m5Pbt+SBKHDqUPye7bFlQciu49pPp7w9uqq1hjfT3nFOpPheX3yZDKB4vmOSSL7/ltl6YnfKbrtGIVJjf1kb64ovxrrmGozfcEB70iR84MGX00/Lkk8QmwrJIMknmggumjH687u6qXPlAKuvNym/BpIPczaflym+lWy80RvlNQSP1KxbDW7KE8SVLGL+yYE2udJrkyy8XhU/z9u20f/e7+VO8+fMnRz9h+GQGBvA7OyvQEaluk+W3ibLlt8K1356ltWVL/ow3K7+de851NCWfD0+GJWH1N525iMNHfzA33ZsFChppPE1NZFatIrNqFSdvuil/OHbkCE07dxYFUNvmzXQcP54/J7NixZTRT3blSk0+kDIKZ78V/DvLl98mp1+Xn/02CASjoVgsm/+a7zeTzqybu27MAgWNSMifP5+J9euZWL++4KBPIpUqCp/kjh20/vCH+X1//NZW0qtWFY9+Bgfxuroq1BOpZtOX34bCJXdyIfRiUcgE4mV3VK1mChqR6cRiZPv6yPb1MX7ddZPHx8Zo2rWr+NrP1q20f/vb+VOyixfnS27pwUHSa9YEkw9atSGYlCosvxXcY0aaczo+RUvz94nFsvh+M2PjH8Dzl1SspTOhoBGZibY20mvXkl67lrGCw/HXX58y+um4/35iJ08Cwf5EmfPPnzL60a6nUl4Tx098kZbmx4AstTiaAQWNyKzyurqYuPxyJi6/fPJgJkPylVcml9zZsYOmZ5+lbUvBReF58/KrHaQHB8msWUN69eppdz2NZPdYqTqev5Sx8Q/Q1vqNmhzNgIJGJHrJJJn+fjL9/Zx8z3vyh2PHjwehUxBAbd/73mntetq2ZcvU3WNvvx1AYVOHTox9hrbW3TU5mgEFjUjF+J2dpNetI72uYAZR4a6nudHP0BAtP/4xsWxwUdhvaQHPyy9EmhMfG2PenXcqaOqQ5y/Fj/8Hnl+bN+YqaESqyWnuetpx991l354YGaHr3e8m29tLtqcn+LPg4Z17rq4FyZxT0IjUgpYWMhddROaiixgDWrdsKbt7rN/RgbdkCcndu2l5/PH8AqQ5Xnv7ZPAUBFGmp4dsXx/ekiVzthuqNA4FjUgNOtXusUcKS2e+T+zwYZIjIyRSqcnHyAiJ4WGaf/azoo3oAPzmZrLd3fkQyhQGUl8f2eXLoalpLrsqdUBBI1KDcmEy7e6xsRj+woWkFy4kvXZt2c+JjY5OCaHk8DCJVIqWbdto/+Uvi87343G8pUuDACoNofC139YWWb+lNkUWNM6564G7gARwj5ndWfL1W4G/BHLj/78zs3uiao9IvRm7+WbGbr75rFb19Ts6yKxeTWb16vInjI+T2LdvMoRSKRLDwyRGRmh+5hkSDz9MLJMpekt20aKppbm+vvxrf/78GbVValckQeOcSwBfA94FpIDtzrktZvZCyanfNrNPRtEGEZkFLS1kV64M1nMrJ5slvn//ZHkuDKFEKkVy505at27N36ya482bVxREhSGU7e0Nlu7RhIW6EtWIZj2wy8x2AzjnHgRuAkqDRkRqWSKB19PDRE8PFK4Rl+P7xA8eLC7PpVLByCiVovnpp4kfPVr8ltZWst3dk+W50sfSpXPUOZktUQVNDzBc8DoFXFrmvPc7564AXgT+yMyGS09wzt0G3AZgZnTNcKHCZDI54/fWKvW5MVR9nxcvhoGBKYd9IANw5AixvXthzx5ie/YQ27uX+N69NO/dS+xHPyL22mvF70skoLeXZStW4J93HqxYgV/wYMWKmt/Mrpyq/3ueRlRBU27cW7qV58PAA2Y27pz7OHAf8M7SN5nZJmBT7jNmWovWLoSNQX2uUcuXB4/CbbtzxsaC60PhbLlEKkX766+TeeklElu3kti/P7+Sdk52yZKgHNfXVzxzLnzU4r5Cs7DDZsVEFTQpoK/gdS9QtAezmR0sePlPwFciaouI1LK2NrL9/WT7+/OHWrq6OJj7oZtOk9i/Px9C+WtEqRRNzz1H66OP5ndUzfEWLJg6c043tkYmqqDZDlzonFtJMKvsg8AthSc455ab2avhyxuBoYjaIiL1rKkpv5VDWZ5H/MCB/ESFZOG1opdfpuWJJ4iPjha/ZbobW3t78ZYu1Y2tZyCSoDGzjHPuk8BjBNOb7zWz551zXwaeMbMtwKecczcSlGkPAbdG0RYRaXDhvT/e0qXF68rlTHdjaypF089/TuLw4eK3THdja28v2e5u3dhaIOb7pZdOqpq/b9++Nz+rjLqoY58h9bkxqM/RK3dja+HsucTp3tiau7n1DG5sLdwOouyNuachvEZTsVqgVgYQEXkTM7qxNTeF+yxubG176KG62A5CQ
SMicrbO9MbWwutE09zYGhsbmxJQtbgdhIJGRCRqM7mxdWSEjnvvLf9xM7yEUCkKGhGRSovF8Lq68Lq6SF9ySf5w62OPld0OIlvh+2LOlObniYhUqWMbN+KVTBrw2to4tnFjhVo0MxrRiIhUqdPaDqIGKGhERKrYbGwHUWkqnYmISKQUNCIiEikFjYiIREpBIyIikVLQiIhIpBQ0IiISKQWNiIhESkEjIiKRUtCIiEikFDQiIhIpBY2IiERKQSMiIpFS0IiISKQUNCIiEikFjYiIREpBIyIikVLQiIhIpBQ0IiISKQWNiIhESkEjIiKRUtCIiEikFDQiIhIpBY2IiERKQSMiIpFS0IiISKQUNCIiEikFjYiIREpBIyIikVLQiIhIpJJRfbBz7nrgLiAB3GNmd5Z8vQW4H/gV4CDwATN7Jar2iIhIZUQyonHOJYCvAb8BrAE+5JxbU3La7wOHzawf+GvgK1G0RUREKiuq0tl6YJeZ7TazCeBB4KaSc24C7gufbwaucc7FImqPiIhUSFSlsx5guOB1Crj0VOeYWcY5dwRYBLxeeJJz7jbgtvA8uru7Z9yos3lvrVKfG4P63Bhqtc9RjWjKjUz8GZyDmW0ys3Vmti58z4wezrmfns37a/GhPjfGQ31ujMcs9LliogqaFNBX8LoX2Heqc5xzSWA+cCii9oiISIVEVTrbDlzonFsJjAAfBG4pOWcL8HvAU8BvA1vNbMqIRkREalskIxozywCfBB4DhoJD9rxz7svOuRvD074OLHLO7QL+GNgYRVsKbIr486uR+twY1OfGULN9jvm+BhEiIhIdrQwgIiKRUtCIiEikIluCplo45+4FbgBeM7O3Vro9c8E510ewvM8ywAM2mdldlW1VtJxzrcDjQAvBv+vNZnZHZVsVvXAVjmeAETO7odLtmQvOuVeAY0AWyIS3PtQt59wC4B7grQS3gHzMzJ6qbKvOTCOMaP4FuL7SjZhjGeCzZjYIXAZ8oswSQPVmHHinmV0MXAJc75y7rMJtmgufJphw02iuNrNL6j1kQncBj5rZAHAxNfj3XfcjGjN73Dn3lkq3Yy6Z2avAq+HzY865IYKVGF6oaMMiFE6NPx6+bAofdT3TxTnXC/wW8BcEMzelzjjnzgGuAG4FCJf0mqhkm2ai7oOm0YUh+zbgfyrclMiFZaSfAv3A18ys3vv8N8DtwLxKN2SO+cAPnHM+8I9mVrPTfk/D+cAB4J+dcxcT/Pv+tJmNVrZZZ6YRSmcNyznXCfwb8BkzO1rp9kTNzLJmdgnBShTrnXN1e03OOZe77vjTSrelAjaY2dsJVof/hHPuiko3KEJJ4O3A3Wb2NmCU6O85nHUKmjrlnGsiCJlvmtlDlW7PXDKzN4Bt1Pe1uQ3AjeGF8QeBdzrnvlHZJs0NM9sX/vka8B2C1eLrVQpIFYzONxMET01R0NShcLuFrwNDZvZXlW7PXHDOLQ5n5+CcawOuBXZUtlXRMbM/NbNeM3sLwRJPW83sIxVuVuSccx3OuXm558B1wP9VtlXRMbP9wLBzbnV46Bpq8Fpr3V+jcc49AFwFdDnnUsAdZvb1yrYqchuA3wV+4Zx7Njz2Z2b2SAXbFLXlwH3hdZo4wbJH/17hNsnsWwp8xzkHwc+vb5nZo5VtUuT+EPimc64Z2A18tMLtOWNagkZERCKl0pmIiERKQSMiIpFS0IiISKQUNCIiEikFjYiIRKrupzeLlOOcuxK4g+CXrSzw52b2pHPuCPAzgrXSPgZ0A9ea2RfC930R2GZm2wo+q51gOZhV4fs2mdl9Z9G2BQQLhDbUjbZSvzSikYbjnOsCvgS818yuAt4LjIVf/oWZXQ18lmAdsdNxB/Cf4Wf9OvDyWTZxAXDzWX6GSNXQiEYa0W8C38it/2Zmx4Cfl5zzLMGaaafj18zsT8LP8gn2xcE597cEWxYcBT5MsLjptWb2BefcreF7twH3AoeAlcBNwG3Au5xz24DfMbMDZ95FkeqhoJFG1A38AsA5dwvwB8B/m9nnCs65Atg502/gnPtVoMPMrnDOfQT4OKdeQftcgiVzPgS8H9gErGiEJWWkMah0Jo3oVYKwwcy+BXwE6Aq/ttY592OC8LkTOEmwa2dOK5NltulcQHCtB4IdMPsp3h8nVvD8BTPzgBGCsplIXdGIRhrRI8Bm55yZ2RGK/x/krtEA4JybAN7mnMv9UvZ24Ksln/ekc+7DZvbNcEHTDQRrUl0Xfn0d8BJwhGBNNoC1wHPh89IASgOJs+mgSDXRiEYaTnjN40vA95xzW4G/B+4/xbkHCbZbeBx4AthsZodKTvsScGV4TeW/gAvM7GlgzDn3BHAL8A8EwdLtnHsEWDxNE/cDC51zm51zC2fYTZGqoUU1RUQkUhrRiIhIpBQ0IiISKQWNiIhESkEjIiKRUtCIiEikFDQiIhIpBY2IiETq/wHNloulGyngtgAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZYAAAEkCAYAAAAM+hfoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOydd3hUVfrHP2dqZiadQBBYQSwoYEOk2VDBviooF13X3gtrXUREXUURd9WfrrquFaWoXJQA1kUsWGiCoiKKoiDSAiF9Mpl2z++Pe5NMegIzpJ3P8+TJ3HvPfc976/ee9zQhpUShUCgUinhha2kHFAqFQtG+UMKiUCgUiriihEWhUCgUcUUJi0KhUCjiihIWhUKhUMQVJSwKhUKhiCtKWGIQQvQSQkghxLEt7UtTEEL8QwixvqX9qI+a/gkhLhNCRPZS3nstr46EEGK49Yz0iIOtjUKISfHwS7H3EEJ8KoR4saE0Sliq8wewD7AcQAjRw3qIhreoV+2H2UD31pqXEOJY63r3SohHCUAIMUkIsXEvZrkE8xnZuhfzVLQxHC3tQGtCShkFtre0H+0VKWUACLS3vOpCCOGSUoZaKv9EYR2TekYUDZLQEov1BfilEKLE+vtWCHGqta3OsJMQYr0Q4h8xy1IIMU4IMVsI4RdCbBJCnC+ESBNCzLLs/iaEOC9mnwrbfxFC/E8IUSaE+EkIcYIQorsQ4j3L1lohxHF17Ffh0x/W/0+s9RvrOc6rhBCb67AzM2bd5UKIXCGEsJazhRCvCCF2WsfwpRDi+Jj0QgjxghDiVyFEwDrGKUIIdwPnO9Oys1gIkd7QtbHSnyOE+MY6P4VCiBVCiCNjtu8vhJgjhMi30nwnhDjL2pYhhJhpXY+AEGKdEOL2iuOrJ79q4amKZSHEMUKIr608vhJCHFVjvxFCiO+FEOWWDydY5/ev8crLKqV8biXfYNn/NGb/C4QQqy0fNgohHhdC+GK2fyqEeEkIMVkIsQ3Y0sRzfIAQ4i1rW4EQYqEQ4tAax3KUEOIDIUSxEKLUsjFYCHEZMBnoafkrRcyzU8NGRQhrpBDiM8uftcJ6HmPS9RFCvGvlUyqEeFsIcUAddno00+7hQogl1vn7WQih1eFjshDiSSHEFsvON0KI0THbNSFESAgxKGbdJZbNI2vaq8N+U++3IdaxBKxr8poQokvM9n8I8z2lCSF+sezME0KkCiFGW89CiRDiTSFEWsx+A4QQ7wshdljn9ishxGk18t4ohHjAOg/5wnxnPCqEsMekGWndb/lCiCJhPu+DatjZz7qXyoX5jN4oaoSwhBAO61g2WOl+EEJcW8NOT+veC1h2xjV2ngGQUibkD7AD+cDjwIHW3yjgOGt7L0ACx9bYbz3wj5hlifmFdClwAPAfoAx4H7jMWvcU4Ac61bD9K3AucBCQg1l8X2T5cRDwFqZ4OOvyCTjSWh4NdAU613Os+1np+ljLVwI7gK0xaWYAb1i/PcBaK/+B1jHcDQSBQ6w0NuBBYLDl19nANuD+GJv/ANZbv/e1bL4JuJtwfboCIWC85f8hwF+AQ2O251rn61hgf+Ac4IyY7XcCA6z9/wqUApfX5Z+1fBkQqbFsAJ8BxwEHAwut6+aw0nS3rveLQF/gZGCVdb7/2sDxNSsvzPv1bMvu0dbxZcbsWwBcDPQGjge+A2bE2P8UKAH+a/l5aBPOcTbmvf2slb4P5r28C+teA/ph3tuvY94rBwIXAkMx76OpmPdwV+svuZ7zMdw6tm+B0yw704FCID3mvvwd+Ag4yvr7BPOZdNWw06OZdrcA7wGHW75/ZV3XSVYaYeX1Keb91hu4xjp/J8ccxwvWNUvFfIZLgHFNfCc1eA/E3NfFwGvWNTnWutaf17iv/cC7wGHACcBOy1bFMR6H+fw8UuMaXGrdHwdhPt8h4KCYNBsx77UJ1rkcC0So/lyNAsZYNvphPhv5VL3/BLAaM6Q/CDjC8qsIeDHGzivWsZ2CeX+Ota7blTF2vrau1WDLzofW+XmxwXOdQGHJsG644fVs70XTheWJmOXO1rqn6sjrrBq2b4lJc7S17vaYdRXC0b8eYenR0DHU8HsDcIP1exZwv3UB+lrrNgPXxNzgm7Fu5hgbH8ceax153Ar8UvPFjXlzbwGeAWxNvD4Vx96rnu2TMV96vmZc8yeBD2v6V+PBrvmyl8CAmHVDqC7SD2E+bPaYNKexe8LSWF7H1nVOrPyvq7HueCtthrX8KfBz7Plvwjn+B7CsxjqB+aK7xVqegfnSrvO6ApOAjU24NsMtX0bHrOtqrTvVWr4S82WfFZMmGzOkeEkNOzWFpSG7V2F+dGTEpOlvpZkUY6ccSKvh98vAvJhlD/ADoAPfxG5rwjloyj0wGfPZdMWkOdxKc3zMdYvUOE/PAFFiPj4xn4eVjfj0LXB3jXttQY00HwCvN2DDhilGF1nLIy1/D4hJk2ld2xet5f0wRfbgGrbuBVZbv0dYdmKFr7N1PzQoLAmrY5FSFljFrv8JIT4GFgM5Usp1u2Hu2xi7O4UQUUyljc0rBHSpbz+q4sLf1bGu5n67wyfASZglqhMxb7SBwEmWv90xhQOqvogLRfXIkZuYegEhxNWYD2UvwIf5ZV0zfNkZ8wvsRSnlHc3w9zvgf8AaIcSHmC/GuVLKivDfUcASKaW/rp2FEDbML/ELMAU4CXBifvE2h4qv3Qq2WP+zgXWYX3dfSbP+q4KlzcyjqXnVQgjRGegJPC6EeDR2k/X/AMwvOoBVUkojJk1j5/ho4CghRGmNbD2YX6tgXocPatjdE1ZX/JBSbrfuzWxrVT9grZQyLyZNrhBinbVtd+32BX6UUhbEpFkjhCiK2f9owAVsqfFMuIBfYvYLCCHGWvnlYpZgm0Nj90A/TLGvrB+TUn5r+doP81kD2BJ7njDfJdullDtrrIsNoXXG/OA8CfP5d2A+Nz1r+Li6xvIWTCGosLMf8ABmya8L5jvBG2OnL5AnpaxskSmlzLeuYwUDMe/hlTXOtwNTIGPt/BxjZ2cNO3WS0Mp7KeXVQognMYtaI4HJQoibpJTPYaolVD2gFTjrMBVuwjpJ7ZduuMb2+tbFo67pY+BJIUQ/IAVYYa07GfNC/RFzoW3Aj5hF2pqUAQghxmCK0wRMUS7GLP4+VCN9IeYL7BwhxBNSys00ASllVAhxOuYDPQI4D5gqhBgjpXynIlkDJm4H7gJuwywul2CWqM5sSv4xGDVEo65rUtOPhvza07xqUrHtZsyPh5rEnu9qItyEc2zDDDvdVIfd2Jfu7h5vXdTVoKChcw3mM9qYDw3Zbcr+NsxjProJtivqQNMxX6z5jdiOZXfut7rW1/X+aeyd9ApmyHo8ZoQjALyBKZ6x1DzemnbeAfKAGzHDoCHgixp2mnK+AYZh
vXPq2Lcp161B4wlDSrlGSvm4lPJ04CXMuCmYMUmAbhVprQqyvdUctSlUXGB7g6lMPsIsbt4KfCaljGAKywmYL5WPY9KuxIwhF0sp19f4q2jGeTzwjXXuVkkpf8EsudQkjFkH9D2wWAhR8+unXqTJCinlFCnl8ZgCdrm1eRVwjIipoK7B8Zhf0i9JKb+xRPPAetLuCWuBo2MrLzG/1BJBrestpczFfHj71HGt1kspyxsy2Mg5Xon5FbylDrsVz8cqYIRVQqzP56bcn03hB6CfECKrYoUQIhszlv/DHtrtK2IalFgfYGkxaVZiCkVSHediU439HgeuxaxnfUM00KBlN30dKoSofEkLIQ63fN2TcwDmM/MfKeUCKeX3mHWmvZtjQAjRCbMkMVVK+T8p5VrMEGJs1GUt0FlUb3SRgXkdK1hl/d+3jvP9q7XtB8vOgTF2smrYqZOECYswW7s8IsyWYT2FEEMxK7TWQmVz0C+B8cJsMXIUZqVfMFE+7QZ5mLHhU4QQXa2LUydSym2YRelLqRKR1Zgls7OpLiyzML9Y3hVCnCLMVmSDhRB3CSHOtdKsAw4VZqui/YUQN2MKSF15hwEN8+FcLIRo9GYVQgwTQtxj5buvEOJkzLqatVaS/2DeH/OF2YpmPyHEWdYXeIV/w4UQJwohDhJCVDQ0iDf/wQxTPCuEOEQIcSJVpbZ4fsmDGcYzgDOEEF1EVYueu4G/CbPPSH9htpw6VwjxXEPGmnCOn8YUhXlCiOOs++BYIcRDQohhVpp/Ygr2LCHEQOteGGM9T2DeR12FEEOFEFlCCO8eHP9rmB98s4XZgukozC/qLZj9gvbEbgkw03rWh2DWncQ2B/8Ys6HIXCHEKCFEb2G2hhtnhYQRQiRZ/iyQUr4EXI1ZvxobotxTnsZsGPCKda2Pxazn+kJK+XnDuzbKOuAiIcShQogjMBtkNPejoADzGl1tPXdDLTux53IRZrhvuhDiaEsYZ2DWC0kA60PwZeAFIcTF1vv6cCHEFUKIOy07H1l2ZgohBlk+z7LsNEgiSyx+zAfiDcxKzbcwO1fFFvuvwHxxL7HSPY+p4q0CK659I+ZL+w/MysKG+AgzvPixtb/E/EKtXGetL8csyawEpmGen7mYLTgq6iiew7wZpln5DsasNKzP1whmi6MvMMWlsdJDEeaX/3zMGPbLmDfNZMveNsyQQwlmi5IfMF/oFaHLydaxzces88gA/t1Ins1GSrkFU5iHYQr1k5gV1mB+qcUzr1zM8N4EzPtwvrV+BuY9cCZmiPMrzGuxpU5DVTR2jnOt7XmY13+dtb2nlT/Wl+1wzLq0xZjn4A6q4uDzgDmYLZR2YoZZdgvrY+8UzI+7z6z8/MBpcg/65Egpy4AzgE6Y528W8H+YLScr0kjM6zwXs0TyE+YxnYnZmAFrHx9maQWrzuYi4DohxNm7618NX3Mxz0EPzOv8DrAGM4y5p1yO+c5dgXndPqCqfq6p/hmYIfH9MUPgrwBPEPPetM7lKMxr9znmMbyPeX/FPjPXYJ7TuzE/dj7C/DD+LcbOuZj38WeWnfcwQ98NIsx9FYq2gzD7+ywGDrNevAqFogGEECmY9YGTpJRPJTo/1fNe0eoRQlyPWSTfihlf/j9guRIVhaJurBJcBLORUBfgPswwmL438ldjhbVThBATRVXv6Vp/Le1fM+mJGSpdh9mZ8HOa3/pM0c5p6H4XQkxsaf/2Ml7MuqcfMENYNsz+ebl7I3MVCmunCCEyMVup1UlsG3eFoj0Q2wqqDvKllM1plqzYA5SwKBQKhSKuqFCYQqFQKOKKEhaFQqFQxBUlLAqFQqGIK0pYFAqFQhFX2lw/Fk3TXgbOAnbout6/kbT7Aq9ijkFkBybouv5e4r1UKBSKjktbLLG8gjkfR1OYBOi6rh+JObz7fxLllEKhUChM2lyJRdf1zzRN6xW7TtO0/TGHmO+MOQT01bqu/4TZ0zTVSpaG2XNboVAoFAmkLZZY6uJ5YJyu60dhDtBXUTL5B/BXTdM2Yw6e1rT5mhUKhUKx27R5YdE0LRlz5Ns5mqatxhwVeB9r84XAK7qu98AcXXWGpmlt/pgVCoWiNdPmQmF1YAMKdV0/oo5tV2LVx+i6vlTTtCQgi5jhuhUKhUIRX9r817uu68XABk3TxgBomiY0TTvc2rwJa05sTdMOwZxfemedhhQKhUIRF9rcWGGapr2OOfFRFpCLORz0x5ij3u4DOIE3dF1/QNO0vsALQDJmRf54XdcXtoTfCoVC0VFoc8KiUCgUitZNmw+FKRQKhaJ10dYq71XxSqFQKHYPsbcyamvCwtatu9fHMSsri7y8vDh707pRx9wxUMfcMdiTY+7WrVucvWkYFQpTKBQKRVxRwqJQKBSKuKKERaFQKNoIttydpN71MFmnXNjSrjRIm6tjUSgUio6GLXcn9vv/j+xX54A0EKFwS7vUIO1CWKSU+P1+GuqTEwgEiEQie9GrlkMIgc/na2k3FApFcwiGsBUUYcsvxFZQiK2gCPvvW0h672Nc3/8IAkQk2tJeNol2ISx+vx+3243T6WxpV1oFwWCQbdu2kZKS0tKuKBQdDykRpX5TJGoIRcWfKCjCnl+IiF1fFmhpz+NGuxAWKaUSlRjcbjdlZWXk5ORw/PHH43a7W9olhaJtEolgKyqpEob8QkSlQFQXC1NAirAVFiHCdUdHpBDItBSM9DSMzHSM7M5EDj4AIyMdIyPN/Mus+J0O0SjeGW/he+tdMCQiFNrLJ2D3aBfCoqiN3W4nHA7z/fffM3DgwJZ2R6FoeQLlMUJQWF0MaomF9b+opF5z0umoFAAjM53IAb2qxKFifYxgyMw0jLRUsNub5XbxP++m9I5r6fzfmdhe1cFQdSyKFsRutxMMBlvaDYUivkgJhUXYN/5RWxRqCUWRGW7KL8RWXv+zYPi81UoMkZ7dMTLSkbFCkVlDKHxeEHunM7vRJYvovx9k53V/JfmJF3Gt/Hav5Lu7dFhh8cydS8rUqdi3biXarRslEyYQGD06Lrbz8/O5+eab2bhxI263m169evHII4/QqVOnyjRTpkzhueee4+abb+a2226rXG8YBtdeey0//fQTbrebrKwspk6dSq9evZpkV6FoU4TD2AqL6ygxxIad6hCLaJTsOsxJmw0jLQVpCUC0WzZGv4NqlCJqhJzS08Dt2uuHvjsYXbIonjKhpd1olA4pLJ65c0kbPx5bwKwsc2zZQtr48QBxERchBNdffz3Dhg0DYPLkyUyZMoXHHnsMgEcffZRvv/2WJUuWMG7cONxuNzfeeGPl/mPGjGHEiBHYbDamTZvG+PHj0XW9UbsKRUsiAoEqMcivqx6ijt/FpfXak25XlQikpxE5qHelMHh6dKPEZYWiMjOqShFpKWBT3fNamnYnLKn33otz7doG07hWrapVCWYLBEi//Xa8r71W737hvn0pfuCBRn3IyMiofPkDDBgwgOnTpwPwzDPP8OuvvzJjxgxcLhevvfYa48aN44UXXuDqq6/GZrNxyimnVO571FFH8eKLLzZqV9GxseXuNEM
kq74jb+Hre2bMMBDFpfW2aKoz5FRYhGgo1JTiq1ZaiPTet1YpQtYIOUlPUr2hJndWFoEONlZYW6LdCUuTqK9lRQJaXBiGwfTp0yvFIrZkApCUlMQLL7xQ7/7Tpk1j5MiRjdpVdEwa7TgXCmMrrNFqqbGmr4XFCMOoMz9pt2Okp1YJxJ+6YRzet3pdRGzIKTMdIz0VVKvNDkW7E5amlCi6DBqEY8uWWuuj3buz68034+rPpEmT8Pl8XH755c3e99lnn+WXX35hzpw5cbWraPvYcneS/PgL+GbPN5uhRqs6zmWd/tcq4Sj112vDSHJX1kUYGWlEDzmwWlPXWkKRmY5M8alQk6JR2p2wNIWSCROq1bEAGB4PJRPiWyn2wAMPsGHDBl555RVszXwYp02bRk5ODrNnz8bj8cTNrqLtI/ILyTr3SuybttQ5wYaRlUnkgP1iRKJ2xbXMTEPWuK8UinjRIYWlooI+Ua3CAKZOncp3333HjBkzmt1BcebMmcycORNd18nIyIibXUXbxvndj/imzcazYCGiPEgkOwt7fiEIUS0Elj/j3y3opULR9ua8l3VN9FVSUtKqhi9Zt24dJ510Er179yYpKQmAfffdl5deeqnRfUtLSzn44IPp0aNH5TG53W7eeeedZtndsGEDP//8M8nJyRxzzDFxPLrWTbubACoYwvPOInzTZuP6Zg2G10PgvDPwX6YROfgAbDvyanWc27plVUt7nXDa3XVuAnGY6EvNINmW6dOnD1vqqMNpCsnJyWzevDnudhVtC9uW7fhmvIX3tRzsuwqI9O5J0QN3UDbmLGRq1UdUW+s4p+gYKGFRKFoLUuL64it8r+ok/W8xAOUjj6PsMo3gsYMarDRvKx3nFB0DJSwKRQsjSkrxvPUevld0nL9sIJqRRukNl1B28flEe+zT0u4pFM1GCYtC0UI4fv4N3ys6njffxeYvI3REPwqeuJ/An0dCkmqYoWi7KGFRKPYmkQhJCz/DN2027iUrkW4XgT+PxH+ZRvjI/i3tnUIRF5SwKBR7AVtePt5ZOfhmvIV9Wy6R7l0pvusmyi48F6NTRuMGFIo2hBIWhSJRSIlz1ff4XtXxvLMIEQpTfvxgih4aT/mI45o9L4dC0VZQwqJQxJtAOZ75/8P3io7r+58wUnz4Lz4P/yVjiB7Qq6W9UygSTkKERdO0PwHTga6AATyv6/qTNdII4EngDKAMuEzX9a8T4U9Nzso6hR9dP9Raf0ioH+/kLdxj+4majwXgiiuuYNOmTdhsNnw+H5MnT6Z/fxWbbw3Yf9+Mb/qbeN9YgK2wiHCf/Sl8+C4C551hTgqVAHJ32hh7pYOnptro0rnugSMVir1NogaaigC367p+CDAEuFHTtL410pwOHGj9XQM8myBfajEgdBROWX1iH6d0MSAUnyl8K+ZN+fzzz1m0aBE9e/ZkypQpldtj52P54osveOaZZ6rtP2bMGBYvXsyiRYs49dRTGW/NFQPwxBNPsGjRIhYuXMh1113H7bffHhefFbuJYeD+ZAmZl9xMl2POxffCawSPGUjem8+z86PZlF1yfsJEBeCJZ5NZskLwxH+TE5aHQtFcElJi0XV9G7DN+l2iadqPQHcgdqKUc4Dpuq5LYJmmaemapu1j7bvbTE69lx+dDc/HEiJEhOrDi0eIsNa5hr90Or/e/Q4J9+We4pabjwUgNTW18ndxcbEahLKFEIXFePW38b06B8fGP4h27kTpzVfiv2g0Rre65jaMP7/9bue1t3wYhmDWHB/JyZKMNAOnExwOiSvmv9MpcTqkuc1ZfZvDIa3tVPtftW2vzcCraCckvI5F07RewJHA8hqbugN/xCxvttZVExZN067BLNGg6zpZWVm18gjEjFLcFFy46Bztwk77DqSQCCnoHO2Mi/hPT5qI+VjuuOMOFi9ejJSSWbNm1bmfzWbDZrORnJxc5zlrrzgcjoQer/huLbZnp2N7PQcRKMcYNpDIA3/HGHU6bpeLRPc+kRI+XyqY/oaN19+0EYmab/xIVPDMi4kbL69KoKj876xcltXWu5zgdIHTAS4XOB3S+l9zfdV2p4ta9isFsY71m7fZsds6myJZ777tZ4T/bbkwYpSTmc9l0bVLS3vTOAkVFk3TkoG3gFt0XS+usbmub6BaI2Lquv488HzF9roGYYtEIpW/m1KiANhhy2V49lCCBHHjZkHeB3Q24n/FEjEfy6OPPgrAm2++yYMPPsiMGTNq7WsYBoZhUFpa2qEG60vI4IShMEnvf2xWxq9YjZHkpmz06fgv1Yj072OmKa55e8eXLVvt6PM9zJnv5fc/HHg9BjXHj01ySz7K2UFamkEkIgiFIRIRhMOCcIRq/6ttC1NnmkgEQmFRw1bFOut/5b6WnYigLADhEnPZ3LcqfdU6iFj77T6Nq4bdXlUCq15Co7IE53JKHBWlNWf1bZWlPIesTFchpNVLhRJHTVv1/a/2u/q2Cps1Gwze+1AqXy5zcu9DIabc0/x7zRqEcq+RMGHRNM2JKSqzdF2fW0eSzcCfYpZ7ALWHLk4QXYxszveP5TXfDM7zj02IqCRqPpYKzj//fO68807y8/PJzMyMh8uKGGzbd+Kb+RbeWXOx79hFpGd3iu69lbKxZyPTUxs3sIcEyuGDRR5mz/PyxTIXUgqGDQpy2w0lLF/p4s0FXqIx9fWGAc9P91kvnrYxarmUEInULWw1BS5WBD3eVHbtKqlX/CqFztqvzm2R2uJX6rfV8qe6gFatSyRCVImW3Q7FJQIQzM7xcct1pa2+oUaiWoUJ4CXgR13XH68n2QLgJk3T3gAGA0V7Wr/SXG4qvYVfnD8zrvSWuNtOxHwsfr+fwsJCunfvDsDChQtJT0+vNWeLYg+QEtfyb/BNm03SB59A1CB44jAKLx9LcPjQhMdWpIRvvncyO8fLgvc9FJfY6NEtwq3XlzLmnDL27WHOFPn8q75aL7dQWLBydfzDuYlECCq/2k2aJohZWZK8vPLEOdYIUkI0Soz4VYlXRWksFK5nW50iWEfJ0dr22RI3pX4HhmF+PDzx3+TdKrXsTRJVYjkGuBj4XtO01da6icC+ALqu/xd4D7Op8XrM5sZ7fY7dLkY2r+96K+52161bx1NPPUXv3r05++yzgebNxzJhwgR69OjBBRdcAFTNx1JWVsa1115LIBDAZrORnp7OK6+8glA1q3uM8Jfhmfs+vld1nD+ux0hPxX/lhfgvOZ9orz81bmAP2bHTxltve5id4+WX35wkJRmcMbKcseeWMWxQqJaeLXyrKtzXEecmaWmEAIfD/PMkSRJVQszdaWP6bLOBBpgfD22h1KIm+mqnqIm+mob919/xvToHr74AW4mfcL8++C/XCJx7asKn7g2FYNHiJGbnePnkCzfRqOCoI0KMPbeMP58WIDWlqV/vHU9YOsox3/VAKm/MrV46dTklF57nb1apRU30pVAkmmgU90df4HtFJ2nxMqTTQeCsEfgv1QgPPCzhbWvX/OhAn+cl510P+QV2sjtHue6yUrRzAxzQO9K4AUWHYdW3rjYZ8lTCougw2P
IL8L4+H+/0N3Fs3ka0axeK/349ZReNwujcqXEDe0B+oWDeu17emOvlh5+cuJySkSeWM3ZUGScMC+JQT6KiDtpqyFPdzop2j/PbtfimzcazYCEiGCI49CiK772V8lOON2uOE0QkAouXuJmd4+XDT5IIhQX9DwkxeWIR555ZRmZ6mwpDKxRNRgmLon1SHsTzziJ8r8zG9c0PGF4PZWPPxn+ZRqTP/gnNev1vDmbneHjrbS+5O+1kZkS5eKyfsaPK6HewCnUp2j9KWBTti01bSHnyBbyv5WDPLyS8f0+KJv+dsvPPRKYmroFHcYlgwftmn5Ovv3Vht0tOPDbIg6OKGHFCOa7WHRJXKOKKEhZF20dKXJ+vMJsKL/wMJ1B+yvH4L9UIHTcoYZXxhgFfrnCh53h5b1ES5eU2DuwdZtLtRZz350Crbg6qUCSSdjKSzu6Ru9PGeZd2YsfO+J6G/Px8Lr74Yo477jhGjBjBVVddxa5du6qlmTJlCj179uTxx6v3HzUMg6uvvrpy3wsuuICNGzfWyuPxxx+ne/fu/PTTT3H1vS0hSkrxvfwGnYefT9aFN5jDrdxxPQyOHQsAACAASURBVDuWLaDgpccIHT84IaKyabOdR59OYeipXbjgyiwWLU7i/LMDvP36Tj5ZsJPrr/ArUVF0aDq0sDzxbDLLV7niPuR4IofNB/j+++/5+uuvK3vgdzQc634l7a6HyR5wGmn3/AuZ4qPgyQfI/eo9opPHE+2+T9zzLCsTzJnv4fzLOjH01Gye+G8y+/WM8vQjBXz96XYeua+IAYeF1SjACgXtMBR278OprF3XeEufUAi+/s4cf2nGbB9rfnLiamS3vn3CPHBX452SEjlsfjAYZOLEiTzzzDOMGTOmUV/aDZEISf9bjG/abNxLVyHdLgJnn4L/Mo3wEf0SkqWUsPIbF7PneXj7Aw+lfhs9/xThjpuK0c4J0L1bNCH5KhRtnXYnLE1l89aq4UMlsHmLnd694v+iiPew+Y8++ijnnXce++67b9x9bY3YduThnZWDb+Zc7Nt3EOmxD8UTx1F24TkYmYkZI21bro03F3jR53n5baM5kvBZp5p9TgYNqD28ikKhqE67E5amlChyd9oYemo2UppxCykFRcV2/vOvvLjHxuM5bP7KlStZvXo1EydOjKuPrQ4pca78Dt+rOp53FiHCEcpPGELhwxMInnxs7THF40AwBP/7OAk9x8viJW4MQzBoQJCbrirhzFPKSfapPicKRVNpd8LSFJ54NhlZQz8SMWpovIfNX7ZsGb/++itDhgwBYNu2bVx00UU8/vjjnHDCCXHzu6UQgQCeef/D+4qOa806jBQf/kvG4L90DNH9e8Y9Pynh+7XmSMLz3vVQWGxjn65RbryqFO3cMnr3VKEuhWJ36JDCsjfG30nEsPk33XQTN910U+Xy4MGDefXVVzn44IPj5ndLYN/4B77pb+KdvQBbYTHhg/encOpEAqNPT8h88bvyzZGE9XlefvzZidslOe3kAGNHBTh2SDARBSKFokPRIYUldvydRJCoYfPbFYaB+9OlZmX8J0vAZqP89BPxXz6W0OAj495MOByGTz53M3uel0WLk4hEBEf0DzHlnkLOOT1AepoKdSkU8aJDCkui6dOnD1u2bNmtfZOTk9m8eXOT0i5fvny38mhJREER3tkL8M14E8fGzUS7dKL0lqvwXzQaY5/4z+K5br2D2Tle5r7tYecuO1mdolz5Vz/auWUcfKAaXkWhSARKWBR7Bcean/C9ouPJ+QBbeZDgoCMoHn8D5aefRKPtvJtJYZFg/vse9Bwvq9e4cDgkI04wJ8068bhgIsedVCgUKGFRJJJQGM97H+GbpuNa+S1GkpvAeWfgv1Qj0u+guGYVjcIXy9zMe8/O/Pe6EgwJDjkozH3jixh9VoCsTqonvEKxt1DCoog7tm078M18C++sHOw7dxHp1YOi+26jTPszMj01rnlt+N2OPs/LnAVetm23k5EuufC8MsaOKuPQvqonvELREihhUcQHKXEtXWXOyvjBp2AYBE86hsLLxxI8YQjx7FXo9wveWWhO6bt8lRubTXLCsCD3/r2Iv5yfTGlpUdzyUigUzUcJi2KPEP4yPG++i+/VOTjX/YqRnor/6r/gv+R8oj17xC0fKWH5Khezc7y8878kygI29usZYcItxZx/dhn7ZJuhrqSkZEpL45atQqHYDZSwKHYL+/qN+F7V8c55B1uJn9ChB1Pw2L2Un3MK0urQGQ+2bLUzZ4GHOfO8bPzDgc9rcM4ZAcaeG2DgkSEV6lIoWiFKWBRNJxoladHn+KbpuD9fjnQ6CPx5JP7LxhIe0D9ufU8C5fDBInPSrC+WmQOFDj06yC3Xl3DmyHK8XtXnRKFozShhAWy5O0l+4kVcq74jb+Hre2wvPz+fm2++mY0bN+J2u+nVqxePPPIInTp1qkwzZcoUnnvuOW6++WZuu+22yvWGYXDttdfy008/4Xa7ycrKYurUqfTq1Qswe9u73e7K3vx33303w4cP32OfG8K2qwDv6/PwTn8Tx5btRPfJpnj8DZT95VyMzp0aN9AEpITV3zt5I8fLgvc9FJfY6L5PhFuuK2XMOWX0/JMaXiWWs7JO4UfXD1Urupn/Dgn14528hS3jlEJh0aGFpUJQfLPfBmkgQuG42K2Yj6Vi6PzJkyczZcoUHnvsMaD6fCzjxo3D7XZXG/V4zJgxjBgxApvNxrRp0xg/fjy6rlduf/755/fKMC7Ob9aYfU/e/hARDBE85miK/3E75accD4743Do7dtqY+46H2Tlefv7VSZJbcsbIANqoMo4ZpEYSro8BoaNY7/yFsAhVrnNKFwNCA1vQK4XCpN0JS+q9j+Jc+3PDiUJh7Ju3Yt9hzuooZFVopdP519S7W7jvQRQ/cEejPiRyPpaEUx7Es2Ahvlfn4Fr9A4bPS9kF5+C/TCNyUO+4ZBEKwaLFZquuT75wE40KBhwe4pH7Cjn79ACpKSrU1RC5tu0cGOlDlOqluCgRim2FPJ7yT7Kj2XQxupr/o9lkGZ1xtL/HXdFK6ZB3mvPn3xAlpeyNet94z8cCVA5EefTRRzNhwgTS0tL22E/75m14p8/B+9o87AVFhA/oReGD4wmcfyYyJT4zbP7wkzm8Ss67HvIL7HTJinLtpaVo5wY4cH81vEp97LTtYJlrKcvdS1jmXsIGx28AOKUTKQ2kkAgpyDAy+Mq9nPds7xAV1UVHSEGW0Zku0WyyDVNssqNd6WJkW+LTlWwjm0yjE7aOPbGsIg60O2FpSonCtiPPCoEtAKN6CGzXm8/H1Z94zscCMHfuXLp3704wGOS+++5j0qRJPPXUU7vnnGHg/mIF3mk6SYs+B6D81BPwX6oROvbouFTG5xcK5r3rZXaOhzU/unA6JCNPNCfNGn5MMF4RtXZFni2PFa6lLHMvYZlrCb861wOQbCRzdGgwY/0XMSQ0jKxoFidnH0uQIG7cvLtzEZ2NLkSJkm/bRa49lx227eZ/ey65tlx22LeTa9vOd85v2WXLQ4rqpUOHdJBldDZFp14Ry
iZdZiD2yqeZoi3SIR9ro0sWxVMmUHrLVfUKTDyI93wsQOU89263m0svvXS3BEsUl+Cd8w6+V+bg+O13op0yKL3xMsouHh2X+eIjEVi8xM3sHC8ffpJEKCzod3CYB+4qYtSZATIz1PAqseTb8mOEZCm/ONcB4DW8DAwN5rzAWIYEh9IvfGitcNb5/rG87pvJef6xdDbMQTzt2OlsdLGWD6033zBh8mw7TdGpQ4Q2OX5npW05BfaCWvu6pIsu0exaJZ4ulvBkW2G4ZJmiBKgD0iGFpYKaAuNa+W3cbCdiPpaysjIikQipqalIKZk/fz79+jU837soKib1rodxrfqOwn9PNivj33oPW1mA0JH9Kfj3ZAJnjQD3ns9Fs36DHT3Hy1tve9m+w05GepSLx5ojCfc/RIW6KigUBSx3L2O5JSbrnD8C4DE8DAwN4tzAaAYHh9I/fBhOGh4x86bSW9jg/Y1xpbc02w8nTvYxurGP0Q0a+KYKUs5O+05ybdsrRcj8bQrRz451fO7+jFJbSa19PYbHEp/qJaDY+p9soyteGf95dxQth5CyTVWUyq1bt9ZaWVJSQkpKSgu4Uzfr1q3jpJNOonfv3iQlJQHNm4/l4IMPpkePHpXHVDEfy++//87VV1+NYRhEo1EOPPBAJk+eTHZ2di07G9avZ/3TL7DPsm8YubUQETVj7tLtInDOqfgv0wgf3nePj7WkVLDgfbPPyarVLux2yYnHBtHOLWPk8HJc8Zs7rUlkZWWRl5fY+XaaS5Eo5CvXCpa5v2SZewk/OX5ECkmSkcRR4aMZHBzKkOAwDg0fjovmn7DWcsx+4WeHLbdaCagqBJdLrhWGK7eV19o32UixGhxUlYAqf1eKUBfcmM9TaznmvcmeHHO3bt2AvVd0VMLSTtn0+RJ+nvQQ+5QGGLndHDur+O6/UXbB2RiZGY3s3TCGAUtWuJg9z8t7HyZRXm7jgN5hxp4b4Lyzy8ju3HKhrtbwwikRxXzlWl4Z2lrrXIMUErdMYkDoKAYHhzI0dAyHhg7HTfNKs3XRGo65qUgkpaIkptRTo/7HnlspTqGYptQVpBvpdIl2pYe9BxnlmXXW/3Q2ujRa0muLtCVh6dChsHaNlKYCxFB6w6V7ZPKPLdZIwvM9/LHFQUqywXl/DjB2VBkDDuu4IwmXilK+ci1nuXspy1xL+MH5PYYwcEkXR4aO4m8ltzE4NJQjQkdWfnF3VASCFJlKSiSVAziw3nQSSaEoiCn9mCWeChHKt+9irfsHdtp21NkCLtPoFFPn0zWmEUJVCaiTkYUdNQ91IlDC0p6JQ+/CQEDw7odmn5MlK9wIITl2SIg7/1bCaSeX4/G0qRJvXPALPytdKyrrSNY4vyMqojilkyNCA7ix9GYGB4dyZGgAScRv3LSOhECQITPJiGTSJ3JIre0VX+9RohTY8muVeGJF6AfnGvJsO2u1gLNJG52NLjGCU1XnEytCGUaGaoLdTJSwtFOiWZnII/sjl69GupzNavEmJaxc7UTP8bLgAw+lfhs9/xThjpuKGXNOgB7dOtbwKmWijK9dK1nm+pJl7qV87/yWiIjgkA4ODx3JtaU3MiQ4jAHhgXikEpK9iR07WUZnsozO9Iv0rzddhEjdLeBs5vJmxx98bVtJvj2/1r5O6aRztEtlS7f6SkApMlW1gLNoF8IihCAYDDa79VV7pby8nOJwCOP0kyg+9UT8n69uUou37TtsvDnfiz7fw68bnHg8BmedYvY5GXxUxxlepZwAq1wrK0Nb37lWExZh7NLOYeHDubr0OoaEjmFAaKBqzdRGcOCgq7EPXY19GmkBFySvRgu42BLQr471LHF/SbGt9pw/SUZSVUODOpphV4iQT/qa7HdbHROuXQiLz+djw4YNSCnr7S9is9kwjPbZf0KU+rEVFiNdTqKdMijx+/ntt99wu924unSheMqEevcNhmDhJ0noOV4+/dKNYQgGDQhywxUFnHVqOcm+9h/qClLO165VlULyresbQiKEXdrpHz6MK0qvYXBoKANDg5r1UlC0Pdy46R7tQfdojwYFKCACVS3g6hChtc41fOJeRJmtrNa+yUZyTB+g6g0PKsJwXaJdSMLTZseEaxfCIoSgW7duvP/++wSDQez22hVyHo+HQCDQAt4lEClxLV2Ja9UaIr33pfzUE2CTeUmj0Sg+n4/+/WuHB6SENT86mZ3jIeddL4VFNrpmR7nxqlK0c8vo3bN9h7qCBPnW9Q3LXOYQKd+4viYkgtikjX7hQ7nEfwVDgsMYGBpEilStDRW18UgPPaO96BntVW8aswVcaR3iU1X/87VrJbn2XEIiWGv/NCOdzGgnolTvA2bHtlv9lvYm7UJYwBxz64wzzmDTpk2Ul9duJ5+WlkZRUTuasjYSJfm5mbi/XEH5iOPwXz622hAsgXIvjz17FEccWUxSkllS25VfNZLwjz87cbskp55czthzyzhuaJA69LhdECLEd67VLHV9yXL3Ur52rSIoyhFS0Dfcn4v9l1YKSarc83HXFAqoaAGXQkokhf05oN50EkmRKIwZ9aB6M+xiWyG7bLtAmKWV2FEWWisJERZN014GzgJ26Lpe65NZ07ThwHxgg7Vqrq7rD+xpvm63mwMPrLsJY1tq698YotRPxtV/J+mz5RTfeSOl4y6vNa7XXQ+ksuQrD4/9x+Dk44Po87x8+GkSkYjg8H4hHppUyLlnBEhPa3+hrjBhvnOuNkNb7iWscn5V2SnvkHBf/uL/K0OCx3B0aBBpMr2FvVV0dASCdJlBeiSDPpHa02HssOUyPHsoQYJtorQCiSuxvAI8DUxvIM3nuq6flaD82y22HXlkXvw3nD+up+Dx+wiMPbtWmtydNmbn+DAMwUzdx0w9mU6ZUa64yBxe5ZCD2tfwKhEirHF+xzL3Er52rGRp1y8rY9t9wocwtuwiBoeGMig4mAyZ2cLeKhTNo4uRXeeYcK2ZhAiLruufaZrWKxG2OzL29Rvp9Ndx2HYVkP/qEwRPHFZnukkPpRG06vqEgOHHlDPt6Xyc7aQzcoQIa51rWOpewnLXUla6luO3+QE4xOjLeQGNIcFhDAoNJdNQQqJo++zJmHAtQUvWsQzVNO1bYCtwh67rP9SVSNO0a4BrAHRdJysra7cyczgcu71va0As/xrH6KvAZiOySCflqMOoq1r56RdsvPehnYrRG6QULFvpRtizaKuHHyXKd+JbvhCL+dy2mKXiS4pFMQB95MFcYFzEceHhHCuPYx9HNyLOCO1wRI96aev39u7Q0Y45iyw+ZDGRzLYRbWgpYfka6KnreqmmaWcA86Du8R10XX8eqJgkRe5uPUlbrmNxL1xMxvV3Ee3amV2znibasxvUOJZIBCY/msqLM5IRQhI7BFw0Cvc+FGLKPcV72fPdw8DgJ8dac6wt9xJWuJZTYjN93y/SmzODZzM4OIwhoaG1wgKRrEibvc67S1u+t3cXdczNwxorbK/RIsKi63pxzO/3NE37j6ZpWbqud6w7pQl4Z84l7a6HCR96MPnTn8TIqh3ayS8UXH97Jl8sc5OZESW/
oHrzrlBYsHL1Xh5muBkYGPzs+KkytLXCvYwiWyEAPSO9OCPwZ4aEhjI4OJRso2sLe6tQKBqjRYRF07SuQK6u61LTtEGADdjVEr60WqQk5bHnSPm/Fyg/6RgK/jsV6avdy/vHnx1cMS6T7bl2Hn+wgLGjqvrqtNavOonkZ8e6yrG2VriWVk4mtW+kJ6cETmNIaBiDg0PNuUIUCkWbIlHNjV8HhgNZmqZtBu7Dinrruv5f4Hzgek3TIkAAuEDX9fbX7nV3iURImzAF3+vz8V9wDkVT76Kumvd3FyZxy8R0UpIlb03PY8Bh8Z0BM15IJL861ltjbS1huWsZ+XbzO6J7pAcnBUcyJDiMIaFhdIt2b2FvFQrFnpKoVmEXNrL9aczmyIoaiLIAGddNIOmjLyi55SpK7riuVh8Vw4BHn07hyedSGHB4iBefzG/ROVBqIpFssP9q1ZEsZblrKXn2nQDsE+nGCcETK4WkR/RPLeytQqGIN+2m5317wLargMxLb8b57Y8UTp1I2cXn1UpTUioYd2cGH36axIXn+XloUlE8ZhXeIySSjfYNlWNtLXcvZYc9F4Cu0a4cGzyOwaFhDAkO40/RfdUIsApFO0cJSyvB/vtmOl00Dvu2XApe/Bflpw6vlebXjXauGJfJxk0OHrq7kEsvLGuRybUkkj/sm6wZEpew3L2E7fbtAHSOdqksjQwODqVXdD8lJApFB0MJSyvA+d2PZF78N0QkSt4bzxI++vBaaT76zM1N4zNwOCSvv7CLYYNqT9uaSDbb/6gctHGZawnbHOYU0Z2iWQwJDWOINW/7ftH9lZAoFB0cJSwtjPvTJWRcPR4jM51ds54icsB+1bZLCc+8mMzUJ1Po2yfCy0/l75WJtrbat1QKyXLXUjY7/gAgM5rJ4NBQriu9kSGhY9g/coASEoVCUQ0lLC2IZ847pN/xAJGD9mfXjH9jdO1cbXsgILjtnnQWvO/h7NMDPD65MGFTAW+zba1WR7LJ8TsAGdEMBoWGcmXptQwODeWgSB8lJAqFokGUsLQEUpL89DRSpz5D8NhB5L/4L2RKcrUkf2yxc+XfMlm7zsHEW4u54crSJtWnNHXGuVzbdktIlrLM/SW/OzYC5hwQg4JDuMR/BUODwzgocrCa71uhUDSLZguLpmlC9TnZA6JR0u75F75X51A26jQKH/8HuKr3UVmywsW1t2UQiQimP5vPScfVngSoPuqbca5vuB9vJ81nuTVMygbHbwCkGKkMDg7hr9acJAdH+iohUSgUe8TulFgeB26NtyMdgkA5GeMm4Xn/E0qvv4TiieOInUheSnjlNS/3PZLGfj3N+pT9ezWvPuWm0lt40ze72roIYd7y6bzl00k2kjk6NJgL/BcxJHQMh4T7YqedzvClUChahGYLi67rSlR2A1FQROblt+Ja+R1F99+O/6q/VNseDMHEyWm8MdfHyOHlPPVIASnJzS8YdjGyOb78RD5M+sAc4FjCPtFuXOy/nCGhofQN98ehIqAKhSKBNBrz0DTtpZjfQtO0FxPrUvvDvmUbWaOuxPXtWgr+M6WWqGzfYeP8y7J4Y66PW64r4eWn8ndLVAIiwIOp95miYuEmibl573CN/3oOCx+hRKUd4Zk7ly6DBuFMSqLLoEF45s5taZcUCqAJwgL0rvhh1a3snzh32h+Otb+Qdfbl2HN3smvW05SffUq17V9/5+QMrTM//ezg+f/L5+/jSmKjY01mhWsZZ3QewbTkF7m47DK0sguxSRvnt5EZ5xTNwzN3Lmnjx+PYsgUhJY4tW0gbP16Ji6JV0JTP1zxN064ClgBDUaMQNxnXl1+ReeXtSJ+PvLkvEjmk+pQzs3M8TLg/na7ZUWY9v2u3pgz2Cz//SnmYGcnT2DfSk1l5cxgSGsYOWy6bPX+0mRnnFM0jZcoUbIFAtXW2QIC08eNxrVqFkZGBkZlp/q/4s5alz1dr/DmFIp40RVguxZzB8UZgHXBJQj1qJyTNX0jGLfcS6fUnds18CqN71Twi4TA88K9UXp6VzLFDgjz7WD6Z6c0PfS1xfcHE9L+z2f4Hl5Veye0lE/BKc2j9LkY270c+Is9ofcPmK3YPEQjgXrgQb04O9m3b6k3jmTcPW2FhvXak01m38NT8i9ku09PZraK0okPSFGEJAtuBKPAscCTwVSKdauv4np9F2v2PExx8JPkvP45MT63cll9g49rbMliyws3Vl5Qy6fZiHM2s9igVpTyS+iCv+WbQK7Ifb+yay8DQoDgfhaJVEI3i+vJLvHPnkvT++9hKS4l27YpMTkaUltZO3r07O1asgGgUW1ERtvx8bAUFiIICbAUFlcu2mGXH+vWVyyJSd6lZCoGRno6sQ3TqXU5PB1frnWBOkTia8kqbCSwGLtR1/SlN0x4GRiTWrTaKYZA6+QmSn59F4IyTKXhqMiS5Kzev+dHBlX/LZGeenSemFDDmnEADxurmS9dnTEi/g232rVxZei23lvwdj/TE8ygULY2UONeswTN3Lp7587Hn5mKkpFB+5pmUjR5NaOhQPPPnkzZ+fLVwmOHxUDJhgrlgt5sv98zaM442lK8oKakmOjVFqOK3fetWnGvWmIJVXl6vSSM5uVEhkhkZRDMzK0VLejwqVNfGaYqwdNZ1/b+apmkJ96YtEwyRfus/8M7/H6WXj6X4/tvBXtU/ZP77Sdw2KZ30NMnc6XkccWjzJuUqEcU8nPogs32z6B3eHz1vHgPCA+N9FIoWxL5pE56cHDw5OTh/+QXpdFJ+0kkERo2ifMQI8FR9QARGjwYgZepU7Fu3Eu3WjZIJEyrX7xZCIFNTiaamEu3Zs+n7BQKNCpGtsNAsHW3YYC4XF9drTrrdDYbljMxMRM+eOO32ymWZmqrEqBXRFGHZoWnaWMCjadoooO7gbgdGFJeQeeUduJespPjuv1F6/SWVN3k0Cv98KoWnX0hh4BEhXnginy7NnJRrsfsTJqb/nR22XK4puYGbS24jCVVKaQ+I/Hw877yDZ+5c3F+ZEebgoEEUTp1K4MwzkQ2UOAKjRxMYPbrlp6D2eDA8HoxuzZhGOhw2xaaB0lFF+M6xbp25vbAQYVQ9O7Ej60m7HSM9vc7SUL2lpfR0mh2HVjSJppzVK4CrgK+BHsDVCfWojWHbvpNOfx2H45ffKHjyAQLnn1m5rahYcNOdGXz8WRIXne9n8t3Nm5SrWBTxUNr9vOmdzYHhg/hP/gscHj4yAUeh2KsEAiQtWoRn7lySPvkEEQ4TPvBAiu+8k8CoUUT/1AFm1XQ6MTp3xujcufG0FRgGorgYW0EBmYZBcUXppw5hcmzahO3bb816o2D9QyIZqanVhSc9vdH6o9iSo6JumiIsB+u6/rSmaV2Ay4BewE+JdKqt4PhlA5kX3YStsJj86U8SPGFo5bb1vzm4fFwmmzbbefjeQi4ZW9Ys2x+7P2RS+gTybDu5vmQc40puxY278R0VrZNoFNeSJWYl/HvvmZXw2dn4r7iCstGjifTrp0I5jWGzIdPTiaanI7OyCO7fhC51UiICgWrCI2oKUcXvvDwcv/xiLvv
99Zo0PJ7GS0M1m3gnJ+/29fXMnVsZ8uwSj5DnXqApwvIYcDLwAGYl/jTM/iwdGtdXq8m87Fak08mut54nfOghlds+/NTNuDszcLkks1/axZCBTZ+Uq1AUMDntPuZ536JP+BCey5/GoeHDEnEIikQjJY4ffsBbUQm/fTtGcjLlZ5xhVsIPG1atHk6RAIRAer1EvV6iPXo0fb9gsCpU11DdUUEBzs2bTcEqKkLIursNSKezSaWhyhBeZiZGWlqtRhoVHWGBVi0uTREWr6ZpbsCt6/rrmqZdl2inWjtJH3xCxo13E90nm12zniLa07xhpYR/P5/Mv55Kof8hYV56soDuzZiUa2HSB9ybdhcFtnzGldzKDSV/w4VqrtnWsP/xR1Ul/M8/Ix0OsxL+vvsoHzlShVLaAm43RnY2RnZ20/exmniLmkJUhyg5fvutqi6pgSbeQC2xsgUCpEyd2uaFZSYwH7hP07QkYENiXWrdeF+dQ9qkfxI+vC/505/AyMwAwO8X3DopnXcXehh1Zhn/ur+oyZNy5dvyeSD1Ht72zqNvuB8v75pB30j/RB6GIs6IgoKqSvgVKwAIHn00hQ8/TOCssxqshFe0E6wm3mRm0uTPSSkRpaX1loaS/+//6s5q69a4uZ0IGhUWXdefAZ6JWXVZwrxpzUhJyiP/IeWplykfcRwF/33YbG8PbNps54pxmaxb7+CeO4q49jJ/k8OpHyS9y71pEym2FXFL8R1cV3oTTpyN76hoecrLqyrhP/7YrIQ/4ACKx483K+H33belPVS0doRApqQQTUmp837x6DqOLVtqrY82pwVeC6Da2jWFcJj0vz+Id847+C8aRdGUCZXNFD9fD08kygAAF3hJREFU5uK62zKREmY8m8/wY5s2KVeeLY/70+7mPc879Asdyoxdb9AnckjjOypalmgU19KlZqjr3XexlZQQ7dIF/+WXExg9mnD//qoSXhE3SiZMaLgjbCulXmHRNC1D1/WCvelMa0T4y8i4ZjxJny6l+I5rKb3lavMrQ8KLM3xMfjSV/XuZk3Lt17PxArBE8m7SAu5Pm0SprZTbi+/k6tLrVSmlNSMljrVrzUr4efPMSnifr6oS/phjVCW8IiEkpCPsXqChEsvDmqZlAL8AC4Eluq43f/jdNoxt5y4yL7kZ5w8/U/ivSZT9ZRQA5UGYcH86c+Z7Oe3kAE8+XEiyr/H6lDzbTu5Nm8j/PO9xeOhIpu56jIMifRJ9GIrdxL5li1kymTsX57p1SIeD4PDhFN17L8FTTqkMhSoUiaTVdIRtBvUKi67r1wFomnYgMBK4RtM0ASwH5uq6vnnvuNgy2H/bRKe/3oQtN4/8lx4lOPJ4ALbl2rj65ky++d7F7TcWc8t1pY0O+iqRLPDk8EDaPZSJAOOL7+bK0mvUpFutEFFYaFbC5+TgXrYMgNDAgRQ+9BDlZ5/dvLG3FIoOSlMq73/BLLX8R9M0OzAY2Adot8Li/GYNmZfcDFKya85zhAccCsBX3zi55pZM/GWCl/6dz2kn1z/4XgU7bLnckzaBRZ6FHBk6ikcKH2f/yAGJPgRFcygvJ+mjj6oq4UMhwvvvT/Hf/25Wwjdn3CyFQtG8T2Zd16OYE361W9yLPifjugkYnTuxa+ZTRPc3Xyqvvell4uQ0uu8T5Y2XdtHngIajghLJXM8cHkz7B0ERZGLRvVzmvwo7KhbfKjCM6pXwxcVmJfyll5qV8IceqirhFYrdRMViYvC+Po+0O6cQ7nsg+dOfxOiSRTgM901N49U3fJwwrJxn/lVARiOTcm2zbWVS+p18mvQxA4ODmFr4GPtFeze4j2LvUK0Sfts2sxL+9NMJnHcewWHD1KCECkUcaPQp0jTtNF3XP9A07QDgVmC2ruufJd61vYiUJD/xIqmP/pfyE4ZQ8Pw/kck+8naZk3ItW+nmustLueuWhiflkkje9MzmwbR/ECHMPUUPcIn/cmyomfdaEtuWLXjnzTMr4X/6yayEP+EEiu65R1XCKxQJoCmfZ3cAHwATgReAfwNHJ9KpvUokQtrd/8Q38y3KzjuTwkfvAZeT79c6uWJcBvkFdp56pIDRZzU8KddW+xYmpo3n86RPGRwcysOFj9Iz2mvvHIOiFqKoCM+775o94ZcuBSB01FFmJfyf/4zRqVMLe6hQtF+aIiwpmqbtC0R1XV+qaVr9w362MUQgQPoNd+NZuJiSmy6nZMKNIATz3vVw+71pZKRLcmbk/X979x4dVXnucfw7k/sFBDKgxktB9PSqAm2xp55C1ap4OQiojxatxZ5VVldrvS+KvXGsoGA9CipiU++rCj7lItaFtVXUVi1WrW0FUZeXnmMIKgQSEHIhmTl/7AmEkGAge2aSmd9nrVnJzOw9eV4Ufnn3s/e7OebzXd+UK0GCRaUPMrv/dcSJc23dLCZvv0izlExobCTyyCMMvO8+ip96ikhzMy1HHMGWq68OmvBDh2a6QpGc0J1guQGYCcxMrhW2KrUlpUdkUx0VU66g4G+vUTdzGtsvPo/WVrjhlv4suLec0aOaqLplM4NjXd+Uqzrvfa4ZcDUvFD3Hvzcdzw11N3FYq5bxSKt4nMJVq4Im/GOPEd2yhejgwWy76KKgCX/MMWrCi6RZd4JlGDDd3dtWPevdawl0Q977NQy64BLyq9ezuepGGk8/kbr6CJdMG8jTzxVz0XnbuHZ6PYVdLCwcJ85DpQ8wp/8sokSZWTeH87dfQAT9A5Yu+WvXBveEf+QR8mtqiJeW0njaaRRMmcKGY45RE14kg7rzt+9d4EYzOwD4HbDY3TeltqzUyV/9BhXfupRIUzO1C++g+biRvPV2cFOudTV5zJlRx4XW9U25/jfvX1wz4GpeLPoLX2scy/X1v6Sy9ZA0jiB3Rdeto3T58qAJv3Ytibw8msaOZetPfkLjKaeQKC0lFotBH7k6WSRbdecCyeXAcjMbBNwJ3GRmTwDz3P25VBcYpqLlf2DQpT8DYMMfHqLl08N5YmUxP/zRAEpLEvz23lq+PKrzm3LFifNA2b3c1O8G8shn9ub/4ZyG8zRLSbFIfT0lK1ZQsmQJhatWEUkkaB41irqZM4MmfCyW6RJFpIPunG58GnA+MJBgxjIViBDco2VMSqsLSfTDDeRfeCmDnn4eCIpvPmo48+4o56b5/Tn2883cdesmKg/qvJ/yXt67TB9wFS8X/ZWxjScyq24OB8d797LVfVpTE8UrVwZXwj/1FJGmJlqGDWPrVVfRMGECrcOGZbpCEdmL7hwKOxr4sbvvdlMAM/tuVzuY2T3AmcBH7r7HHauSa47NA04HtgNT3P1v+1J4d0Q/3ED53LsoW/gIkR27Xyk/9YqBPP5kCWeP386cGXWUFO+5fyut3F92Nzf1n0NRoogbN9/CpIZzNUtJhXicwhdf3NWEr6+nNRZj24UXBk34Y49VE16kj+hOsDwM/MzMyoFvAxe7+13u/uZe9rkPuB14oIv3TwOOSj6OAxYkv4Zq4Pd/TOGLr3Z6H+onVhYzY1o9372o85tyvZv3NtMGXsmrha9wUuPJXFc3mwPjB4
VdYs7Lf+ONoAm/bNmuJvy4cTRMmkTT176mJrxIH9Sdiy3uBuYClcm1wr75STskr8zfW4P/LOABd0+4+ypggJkd3J2C98XmBTew7aJziBcV0RzZ/RSvB6tqmfrtPUOllVaqyhZwxpBTeC//HW7efBu/2nSvQiVE0ZoayhYsYPDJJzPkpJMov/NOWj7zGTbffjsf/uMf1N12G00nnKBQEemjuvM3N8/d3zCztudhXPl3CPB+u+fVydfWd9zQzKYS9HVw9+Csn+6KxaDqJn5UcA1DH5jHxc0PkxeJU5RoZtJ/9t9j8zdYy/fzv8sr0Zc4M34WN7fcyoFlB0HZvg2ut8jPz9+3P69Uqq8numwZ0YULiTz7LJFEgvjo0bTcfDPxc88lOmQIZfT8j7pXjTlNNObc0JfG3J1gWWlmdwKVZjYP+GMIP7ezg+Wdruzo7lVAVds2+3qjmw83RJn7yEG0lM5mVukVvHLi9VS8+ffdbpjTQgu/Ll/Arf1upjxezrxNd3BG43giRNhI3z11NeM3Bmpqovjpp4Mm/JNPBk34oUPZfuWVQRP+iHYLc4ZUZ8bHnAEac27oyZgrK9N7slF3Tje+zsy+ADwFvAn8Xwg/txo4rN3zQ4GaLrbtkbkLynce7qotGMI1Q2Zx/W1bdr7/Zv5apg24ktWF/+T0hjOZUT+LWLxv/FbQK8XjFL70EiVLlgTL0dfV0VpRwbYLLgia8CNGqAkvkuX2ds/73wIXuHuzu68GVpvZpwluUzy6hz/3UeASM1tE0LSvd/c9DoP11Icbojz8SBk7dgT/kDXviPDwsjIu/97HDBzcxJ3ltzO/3zz6xftz+6ZfcVrjmWGXkDPy33xzVxN+3TriJSXBcvQTJwZN+IKCTJcoImmytxnLImCFmZ3j7nVmdipwHXDRJ32omS0Evg7EzKwamAEUALj7ncAKglON3yY43fjingyiK3MXlJPocGlKPA4/r2pm3dwzeL1gDWduP4sZW2YyKK5bzu6r6Pr1lCxfTunSpRSsWRNcCT9mDFunT6fx1FNJlPXR5pSI9Egk0cmpuG3M7KvAL4EngK8C57t7XZpq60yipqb7R8yOPK+ehtWf3eP1yNGvcsjTp3Jd/WxOaRwXZn29SiqOQ0e2bKH48ccpXbKEwhdeCK6EHzmShokTaRg/nvjgwaH+vH2lY++5QWPeN8keS9qOQe/tUNh1BA31dcCVBNeaXGlmuPvP01Rfj0x+fDZetogdkd2XaRm6YxhLPnqaAYmBGaqsj2lu3tWE/+MfdzbhP77iCrZPmEDr8OGZrlBEepG9HQp7Mvn1KWB+GmoJ3SUfX87isod3e60gUcCi2qUKlU8Sj1P48stBE/6xx3Y14SdPDprwI0eqCS8ineoyWNz92XQWkgpD4gdyzrbz8LKF7IjsoCBRgG2bzOD4kEyX1mvlv/XWriZ8dTXx4uJdTfgxY9SEF5FPlPWXNrefteSRxw8/vjzDFfU+0Q8+oCS5HH3h6tUkotGgCT9tGo3jxqkJLyL7JOuDpW3WsrDsN5y97TzNVpIiW7dSvGIFpcuWUfjcc0ETfsQI6q+9NmjCD9Gfk4jsn6wPFghmLe+Vvpszs5WSpUvpN3s2eTU1DKmsZOv06TRMmgTNzRQ98wylbU34xkZaPvUpPr7sMrZPnEjrkUdmunQRyQI5ESxD4gfyeMtTbIxn/+mJJUuXcsC0aUQbGgDIX7eOAVdfTcmiRRSuWRM04QcNYvv557N94kR2fPGLasKLSKhyIlhyQiJBpL6e/jNn7gyVNpGmJoqef56GCROCJvzYsWrCi0jKKFh6s8ZG8mpriW7cuPOxx/ONG4nW1hKtrSWyY0fXnxWJUDe/T541LiJ9jIIlneJxonV1uwVDtLY2CIcNG3Z93xYWW7d2/jHFxcQHDyYei9FaWUnzMccQr6ggHotRPm8eeZs377FPa5pXNxWR3KVg6aFIQ8OeQbFhw+6h0X5W0dq6x2ckotGdwRCvqKB5xIhdz2MxWpNf2x6J0tIu64lXVOzWYwGIl5Swdfr0lIxfRKSjrA+WLs+Q6kprK9HNm3c/1NQ+HDq+tn17px8TLy/fGRQthx9OfNSoICwGD6a1XWjEYzHiAwZAXl4o420bW9uYW7szZhGREGV1sHR6htRVV1H4/PO0Hn74rqBIHoaKbtxIdNMmIp0szJnIy9sZFK2xGC1Dh+4+o2g/w6iogJKSdA93p4ZJk2iYNCknF+oTkczL6mDpN3v2nmdINTdTtmgRAPH+/XcFxfDhxEeP3jMokjOMxAEHQDSMuzKLiGS3rA6WvC6W2E9EIqx/5x0oKkpzRSIi2S+rfwXv6kyo1spKhYqISIpkdbBsnT6deIdeh86QEhFJraw+FKYzpERE0i+rgwV0hpSISLpl9aEwERFJPwWLiIiESsEiIiKhUrCIiEioFCwiIhIqBYuIiIRKwSIiIqFSsIiISKgULCIiEioFi4iIhErBIiIioVKwiIhIqBQsIiISKgWLiIiESsEiIiKhUrCIiEioFCwiIhIqBYuIiIRKwSIiIqFK2T3vzWwcMA/IA+5y99kd3p8C/BJYl3zpdne/K1X1iIhIeqQkWMwsD5gPnAxUAy+Z2aPu/nqHTR9290tSUYOIiGRGqg6FjQbedvd33b0ZWASclaKfJSIivUiqDoUdArzf7nk1cFwn251tZmOAt4Ar3P39jhuY2VRgKoC7E4vF9qug/Pz8/d63r9KYc4PGnBv60phTFSyRTl5LdHj+O2ChuzeZ2feA+4ETO+7k7lVAVdtnbNy4cb8KisVi7O++fZXGnBs05tzQkzFXVlaGXM3epSpYqoHD2j0/FKhpv4G717Z7+mtgTopqERGRNEpVj+Ul4CgzG2ZmhcD5wKPtNzCzg9s9HQ+sTVEtIiKSRimZsbh7i5ldAjxBcLrxPe6+xsx+Abzs7o8Cl5rZeKAF2ARMSUUtIiKSXpFEomPro1dL1NTUfPJWndAx2dygMecGjXnfJHssnfW+U0JX3ouISKgULCIiEioFi4iIhErBIiIioVKwiIhIqBQsIiISKgWLiIiESsEiIiKhUrCIiEioFCwiIhIqBYuIiIRKwSIiIqFSsIiISKgULCIiEioFi4iIhErBIiIioVKwiIhIqBQsIiISKgWLiIiESsEiIiKhUrCIiEioFCwiIhIqBYuIiIRKwSIiIqFSsIiISKgULCIiEioFi4iIhErBIiIioVKwiIhIqBQsIiISKgWLiIiESsEiIiKhUrCIiEioFCwiIhIqBYuIiIRKwSIiIqFSsIiISKgULCIiEqr8VH2wmY0D5gF5wF3uPrvD+0XAA8AXgVrgPHf/V6rqERGR9EjJjMXM8oD5wGnA54BvmtnnOmz2X8Bmdz8SuAWYk4paREQkvVJ1KGw08La7v+vuzcAi4KwO25wF3J/8fjFwkplFUlSPiIikSaoOhR0CvN/ueTVwXFfbuHuLmdUDFcDG9huZ2VRganI7Kisr97uonuzbV2nMuUFjzg19ZcypmrF0NvNI7Mc2uHuVu3/J3b+U3Ge/Hmb2Sk/274sPjTk3H
hpzbjxCGHPapCpYqoHD2j0/FKjpahszywcOADalqB4REUmTVB0Kewk4ysyGAeuA84HJHbZ5FPg28BfgHGClu+8xYxERkb4lJTMWd28BLgGeANYGL/kaM/uFmY1PbnY3UGFmbwNXAtNTUUs7VSn+/N5IY84NGnNu6DNjjiQSmiSIiEh4dOW9iIiESsEiIiKhStmSLr2Fmd0DnAl85O5fyHQ96WBmhxEsl3MQEAeq3H1eZqtKLTMrBv4EFBH8f73Y3WdktqrUS65y8TKwzt3PzHQ96WBm/wK2Aq1AS/JShKxlZgOAu4AvEFyS8R13/0tmq9q7XJix3AeMy3QRadYCXOXunwW+AvygkyV1sk0TcKK7HwuMAMaZ2VcyXFM6XEZwgkyuOcHdR2R7qCTNA37v7p8BjqUP/PfO+hmLu//JzIZmuo50cvf1wPrk91vNbC3BSgevZ7SwFEqeqv5x8mlB8pHVZ6aY2aHAGcAsgjMrJcuYWX9gDDAFILlEVnMma+qOrA+WXJcM1ZHAixkuJeWSh4VeAY4E5rt7to95LjAN6JfpQtIsAfzBzBLAr9y9z5yGux+OADYA95rZsQT/f1/m7tsyW9be5cKhsJxlZuXAEuByd9+S6XpSzd1b3X0EwUoPo80sa3tqZtbWN3wl07VkwPHuPopg9fQfmNmYTBeUQvnAKGCBu48EtpH6a/56TMGSpcysgCBUHnT3pZmuJ53cvQ54huzurR0PjE82shcBJ5rZbzJbUnq4e03y60fAMoLV1LNVNVDdbva9mCBoejUFSxZK3n7gbmCtu9+c6XrSwcwGJ8+ewcxKgG8Ab2S2qtRx92vc/VB3H0qwZNJKd78ww2WlnJmVmVm/tu+BU4DVma0qddz9A+B9M/t08qWT6AO90qzvsZjZQuDrQMzMqoEZ7n53ZqtKueOBbwGvmdnfk6/92N1XZLCmVDsYuD/ZZ4kSLCP0WIZrkvAdCCwzMwj+/XrI3X+f2ZJS7ofAg2ZWCLwLXJzhej6RlnQREZFQ6VCYiIiESsEiIiKhUrCIiEioFCwiIhIqBYuIiIQq6083FumMmY0FZhD8ctUK/MzdXzCzeuBvBGuNfQeoBL7h7j9N7vffwDPu/ky7zyolWF7l35L7Vbn7/T2obQDBgpo5dWGrZA/NWCTnmFkMuBaY4O5fByYADcm3X3P3E4CrCNbh6o4ZwLPJz/oP4L0eljgAmNTDzxDJGM1YJBedDvymbf00d98KvNphm78TrDnWHV919x8lPytBcF8YzOxWgiX8twAXECwG+g13/6mZTUnu+wxwD7AJGAacBUwFTjazZ4Bz3X3Dvg9RJHMULJKLKoHXAMxsMvB9YJW7X91umzHAm/v7A8zsy0CZu48xswuB79H1CtMDCZag+SZwNlAFHJ4LS7RIdtKhMMlF6wnCBXd/CLgQiCXfO9rMniYIm9lAI8FdKdsUs+uw2d4MJ+jVQHCHxyPZ/f4wkXbfv+7ucWAdwWEwkT5NMxbJRSuAxWbm7l7P7n8P2nosAJhZMzDSzNp+CRsF3Njh814wswvc/cHkAqDHE6zpdEry/S8B7wD1BGuaARwN/DP5fcfA2QHk9WSAIpmkGYvknGTP4lpguZmtBO4AHuhi21qC2w/8CfgzsNjdN3XY7FpgbLIn8jww3N3/CjSY2Z+BycCdBEFSaWYrgMF7KfEDYJCZLTazQfs5TJGM0SKUIiISKs1YREQkVAoWEREJlYJFRERCpWAREZFQKVhERCRUChYREQmVgkVEREL1/z+wxddRQKSsAAAAAElFTkSuQmCC\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Managed Memory Plots\n" + ] + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXoAAAEkCAYAAAAhJPoXAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOydeZwUxdn4v9Vz7cXusrtcy3LILWAEEc9ovI/E10SUVqNgjFFjEqOv5keI8TW+khCTqDExhsQcKODVKkbjGxXRBMUjEQEFgeUQBBfk3vuYme76/dE9586ws7sze8zW9/OZz8xUV1dX9fQ8VfU8Tz0lpJQoFAqFInvRursCCoVCocgsStArFApFlqMEvUKhUGQ5StArFApFlqMEvUKhUGQ5StArFApFlqMEfTsRQowUQkghxBe7uy59gUT32/l+dXdcuxNl7RBC3Bn1/V9CiD+3cc7dQoitnb12KqTrngohHhVCLE9HnRRtI4T4hhAi2FY+d1dUJsvYBQwBDgIIISqctDOllP/qaKGOMHkLOEpKuaPz1cxqhgDV3V2JdjIdaOzuShyB3nhPFSmiBH07kVKawOfddX0hhAcIyj680k1K2W33v6NIKfd3dx2ORG+8p4rU6XbVjRDii0KIt4UQdc7rQyHE+c6xhFNnIcRWIcTdUd+lEOJmIcTTQogGIcROIcRlQogiIcTjTrmfCCEujTonVPbXhRCvCiEahRCbhBBfEkIMFUL8wylrgxDitATnheq0y3n/p5O+4wht/aoQYo1zrWohxH+EEFOFECOxR/MA251y/uWc86gQYrnTvh1AC5AvhPAIIe4VQlQJIfxOPb8edz0phPiOEGKxcw92CSHmxOUpFUI847R1rxBinhDisVSn30KICiHEc0KIA0KIJuc+/7+o424hxF1CiG1CiBanvg9FHb9FCLFWCFEvhPhcCPGUEGJIG9eMUTN0QTuPEkK87rRvuxDiqqhyU31GY1Q3CdrkE0IsEELUCCEOCyEWAL62KpZi2/sJIf4ohNgvhGgWQqwSQpyXoJz23tP+IvKf2yuE+CkgEtTxZue/1SyE2CKE+LEQwu0cGyOEqBVC/HdU/qOdMm9Kof0d/R8LIcSfnOcy9NzOF0L4ovLc7fyOX3XKbBBC/FMIMTruHiwRtsxpEkJUCiFuF0KIqDyaU/Z+5zl/Sghxq4hTuQghzhW2LGxy/icLhRClcXWeJ4TYFyoH6N/WPQJAStltL8AFHAIeAMY6r0uA05zjIwEJfDHuvK3A3VHfJfYo+xpgDPB77Gnyy8A3nLSHgAagNK7sbcDXgHHA88BuYLlTj3HAc9jC3JOoTsBU5/sMYDAwIElbBwN+YA5wFHA08HXgGOc+XOyUM93JW+Kc9yhQ69RtipPfDfwKW30006nnHYAFnB13X/YC1wOjge87aWdG5XkR2AycCUwCFgI1wPIUf8MXnfs1xbk3ZwJXRh1/DNgHzHLqcBLw31HHbwHOce7JycA7wIqo462eAef71ZluZ9S1dwNXAeOBnzr3+fh2PqM7gDujvv8L+HPU91879+mrwATgPud339rG/U+l7c841z8f+7n7DfazOKGT9/R5p51nOfd0iVPn5VF57gY+xf4/HQV8GdgJzIvKcxX2AOY4IAf4CHguxecvdP/b+z/WnN/yRKeMi4E9wP/G1b0BeAWYBhwLrCH2+RwM/NCp+1HA1UA9cG1UntuctFnYMu42bLkXjMpzFrbMutnJMx34J/AmIKL+Kw3Ycm4ctiypji4n6X3qKqGe5Efq7/xIZ7TxI6Yi6B+M+j7ASXsowbUuiiv71qg8052026PSQoJ8cqI6ARVHakOCckYmOf7FRMexBX01UBCVlof9x/hOXN7ngTfi7stv4/JsAn7ufB7r5InuHDzYf4hUBf2H0b9F3LExTvmXteOZCN2nocmeARILpbS3M+ra8+LS3wGWtPMZ3UESQQ/kA83A9XFlrCI1QX+ktod+gy/H5VkN/LUT9zRU7rlRx71AVeieOs9pI3BBXDmzgeq4tIXYHfFC514Vp/i8hO5/u/7HScr6b2BL1Pe7gSBRgzfgCuyOPucI5fwGeC3qe1WCZ+gpYgX9v4B74/IMd+o8xfn+GfCzuDzPkoKg71bVjZTyMPBn4FUhxMtCiLlCiPEdLO7DqHL3Ayb2yCD6Wn5gYLLziOjeP0qQFn9ee/kIeBVYL4R4Xtgqi2EpnrtRSlkf9X0M9p/qzbh8K7BHVtGsjfteBQxyPk903t8LHZRSBrAFTKo8CNwhhPi3EOIXQojTo44d57wvS3ayEOIMZ8q9SwhRB6x0Do1oRx0gs+18N+7721FlpoPR2Gqad+LSVybIm4hU2h7/rLxJ62elI+WG6yyl9APvR+WfBOQCzzmqhnohRD3wR6BICDEgKu/3sGeqs7FnhO01DLf7fyyEuN55bvc69fo5rZ+73TLWvlKFrZ4a6JShOXJrrbDVl/XAt0PlCCEKgXKinj2H+GdqOnBr3H3a4Bwb65QzlA4+I92uo5dSXo89LXoN+BK2ILzROWw57/F6P0+CogIppElatzkQdzxZWqfulbSNuBdiT9HeBy4FNgshLkrh9IZkxcZ9FwnS/AnOiW9L/DkpI6VciP1Q/wHbc+NlIcSSVM4VQgwH/oE9grsCOB57Cg12R9YeMtrOOKKfx/Y8o22V19H6pdL2RNds63pHKreVLj4BobwzsVV7odcx2LOsQ1F5x2ALROl8bi/t+h8LIWYCDwNPY6uTpgL30Pp3S3QPwuUAtwM/wlYNn4vdvj8TeX5T/W014BfE3qcp2Pfp5XaUk7TwbkdKuV5K+YCU8kLgL8ANzqFQT1oeyiuEGIjds/UUQg+Cq62M0uY/Usr5UsrTsUfg17a3HGy1QAt2xxjN6cDHKZwfIjRiODmU4BjJprWjDKSUe6SUC6WUs4HrgKucEchqJ8t5SU6djj3iu1VK+baUspLIiDGddLadJ8V9PxnY6HxOxzO6Ffv3PzUu/ZR2lJGM0PNwelz6abTvWUlWbriOQggv9m8anacZGCWl3JrgZTrn5WGrMp7FVp/8XggxthN1S4XTgTWO3PlASrkFWw3UkXJekVL+RUq5Rkq5FVs4AyClrMG2F5wcd178M7UKmJTkPtU75VTR+hmJ/56QbnWvFEKMwTb2/B1bX1qO/QCuBpBSNgkh3gbmCCE2Ydf3Z9hCrqdwANvQcp4Q4mOgxVETxSCEOAU4G1uNsQf7YfgCdscGtsHKAr4shHjaKacm0QWllI1CiN8C84QQ+7Gn2DOxDXnnplpxKeUWIcTfgYedWdR+7
[... base64-encoded PNG data for several matplotlib figure outputs omitted; the notebook records them only as "display_data" cells ("needs_background": "light") and no captions, titles, or axis labels are recoverable from the encoded blobs ...]
p39Ozz3SNKbVAmNZ1kEpLr6buyJWN78ejHgviJANfwCi1sB/dk/kxKYFRo6K4sxuefZsjx494uICKijlE0+oVaXvvfde1q1bx9y5cwkGgzz11FNcddVVPPjgg1xyySUYhhEP7Q8wbtw4HnrooUbniEQivPDCCzz11FMtlkej2V+6yK5MjExiYqS+PVeLaj4NrIoLzsrAChaG3sQVKq5fsVPcaHhtkDNYi04O06LAmKb5sGVZF3t/C+BBL1hlzpEsLonbMz0mmMn1YBKZP38+ffv2ZcyYMZktsEbTAoWykGMiEzgmUr8OUa2o5TP/as+us5KVwU94MHQ/tlDRgbu63eLDamWRIyiLljHEOQQjO6u5a9qYdHowQ2N/WJYlTdMclsXyZI10ehiBof3iw2OJyJDEfm5niiP2n0yuB5PIM888wznnnJOJImo0B0y+zOeo6DiOio6LbwsTpjzwWQNngicKHyFSpEYJitwiRkXLGgyxDbWH4cPXXtXQ7CfpCMwO0zR/DLwDHAdk9knbgbDn7lQ2mNr6tyeZ72I/viuj58n0ejAxvv76a959913uuuuuTBZXo8koIUIcET2SI6L1K3NEibLGX67sOkE1V+fpgiepM+oAyHfzGWmPZnREDbGVRccwzD6MQJMLCWs6AukIzPnApcBPgXLgR1ktUTsij49gP76roRdZhgz8MbKxHkwMy7KYMWMGxcXFmSquRtMmBAgwyi5jlF3GWbWqB25js86/tkFP5/mCZ5hrPApASOYxIjqyQU9nEse3ZzU0SaQjMGHga8AB7gOOApZms1DtiTw+QnT91qzkXV5ezt13383QoUM57bTTgNatBzNr1iwGDhwYHwJLXA8GlMDccsstWSm7RtPW+PEz3B7BcHsEZ9SeBYCDw0b/hrjgrAx8wj/yX+SpwrkABGSAw0tGMDo6hjGeI8Hw6Agdf62dSEdgngTeAr5vWdbdpmn+Hjgpu8XKTYYPH87mzZv369iioiI2bdrUbJpFixbtV94azcGCDx/D7EMZZh/KabXfBVSMty99X7AqsIL13daxVC5hfv7LWIXKk9InfRxmHx53JBgdLWOkPZoCWdCeVekUpCMwvSzLut80TTPrpdFoNJpWYmBQ6hxCqXMIJUUl7Ni5A4lks29TQk9nBf8JLeD5ArX+kSENhtmHNhheGxkdTRepFyjMJOkIzDbTNM8G8k3T/C6QnfEjjUajyRACwUBnEAOdQfxX3TcBtSxEhfG1mqfjORK8G3qHFwvmxY87xB7K6MiYhPk6ZXST3durGgc96QjMRcCPgeXAQOCSdDI2TfMR4NvANsuyylLsF8BdwDeBGuACy7KWe/vOB27ykv7WsqzH0zmnJj18xlcI+Tt6F18MQNQeze7K+S0cpdEc3AgEfd1+9A3346Rw/aTl7ca2BpNDlweX8ZK3vAHAYHtI/Vwdr7dT7GpHmnRIR2BGWJZ1j2mavYELgFLgszSOewy4B3iiif3fAA7z/h+LciA41jTNYuDXwDGogOYfmKb5D8uydqdxTk0rkTJI1D6mvYuh0bQbvdzenBiewYnhGfFtu4xdCd5rKuL0K/n/iu/vbw+IL2sQW1dHL03dmHQE5nZgBjAbZex/FDUfplksy1pommZpM0m+AzzhBc58zzTN7qZp9gNOAF6zLGsXgGmarwGnAk+nUVZNqzGoqb26vQuh0XQoit1ipoSnMSU8Lb5tr9jjxV/zgn4GV/Ba/r/j+3s7feJhcGLOBH3dfp06FE46AlNgmmYICFmW9bRpmpdn6NwDgK8Svm/ytjW1vRGmaV6KmqODZVmUlDQMG15bW5uhouYwAnr2uBTVMS1FiiHxv2EwiNbN1enI+P3+Rm0kl+ls9YXs1rmEEoZxKKdxutrgQmWkkhXiYz4WH/KR+JCPgx/yZuiNePy1XrI3R8qxHCmPYqx7NEfKsQxmSEZFpyPf53TdlP8O/No0zTxgQ4bOneoKy2a2N8KyrAeAB2JpkkOT27bdqgL1GXsKvu2NAxU4vXpS8dGB2yiyFa4f4LXXXuOPf/wjUkqklFxzzTV885vfbKIk6hJL6acu/A18vh0YxlJ8xosYROOppBS4sg+OMxjXHYjjDsZxBuN4f7tuPw6mJYX2J3z9wUxnqy+0T52HM5LhjMTkBwDUiBo+9a9qEJXgP/4FOD4V4by7210Nr0XqA38Odobsd/y1A6lz//799+u4dGnx6WBZ1r3AvQmbLsjQuTcBgxK+DwS2eNtPSNr+ZobO2SypxKW57a0lW+H6pZTMnDmTefPmMWLECFavXs3pp5/OqaeemjIUjSsLAUFt+FyqauYk7HEwjK/xGV/hM77EMDbh832Jz/iKgH8JIeNFhPdmBiClD9ftj+MO8sQnJjyDcJxBuLIP6KCFmk5GgSxgXHQ846LjlfsSEKaOzwKfejYdNcT2aNGDRIV6oevidmVUdLQXBkc5EpTahxz08dfa8/XzH8CVpmn+DWXk32tZ1lbTNF8F5pimGYuFcgrwiwM9WdebbyOw+vP9Pr7nmZc2uS866nAqZ1/XYh7ZDNcvhGDfvn2AWnCsd+/eTcY5k7IrMCiF7cWH6w7AdQcQZWKKIyP4jC0Yxlf4fF95QqTEKBj4D75QRdJ5gjjuACU6ziBPiAZ5vaBBSNmT1B1WjSa3CJHHkdGjODJ6VHxbhAifB8obhMKZW/g4EREGoMAt8IJ+1jsTDLMPxZ/w2N5mVHCe/2zuMO7ukE4GTQqMaZo9DsRzyzTNp1E9kRLTNDehPMMCAJZl3Q+8jHJRXovS+Qu9fbtM07yF+nA0s2MG/1wik+H6hRDcf//9XHjhhRQUFFBdXc3jjzfn2e1DivNwZWsbZBDHLcVxS4mmHH2sxWds9sTnSyVEnhgFgp9gGA2bkysLPOHxht/cQd73mAB1a2X5NJqDhyDBeODOGFGirPOv8dym1fIGVsFT1BrKnpzn5jHSHhVfT2dh8E3eE+9wd9GdzK6c08SZ2g8hZeqVTkzTvB/oAawB5gPvWJbVOqNG2yK3bNnSYMO+ffvo0iX9mbn9B4xrct+WzR/sd8FSceONN/L111/z0EMPtTqi8n333ce//vUvnn32WfLz87Ftm3PPPZfrrruO8ePHs3TpUn7yk5/w5ptvUlhY2Oj4DRs2UF5ezqmnnpqp6qSFoAojsefjDb/FhMgw9jVI77pd48Nvrjsw3vOJ9YKgdaE+OptNorPVF3Kzzg4O6/3rGvR0VgdWUmVUxdPkyTzerHi31b0YzwaTtWGEJnswlmVdDmCa5mHAycCl3uTIJcA8y7KaD4ylaZJMh+tftWoVFRUVjB8/HoDx48dTUFDAmjVrGDt2bMbLv79IinCckTjOyJR7hdiDz9ikej+++uE3v28tvsB/EKKuwRGu29MTm1ivZyCuO9j7PgDIa5N6aTTZxIeKpXaYfTin134PUPHXrul+JS/nv4QjHBzcDtmLScfIvwbVi/mzaZo+lL2kH8oYn1M4vXo26UWWKbIRrr9fv35s3bqVtWvXcuihh7JmzRq2b9/OkCFDMlbu7COQsge20wPbGUO
CM5uHRIgd+Iwv8fmUCMWG3/z+FYSMfyNEw4Mct69yNvB6PciRBPw9Ejzg9FoimoOTHcZ25uf/G0coz7SoiPB84TNcVXV1h7LFNDlEdhBywENk2aa8vJzp06czdOhQ8vLU23VrwvWPGDGCgQMHxuuUGK5/3rx53HvvvQihervXXXddk0Ng7TVEll0cDFGRYP+p94BTw29bUnjA9Wsw5ObG/x6EK/tysHvA5eJwUUt0ljrf3PUXWIV/Iyrq16oKyCBm9fdb1YtptyEyTebJZrj+M844gzPOOGN/i5YD+HBlf1y7P1GOTbE/SknPWvbu/STB/rMJw/iSYOCtJjzg+icMucVcsQd5DgglaA84TXuxPPhBA3EB1YtZHlzWTiVKTYsCY5rmqZZl/ds0zUOBnwHPWJa1MPtF02gySQBEP6J218ajbwDUKfuPr97pIDYEFwq+gmE0dGSUMt8Tm+Y84LQAabLDSzvqJ3535F5bOj2Y64B/AzcCDwJ/AsZns1AaTduTh+MeiuMemnKvoNpzPPjSE6J6D7iAfxmGUdkgvet2UaLjOR8kesC5zmAkjb37NJpcIx2B6WKa5mDAsSzrXdM0q7NdKE12ySG7W5shKcRxRuA4I1Lub+gB92X8b5+xnmDgLYRoGBfPdXs0GHJzGwzBDUR7wGlygXQE5vfAb4HferHI3stukTJHLC5XzPCtUdckEom0nFDTKqTsju10x3bKmvCA2+l5wNW7Xxu+Tfj9qwgZ8xFJ4+mO2wfXqR9+S3THdt3+NOUB16PrKQT8q9QXF3p7y5boNX807UE6AnMIMMuyrJiL1qwsliej5OXlUVNTk3KyYWdESsnmzZtZt24dfr/2aZ4sBAAAHiVJREFU72g7BFKWYDsl2M7RKfa7SR5wX8X/DviXETL+gfDcUQGkNDwPuPrht5gHnO2MwO9b00Cw9Jo/mvYinafMeuD/mabZDfgn8NzBErolGAwSiURYt25dsxMaDcPAdd0m97ca20ZEIohIBGKfsWEpnw8ZDCKDQYh9tnKy5f4Q67msW7eOmpoaBg8enPVzatLFwJX9cO1+RJmQYn8Uw9iaMgJCMPA2vtDXDVKnGgGtqc3UKhsaTfqkPQ/GW2nyftTiX68Cd1mWtSiLZWstjebBxNizZw+LFy9ucmiooKCAmpqa7JXMdfFt347x9df4tm7F2LoV3656jXaKi3H79cPp1w+3b1+cXr2yJjpCCLp06cIZZ5zB3r17s3KOjkpH9rY5MMKezWcThu9L8kNP4vetQgiJlCAESBnAdsYQiU4kak8gao9H5uBa87l7j5smA+H6228ejGma3wDOQcUl+ydqgS+BWiNmarYKlkm6d+/Ot771rSb3t0ejFHv3Evz4YwLLlxP88EMC776Lb6eKIiDz8oiMGUP0qKOIHHUU0aOPxhkwQD0pMkQgoGex5w4hHHcYjjsMbIhETqFn9+OAMBBib9Uf8fvKCfiXUJD3IEL8GSkFtjOCqH2s+h891lteQaPJHOkMkY0BbrQsq8EMQdM0L8lOkToHsls3wlOnEp7qabSU+L76isCHHxL0RKfw8ccpekCtp+b06qXEJiY6Y8ciO1CUAk3HwZV9qA2fTX7ek9SGzyEc+R7h+N5aAv4PCfiXEAwsIS9kUZD3GAC2UxoXm6g9AcctRc/l0RwILQ6RmaY5BLUeSxFwPnChZVkPNXtQ+9DkEFlLdNhudSRC4NNPG4iOf/16AKQQ2IcdFhecyFFHYY8YAWka7ztsnbNIZ6qzISro2eN/2Ln77haWZYji960iEHiPoP99Av7348sqOG4fotEJRO1jidjHei7aHTt8Tme5x5lafbfdh8iAh4ErgT9bluWYpvl9oCMKTO4RDBI98kiiRx5JzQUXACD27CH40Udx0QnNn0/BM88A4ObnEz3iiAai4/bv32BoLX/ePLrceiu+LVvo3b8/+2bNorZTh5jJTVzZB2kswJUtPWwD2M5YbGcstVwOuPiMtQmC8x55oX+qPN1uRO1jiNoTidgTsO0jgGC2q6JJQbZX380U6QiMz7Ksz0zTjH3v2K8wOY7s3p3wCScQPuEEb4PE98UXyo7jiU7hI49Q5Dk0OH36xIfWqKqi6MEHMepU2Hv/5s10u/56AC0yGg8Dxz0cJ3w4deEfARLD2ETQv4SAfwmBwBJCwQUASJlH1D5a9XCixxK1x9HaNXo0uU06Q2S/AgagVqd8FaiwLKtjLTqgyL0hsv0lHCawenUD0fFv3NhkcnvAALa9/37bla+dyLn73ALZqq8Q21XvJrCEgP99z2PNRUq/56nmOQ7Y45GyR8sZZpDOco8ztThiuw+RWZZ1i2maZcACoBz4MluF0WSIUIhorNfiIXbtou+YMSlbkm/zZrr9/OdEJk4kPHEi7oABbVdWzUGHlL0IR79FOKo8MwX7CASWEfC/R8D/PgV5jyDE/QDY9ggi9gTPeWACruzfnkXXtDFNCoxpms8C51qWFbEsayWw0jTN4ajlk1PNBtN0YGRxMc6AAfhTLBcg8/LIf+klCp96CgB78OC42ESOOw5n0KCMukhrcgtJFyLRE4lET/S21BHwf6SG1Pzvkxd6noK8JwBwnMFKcLz5OI47FO2plrs014P5G/CyaZpnWtb/b+/ew6SqzwOOf8/Mmb1xkcuCsruAyyIqBBGysBANJl6oNRGM0ddLbGNMH562sU16eSxpk5qYtg8xT9KYxqahJql5ctE3mouNiZdKrDTuAhKtVDSEJRYBrT4SkcAyM+fM6R/n7OzsDXeXPTszO+/neeZh59z292MW3v2d3++8r74hIr8DfAb4/bFpmhltRzZs4JRbbiHR1ZN4MVdby+Hbb6dr3Trc55+nur2dqvZ2ah55hDpVALyGBjJRsEmvXInf3GwBx5xADVlvJVlvZfTew03u6pnDSW2mtvo+APzcjPDBz+i2muefDSSL1vJyMRbVd0fDCedgROQdwOcI517eAVyrqm8M5cIicilwB+FPy12qurHP/n8Eun/lqQNmquqUaJ8P7Iz27VPVtUP4ljYHMwSFq8j8E60iy+VwX3iB6o4Oqtrbqdq6Nf8gqH/aaaTb2sisWkVm1Sq8lpayCDiV9DlDKfc3iFaqbY0WD2wjmQyL6eVyk8h6y3seAPXOAYZeWrx0+xyfUn6Sf9AAIyKfAQLgLGAN8BWiPLGq+rcnuqiIJIHdwCXAfmA7cJ2q7hrk+D8BlqrqTdH736rqxGH2xQLMMAy7z0GA+6tfUdXeHgadjg6Sr74KRA+BtrWRXrWKzMqVeAsWjEl+teGqtM+5nPqbSBwIH/50O0iltuEmfwV0r1RbWjDKeTsBg//XUE59Hi2lHGBOdIvsP6I/HwPuHOZ1VwB7VHUvgIjcA6wDBgwwwHXArcP8HmYsOQ7eggV4CxZw7IMfDJdH792bDzbV7e3U/vjHAPhTp4a31KJ5HG/hwpIMOKZ05HKNpDNXks6Eo2nHeZ2Uu42qaKVaXc0/4dTeQRAk8fy3kfVW5POqBcG0IrfeDGbQAKOq/3kS120EXip4vx8GLJTenSmgGdhcsL
lGRJ4CPGCjqv5wkHPXE+ZGQ1Wpr68fUWNd1x3xueVqVPo8Ywa0tcFHP4ofBPi//jWJLVtwtmyh5oknqP3pT4Hw2Z3gvPPIvfOdBKtXEyxZMuSMA6Op0j7n8u5vPXAm8HsABMERAjpw+Dmu+1+47jepq/nXcB9nAecTOOeTTF5AfX1lrVQr5c85rn/lAw25BpvsuZawBIBfsG2Oqh4UkXnAZhHZqaqdfU9U1U3Apu7rj3SYaMPqUTJ5MrznPeGLcPlzVXt7foTjPvggALmJE8msWJEf4WTPOQfGIPlmpX3O46+/S6PXzUCalPvf+ZVqKfdeEom7CLIQ+E3hCMdbSTbbFiYBHccr1UbhFlls4gow+4HZBe+bgMEmSK4FPlK4obu4maruFZHHCX+q+gUYU9r8xka6rrqKrquuAiDx8stUb92aDzqTN4eD1lxdHZnW1vxKtcySJVA99IldU4mqo7ID3U9M+LjJ55ky5X/I+o9RlXqCmurvA5DLTScTPYcTrlRbSHz/9ZlCcf0tbwfOEJFm4ABhELm+70HRczVTgfaCbVOBY6qaFpF64Dzg9pjaacZQbtYsuq64gq4rrgAg8dpr4egmmseZfHv4MQc1NWSWLcsvGsgsWwY1VqPenEg4N4PzLt787bWEK9X2RtkGwtVqNVU/ASAXTCSbbY0C1Eqy3hLAfr7iEEuAUVVPRG4mXN6cBL6uqs+JyG3AU6r6QHTodcA9qlp4++xs4KsikiPMe7ZxsNVnprzlZszg+OWXc/zyywFIHDpEVTTCqe7oYNIXvoATBARVVWHyzu5baq2tBHWW88qciBPWyEm3cDwd/m6bcA6SSoUJPKvcbVTXRb/QBFVkvXMLauO0EmClMEbDkCtalgFbpjwM5dBn5403qNq2LT/CSe3ciZPLEbgu2SVLekY4y5cTTHzrVe3l0OfRVGn9heH12XEOkXK3h8/ipLbhJp/FcXyCIIHnL8ovjc54bQRBaU6iQ2kvU7YAg/1DLBfOkSNUbd+eXzSQevZZHM8jSCbJLl7ck95mxQqCU07pd3459vlkVFp/4eT67HAU191BVTTKSblP4zhh5nHPb8mnt8l4beRyTZTKwgELMGPDAswwjIc+O0ePUrVjR37RQNXTT+NkswSJBNlFi3rS26xYQTB16rjo83BUWn9htPucwXWf7SlV4G4nkXgTAN9viAqxhQsHfP8MilXJxALM2LAAMwzjsc9OVxepHTvyt9SqfvELnHQ6rP551lkk3v1uDp97LpmVK8lNL62cTXEYj5/xW4m3zz5u8oVoWXSYcSCZCLNZ5HJT86ObbLYtXHAwRivVSjnA2Fo9M24EtbVkzj+fzPnnhxuOH6fqmWfyiwbcb3yDaVGiz+yCBb0yRudmnqissDEQrlRbhOcvoiv9IcKVai/mk3im3G1MqnoYgFxQh+e1komWRme9pUBtUVtfDDaCwX7TqxT1kydz+LHHekY427eTOHoUAG/evPyigfTKlWGp6TJXkZ9xkfuccF7JB5sqdyvJ5As4TkAQpMh6SwqSeC4nCCaPyvcs5RGMBRiK/0NZDNZnwPNI7dyZXzRQtW0biSNHwl1z5/aviVNm7DMuPsf5DSl3ez6vWrhSzSMIHDz/7PA5nGiUkwtGNoq2ADM2LMAMg/V5AL4flpruTm+zdSuJN8LqFF5jY++aOKefXvIlCuwzLkXHSLm/iAJOByl3R8FKteboOZy2aKXaHIbyf78FmLFhAWYYrM9D8FY1caKM0aVaE8c+43KQxU3ujLJGd69UC3+p8XOnRSUKwsUDvn8m3SvVpk5eQ8p9rv/VvEX85s1HhvzdbZLfmGJJJPAWLsRbuJCjN93UryZO9ZNPUvfDMNF3udTEMaUmhecvw/OXAX8E5Egmf5kvxJZKbaWm+kcA5HJT8sXYfH8ObnI3jpPNXynMSNBanG4MwgKMMUM1nJo406aRaWuzmjhmmBL4/tl0+WfTlb4RCEgk9kXZBsJRTnXVowD0v/mU4FjXx8a4vSdmAcaYkXIc/JYWjrW0cOwDHwgDzr59PYsGOjryNXFyU6aQjkoUZFatIrtoESSt9rx5Kw653FyOZ+ZyPCMAJJxXSblbqav9Im7yBRwnHL10pa8Z8UKBuFiAMWa0OA7+3Ll0zZ1L1zXXAJDcv79n0UBHB7WPhPfHc5MmkVm+PL9oILt48ZjUxDHlLxfMJJ29nKy3gulTVgFpSnH0AhZgjImV39RE19VX03X11UBUE6d70UDfmjjLl/euiVNVVcymmxKXC06lK30NtTXfKsnRC1iAMWZM5WbNout976Prfe8DIPHqq71r4nz2s+FxNTVk3/72nozRS5daTRzTz7Guj1Fbs7ckRy9gAcaYosrNnMnxtWs5vnYtENXEKVg0MOnznw9r4lRX96+JU1t5qUdMb7ngVILEY+SC0lyabQHGmBKSmzaN45ddxvHLLgMKauJEt9QmfulLTPriFwlSqbAmTvctteXLCSZMAKD2+99n0saNJA8eZGZDA0c2bKDryiuL2S1ToSzAGFPCgilTSK9ZQ3rNGgCcN9/sVRNn4le+gvPlL4c1cc45h9y0aVRv2YKTyQDgHjjAKbfcAmBBxow5CzDGlJFg8mTSF11E+qKLOEJUE+epp3pWqj32WL/HshNdXUy+9VayZ52F19wMdmvNjBELMMaUsWDCBNIXXED6ggsAmNXUNNATeCQPHWLmJZcQOA5+YyNeS0u/V27WrJJLd2PKW2wBRkQuBe4AksBdqrqxz/4bgc8BB6JNX1bVu6J9HwQ+EW3/O1W9O652GjOe+A0NuAcO9N8+cyaHP/Up3L17cTs7cTs7qbv33ny5AgiXSnvz5uHNn98r8Pjz5hHU1Y1lN8w4EUuAEZEkcCdwCbAf2C4iD6jqrj6H3quqN/c5dxpwK9AKBMCO6NzfxNFWY8aTIxs2cMott5CICqsB5GprefOTn+T4unW9Dw4CEq+8kg843a+qHTuo/dGPcApGQl5DA37hiCcKQv6sWZYCxwwqrhHMCmCPqu4FEJF7gHVA3wAzkN8BHlXVQ9G5jwKXAt+Nqa3GjBvdE/ndq8j8E60icxxys2aRmTWrpwpo/kJduC++GAadPXvCP/fupfb++/M1cyB8XsefN69f4PHmzSOYODHOrpoyEFeAaQReKni/H2gb4Lj3i8hqYDfwZ6r60iDnNg70TURkPbAeQFWpr68fUWNd1x3xueXK+jyOrV9Pbv16Eq5LzvOYAEwYyXVmz4Z3vrPXJi8I4P/+D+eXv8TZvRtn926Su3fjPvccPPggTi6XPzZoaCBYsKDfizlzYsvDVjGfcYFS7nNcAWagmcK+M4//DnxXVdMi8ofA3cCFQzwXAFXdBGzqPmakdSDKr4bEybM+j3+x9dd1YdGi8FUone4Z9XS/9uzBvfdekocP5w8LqqvDuZ7CkU/0CiafXBnhSvuMYVQKjsUmrgCzHyisMdsE9KoGpqqvF7z9V+CzBee+q8+5j496C40xo6u6Gu/MM/HOPLP39iAg8frrvYNOZyepXbuoeeghHN/PH+rPnJm/xdZrr
mf2bMs+XYbiCjDbgTNEpJlwldi1wPWFB4jILFV9OXq7Fng++vph4B9EZGr0fg3w8ZjaaYyJm+OQq68nU19Ppq3PnfJMBnffvp55nuhV++CD+XLVAEFVFd7ppw+4vDqYMmWMO2SGKpYAo6qeiNxMGCySwNdV9TkRuQ14SlUfAP5URNYCHnAIuDE695CIfIYwSAHc1j3hb4wZZ6qqwlHK/Pn9diUOHcLt7CRZOPLZvZuaRx/F8bz8cf706fmRTmLxYqpPOy0c9cydG97OM0XjBAM8lFWmgoMHD771UQOw+7aVodL6PG77m82S3Lev3/Jqt7OT5Os9d96DVApv7tyBRz3TphWxA6NrFOZgYnu61sK7Maa8pFL4LS34LS2k++yqTyQ4vG1bfll1d+Cp2bwZJ9tTv96fOrXnuZ7CB0vnzrXCb6PIAowxZvyYNo1sayvZ1tbe2z2P5Esv9RvxVG/eTN299+YPC5JJ/Dlz+mUz8FpayE2fbql0hskCjDFm/HNd/OZm/OZm0hdf3GuXc/hwz2in4KHS6ieewEn3jJFyU6b0X1o9f3446qmuHuselQULMMaYihaccgrZpUvJLl3ae4fvkzxwoN8Kt+otW6j73vd6zk8kwlFPn6XVXksLuRkzKnrUYwHGGGMGEt0u8+fMIX3hhb12OUeO9Jrj6R79VD/5JM7x4/njcpMmDbjIwGturogS2BZgjDFmmIJJk8guWUJ2yZLeO3I5kgcP9r7d1tlJ9ZNPUnf//T3nOw7+7Nm9HyqNRj+5U08dN6MeCzDGGDNaEgn8pib8pqZ8jZ5uztGjvUY93c/31HV09M5+PWHCgKMef948goJiceVQGtsCjDHGjIFgwgSyixeTXby4945cjsTLL/deWr1nD1XbtlH3gx/0OtSLisUBVLe355del2ppbAswxhhTTIkEucZGMo2NZFav7rXL6eoi2Xeup7OT1M6dver1QFgae9LGjRZgjDHGvLWgthZv0SK8PpmrZzU1DXh8coTZTOJipeiMMabM+IOk2R9se7FYgDHGmDJzZMMGcgUT/hCWxj6yYUORWjQwu0VmjDFlZlilsYvIAowxxpShriuvpOvKK0s6a7bdIjPGGBMLCzDGGGNiYQHGGGNMLCzAGGOMiYUFGGOMMbGwAGOMMSYWsS1TFpFLgTuAJHCXqm7ss//PgT8APOA14CZV/d9onw/sjA7dp6pr42qnMcaYeMQSYEQkCdwJXALsB7aLyAOquqvgsKeBVlU9JiJ/BNwOXBPt61LVc+NomzHGmLER1whmBbBHVfcCiMg9wDogH2BU9WcFx3cAN8TUFmOMMUUQV4BpBF4qeL8faDvB8R8GflrwvkZEniK8fbZRVX840Ekish5YD6Cq1NfXj6ixruuO+NxyZX0e/yqtv2B9LjVxBZiB6n0GA2xDRG4AWoHC8m9zVPWgiMwDNovITlXt7Huuqm4CNnVff6TpEko51UJcrM/jX6X1F6zPw9UQc/bluFaR7QdmF7xvAvoVKhCRi4G/Adaqarp7u6oejP7cCzwOLI2pncYYY2IS1whmO3CGiDQDB4BrgesLDxCRpcBXgUtV9dWC7VOBY6qaFpF64DzCBQDGGGPKSCwjGFX1gJuBh4Hnw036nIjcJiLdS44/B0wEviciz4jIA9H2s4GnROS/gZ8RzsHswhhjTFlxgmDAqZFyFBwcYblQu29bGSqtz5XWX7A+D1c0BzPQnPmosCf5jTHGxMICjDHGmFhYgDHGGBMLCzDGGGNiYQHGGGNMLCzAGGOMiYUFGGOMMbGwAGOMMSYWFmCMMcbEwgKMMcaYWFiAMcYYEwsLMMYYY2JhAcYYY0wsLMAYY4yJhQUYY4wxsbAAY4wxJhYWYIwxxsTCAowxxphYWIAxxhgTCwswxhhjYuHGdWERuRS4A0gCd6nqxj77q4FvAm8HXgeuUdUXo30fBz4M+MCfqurDcbXTGGNMPGIZwYhIErgT+F1gIXCdiCzsc9iHgd+o6nzgH4HPRucuBK4FFgGXAv8cXc8YY0wZiesW2Qpgj6ruVdUMcA+wrs8x64C7o6/vAy4SESfafo+qplX118Ce6HrGGGPKSFy3yBqBlwre7wfaBjtGVT0ROQxMj7Z39Dm3caBvIiLrgfXRNWhoaBhxg0/m3HJlfR7/Kq2/YH0uJXGNYJwBtgVDPGYo5wKgqptUtVVVW6PzRvQSkR0nc345vqzP4/9Vaf21Po/4FZu4Asx+YHbB+ybg4GDHiIgLnAIcGuK5xhhjSlxct8i2A2eISDNwgHDS/vo+xzwAfBBoB64CNqtqICIPAN8RkS8ADcAZwLaY2mmMMSYmsYxgVNUDbgYeBp4PN+lzInKbiKyNDvsaMF1E9gB/DmyIzn0OUGAX8BDwEVX142hngU0xX78UWZ/Hv0rrL1ifS4oTBANObxhjjDEnxZ7kN8YYEwsLMMYYY2IRW6qYciAiXwfeC7yqqm8rdnviJiKzCdPznAbkgE2qekdxWxUvEakBngCqCX/e71PVW4vbqrERZcB4Cjigqu8tdnviJiIvAkcIU0x50eML45qITAHuAt5G+DjHTaraXtxW9aj0Ecy/EaajqRQe8BeqejawEvjIACl8xps0cKGqLgHOBS4VkZVFbtNY+SjhIptK8m5VPbcSgkvkDuAhVT0LWEKJfd4VPYJR1SdE5PRit2OsqOrLwMvR10dE5HnCLAm7itqwGKlqAPw2epuKXuN+ZYuINAHvAf6ecJWmGWdEZDKwGrgRIErLlSlmm/qq6ABTyaLAuhTYWuSmxC66VbQDmA/cqarjvs/AF4FbgEnFbsgYCoBHRCQAvqqqJbt8d5TMA14DviEiSwh/xj+qqkeL26welX6LrCKJyETgfuBjqvpmsdsTN1X1VfVcwqwQK0RkXM+3iUj3vOKOYrdljJ2nqssIs7h/RERWF7tBMXOBZcBXVHUpcJToecJSYQGmwohIijC4fFtVv1/s9owlVX0DeJzxP+92HrA2mvS+B7hQRL5V3CbFT1UPRn++CvyA8Z+FfT+wv2BEfh9hwCkZFmAqSFQO4WvA86r6hWK3ZyyIyIxopQ0iUgtcDLxQ3FbFS1U/rqpNqno6YZqmzap6Q5GbFSsRmSAik7q/BtYA/1PcVsVLVV8BXhKRM6NNF1Fi86kVPQcjIt8F3gXUi8h+4FZV/VpxWxWr84DfA3aKyDPRtr9W1Z8UsU1xmwXcHc3DJAjTFv24yG0yo+9U4AciAuH/a99R1YeK26Qx8SfAt0WkCtgLfKjI7enFUsUYY4yJhd0iM8YYEwsLMMYYY2JhAcYYY0wsLMAYY4yJhQUYY4wxsajoZcrG9CUiFwC3Ev7y5QOfVNUnReQw8AvCXGY3EZbzvlhVPxGd9yngcVV9vOBadYQpWxZE521S1btPom1TCBN3VtQDsqZ82QjGmIiI1AOfBq5Q1XcBVwBd0e6dqvpu4C8Ic3wNxa3Af0bXOh/49Uk2cQpw5Ulew5gxYyMYY3pcBnyrOz+bqh4Bnu5zzDOEOc2G4h2q+lfRtQLCujSIyJcISwe8CXyAMOnoxar6
CRG5MTr3ceDrwCGgGVgHrAcuEZHHgatV9bXhd9GYsWMBxpgeDcBOABG5HvhjoENV/7LgmNXAL0f6DURkOTBBVVeLyA3AHzJ4RuuphKltrgPeD2wC5oz3tC9m/LBbZMb0eJkwyKCq3wFuAOqjfYtF5GeEQWcjcJywSma3Gnpup51IC+FcDoTVJufTuz6NU/D1LlXNAQcIb48ZU1ZsBGNMj58A94mIquphev/76J6DAUBEMsBSEen+JW0ZcHuf6z0pIh9Q1W9HiUbPI8wXtSba3wp0AocJc6YBLAaejb7uG3iyQPJkOmjMWLIRjDGRaE7j08CPRGQz8M/ANwc59nXCsgdPAFuA+1T1UJ/DPg1cEM2Z/BxoUdVtQJeIbAGuB/6FMKA0iMhPgBknaOIrwDQRuU9Epo2wm8aMGUt2aYwxJhY2gjHGGBMLCzDGGGNiYQHGGGNMLCzAGGOMiYUFGGOMMbGwAGOMMSYWFmCMMcbE4v8BULGFJKHEq5YAAAAASUVORK5CYII=\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZQAAAEkCAYAAAAID8fVAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAgAElEQVR4nOydd5wU5f3H38/s7u0V4A446h2IKBaaFAFRgQMVFYliG7EkxlhiEow1Bks0QWNMYvyZGEvURGN3VIpRVFSaiiKgAkpRFAUOhKNc39s2z++PZ/Zub2+vsnv1ed9rX7cz88zzfJ+dmeczT/0KKSUajUaj0RwsRksboNFoNJr2gRYUjUaj0SQELSgajUajSQhaUDQajUaTELSgaDQajSYhaEHRaDQaTULQgpIghBADhBBSCHFiS9vSEIQQvxdCbGlpO2oj1j4hxE+FEKFmSrvZ0upICCHynGckNwFxfSeEuD0RdmnqRwjxlBDi3frCaUFJHNuBPsBKACFErvPw5LWoVe2Hl4Cc1pqWEOJE53oPSIpFSUAIcbsQ4rtmTHIF6hnZ2YxpapoRd0sb0F6QUoaBH1rajvaKlNIH+NpbWvEQQqRIKQMtlX6ycPKkn5F2TKutoThvfB8KIUqcz1ohxKnOsbjNS0KILUKI30dtSyHENUKIl4QQZUKIbUKI84QQmUKI55x4vxVCnBt1TiTui4QQbwshyoUQm4QQk4QQOUKIhU5cG4QQE+KcF7Fpu/N/ibP/u1ryeYUQYkeceJ6N2neZEGK3EEI4272cKmiBk4cPhRATo8ILIcTjQohvhBA+J4/3CCG8dfze3Zx4lgkhsuq6Nk74s4QQnzm/T6EQ4hMhxMio44cJIV4WQux3wqwTQkx3jnUVQjzrXA+fEGKzEOLGSP5qSa9aM1RkWwhxghDiUyeNVUKI0THnnSyEWC+EqHBsmOT8vpckKi2nVvK+E3yrE//SqPNnCiE+d2z4TghxvxAiI+r4UiHEv4UQdwkhdgH5DfyNDxdCvOocOyCEWCSEGBaTl9FCiLeEEMVCiFInjnFCiJ8CdwGHOPZKEfXsxMQRaao6RQix3LFng3Cex6hwRwoh3nDSKRVC/E8IcXiceHIbGe8xQogVzu/3lRDCjGNjJyHE34UQ+U48nwkhzok6bgohAkKIsVH7fuLEOTI2vjjxR+6Byc795BPqWekrhJjopFcmhHhXCJETdd6hQoi5Qoidjl3rhRA/jol7qRDiCSHE74QQPwj1zDwVc4+MEkK8KYTY4/y2q4QQp8XE012oZ65MqPLiLiHEf0VMU5VQZeImJ+9fCyFuE0K4o453FVVl5m4hxN1Arc9mNaSUre4DuID9wP3AIOdzNjDBOT4AkMCJMedtAX4ftS1Rb0SXAocDDwPlwJvAT519DwJlQPeYuL8BZgBHAPNQ1fR3HTuOAF5FiYYnnk3ASGf7HKA30KOWvB7qhDvS2b4c2APsjArzDPCi8z0N2OCkf6yTh9sAP3C0E8YA7gbGOXadCewC/hAV5++BLc73/k6crwDeBlyf3kAAuNmx/2jgImBY1PHdzu91InAYcBYwLer4b4FRzvmXAKXAZfHsc7Z/CoRitm1gOTABOApY5Fw3txMmx7neTwCDgZOANc7vfUkd+WtUWqj79Uwn3jFO/rpFnXsA+DEwEJgIrAOeiYp/KVACPOrYOawBv3Ev1L39iBP+SNS9vA/nXgOGoO7tF1D3yiDgQmA86j66F3UP93Y+nWr5PfKcvK0FTnPieRooBLKi7svvgfeA0c5nCeqZTImJJ7eR8eYDC4FjHNtXOdf1dieMcNJairrfBgJXOb/fSVH5eNy5Zl1Qz3AJcE0Dy6TIPbAU9VyNAr5GvUgsBY5DPfObgJeizhsG/AoYjnoOrgFCwOSY618I/B/q3jrN2f5DzDW4FHV/HIF6vgPAEVFhXgO+AiY71/5JoAh4N+a5+h5Vjh0KTAO2AXdFhZnnXLcpTjzPAsXR8dT6O7W0eNRy8bo6N1peLccH0HBBeSBqu4ez78E4aU2Pifu6qDBjnH03Ru2LCMbQeDYBuXXlIcburcAvne/PAX9wLuBgZ98O4KqoG3sHTqEZFcfi6LzGSeN64OuYG2uLc6PnAw8BRgOvTyTvA2o5fheqsMtoxDX/O/BOrH0xD3RsIS+BUVH7jqO6OP8R+A5wRYU5jaYJSn1pnRjvN3HSvzpm30QnbFdneymqIDCiwtT3G/8e+Dhmn0AVmNc528+gCuu41xW4HfiuAdcmz7HlnKh9vZ19pzrbl6MK+eyoML1QTYc/iYknVlDqivcK1MtG16gwQ50wt0fFUwFkxtj9H2B+1HYa8CVgAZ9FH2vAbxC5B0ZE7fuNs290zHO2t564FgCPR20vBdbFhHkU+KieeNYCtznfBzm2RAuoB/XC8K6zne5co9Ni4vkJUOh8P9yJ55So4ymoMqJeQWmVfShSygNCiCeAt4UQi4FlwDwp5eYmRLc2Kt4CIUQY9YYYnVYA6FnbeVS1+66Lsy/2vKawBPU28DDq7eIh1BvlFMfeHJRgQNUbcKGo3kLkJardXwhxJephHABkoN6kY5s4e6Deup+QUt7UCHvXAW8DXwgh3kE9EHOllJFmvtHACillWbyThRAG6s17Jkp4U1E3//eNsAGq3m4j5Dv/ewGbUW9zq6Tq34rwUSPTaGhaNRBC9AAOAe4XQtwXfcj5fzjqbRtgjZTSjgpT3288BhgthCiNSTYNVbiAug5vxcR7MHwe+SKl/MG5N3s5u4YAG6SUe6PC7BZCbHaONTXewcBGKeWBqDBfCCGKos4fg1PoxTwTKahaROQ8nxDiAie93agaa2OQwPqo7drKhe5CCJeUMiyESAfuAH6EGpCQgnpWl8TE/XnMdj4wNbLh3Et/QJUTvVHPcyrq/gL1OwF8XGmslEEhxGqgs7NrCOr+eFUIIaPScgGpThqReFZExRMQQqwCOlEPrVJQAKSUVwoh/o76UU8B7hJCzJJS/gtV9YSa7XqeOFEFG7BPUrOwDcYcr21fIvqhFgN/F0IMQV38T5x9JwFhYLuUMjKE1gA2oqqssZQDCCHOR4nSbJQYFwPno97YoylEPQxnCSEekFLuoAE4D8rpqAf5ZOBc4F4hxPlSytcjweqI4kbgFuAG4FNU08P1wBkNST8KO0Ys4l2TWDvqsutg04olcuxaahYgoGqaEaqJbwN+YwPVvDQrTrzRhW1T8xuPeAMF6vqtQT2j9dlQV7wNOd9A5XlMA+KO9HFmoV4G99cTdzRx7wEpZbxyIVI2/RXV3HsjqjmsDPgbkFmPnbFl0lOopumbUS0aPuBFlEDFnlcbkfjOR9WIY9lPQ/tK6kmgVSKl/EJKeb+U8nTg36h2UYAC53/fSFghRE+ab1hpQ4jcIK4GhH0P
6IYqVJdLKUMoQZmEKkwWR4VdjWojLpZSbon5RIZjTgQ+c367NVLKr1E1lViCqD6e9cAyIcQhccLERSo+kVLeI6WciBKuy5zDa4ATojsVY5iIenP+t5TyM0csB9US9mDYAIwRQkRfg/FJSAfiXG8p5W5Uk8ORca7VFillRV0R1vMbr0a9cebHiTfyfKwBTnZqhLXZ3JD7syF8CQwRQmRHdggheqHa+788yHgHi6iBIs6LV3SBvBolEKlxfottMefdD/wc1Y/6oqhjoEqCmAg8J6V8SUq5FvgW9Zs0JZ6HpZSvSSnXo/pEB0Yd3+D8r7y/nY726EEqX6KaBgfWcj+GqbpWx0fFk0J8sa5BqxQUoUav/FmokV6HCCHGozpDN0DlsM4PgZudESCjUZ15/pazugZ7UW2/U4UQvYUQXWsLKKXchWo2uZQq8fgcVRM7k+qC8hzqDeUNIcRUoUaFjRNC3CKEmOGE2QwME2qU0GFCiGtRwhEv7SBgoh7KZUKIgfHCRSOEON4ZkTJOCNFfCHESqi8mclM/jLq3Fgg1MupQIcR05407Yl+eUCNmjnBGkYyrL90m8DCq6eQRIcTRQojJVNXSEvnmDqq5zgamCSF6CiEiBd5twK+FmvMxVKiRUDOEEP+qK7IG/Mb/RInBfCHEBOc+OFEI8UchRKQw+AtKqJ8TQhzr3AvnO88TqPuotxBivBAi22meaSrPo170XhJqRNJo1Bt0Pmpez8HEWwI86zzrx6H6RqKHdS9GDQCZK4Q4WwgxUKjRbdc4Tb8IIVIde16TUv4buBLVfxrdFJkMNqNaAMYKIQYDjxH1ItzIeC4WQgwTQoxADbSIfnn5Gvgf8JBQIxkHA/9CDUCI1KRKgXuAe4QQs5x7cYhQoxD/7ITZgurcf8h5PgejBrV0pgG0SkFBVQsHoW6Ar1AjmlZQvXr/M1SBvcIJ9xhKtVsFTrv1r1CF9XZUJ2BdvIdqglzsnC9Rb6SV+5z9Faiay2rUKI6vgLnAWKr6IP6F6pB90kl3HKoTtzZbQ6gRRB+gRKW+2kIR6k1oAaqN+j8oobvLiW8XqmmhBDU650tUQR6pTt/l5G0Bqk+jK/CPetJsNFLKfJQgH48S6L+jOqJBvaklMq3dqGa82aj7cIGz/xnUPXAGqilzFepa5MeNqIr6fuPdzvG9qOu/2Tl+iJM+zptsHqqvbBnqN7gJ1YwKMB94GXgDJQY3NzH7kZe8qaiXuuVOemWoDuAmz6mRUpajRiJ1R/1+z6FGQ+2JCiNR13kuqgayCZWnM1CDFHDOyUDVTnD6ZC4GrhZCnNlU+xrA9ajncgnqGc9HjaZsLJehyutPUNftLar636LDfIGqfS110nqHqHtdSnmXY9MVqD7BD5zt76Li+RnqXnkddR3zUSO/6kU4vfgaTYdAqPk6y4DhToGr0bRLnKbeTaha2Y3NkWar7ZTXaBKBEOIXqDexnagRLP8HrNRiomlvOC9LPVGtEp1RNY8BqA79ZkELiqYGQohbgVtrOy6lrHf4YCviEFRTVGQi4DuoSZUaTSVxhl9Hc4+U8p5mM6bpuFBNuoejBtx8gZpA2WwvT7rJS1MDIUQ31KizuEQNYdZo2gUiaomYOOyXUjZmeHGHRQuKRqPRaBJCax3lpdFoNJo2hhYUjUaj0SQELSgajUajSQhaUDQajUaTENrVsGHTNP8DTAf2WJY1tJ6w/YH/otYAcgGzLctamHwrNRqNpn3S3mooT6H8XTSE2wHLsqyRqGXUH06WURqNRtMRaFc1FMuylpumOSB6n2mah6GWcu+BWt79SsuyNqEWTOviBMtEzaTWaDQaTRNpbzWUeDwGXGNZ1mjUwniRmsjvgUtM09yBWsDwmpYxT6PRaNoH7VpQTNPshFpp9mXTND9HrcLbxzl8IfCUZVm5qNVMnzFNs13/HhqNRpNM2lWTVxwMoNCyrBFxjl2O099iWdZHpmmmAtlELYut0Wg0mobTrt/ILcsqBraapnk+gGmawjTNY5zD23B8SpumeTTKP3NB3Ig0Go1GUy/tai0v0zRfQDkUygZ2A3einFM9gmrq8gAvWpY1xzTNwcDjQCdUB/3NlmUtagm7NRqNpj3QrgRFo9FoNC1Hu27y0mg0Gk3z0Z465XVVS6PRaBqPSFRE7UlQ2LmzaXMTs7Oz2bt3b4Ktad3oPLd/Olp+Qee5sfTt2zehtugmL41Go9EkBC0oGo1G08YxdhfQ5ZY/kT31wha1o101eWk0Gk1HwthdgOsP/0ev/74M0kYEgi1qjxYUjUajaU2EQhgHijD2F2LsOxD1P/K9ENeu3bi/3opxoAhIYK/6QdKuBUVKSVlZGfXNtfH5fIRCoWayquWQUpKamkpKSkpLm6LRdBiEz4exr7BKGOKKxAFcThhRVIyopcyyu3TC7tYVY+9+RGlZqxGSCO1aUMrKyvB6vXg8npY2pVUgpaSkpISSkhKys7Nb2hyNpu1h24iiEiUAB2IFwtmO3e+riBuVdLmwu2Vhd8/C7taV4OBB2N27YnfvSrib2hc5ZnfPwu6aBSmqLDP27KXTA0+Q8dJrYEtEINCcv0KttGtBkVJqMYlCCEGXLl3YsmULW7ZsISsrq6VN0mhalkCwes3hQCGuGrWIqNrFgSJEOBw3Kjs9TQlCtyzs7G6EjhjoCEbXyv3hbl0r98nMziCaVsewe2ZTfM9sSq+7gh6PPovxX0uJne5D0TQ3LpeL1atXc/LJJ7e0KRpN4pASiktwfb+jeg1hvyMSMcJh7DuAUVIWPyohsLO6VApBaGB/7GOPiRKIrMrvEZEgLbWZM6yEJfyPuym4+hI6PfAEKavXNrsN0WhB6aB0hD4jTRsnHFad07FisL9Q9T3sK8QV0xchAkF6xYlKpnicpiNV+AcOyVHfu1avQVR+z+oCLlezZ7mpRGosLY0WlBjS5s6l87334tq5k3DfvpTMno3vnHMSEvf+/fu59tpr+e677/B6vQwYMIA///nPdO/evTLMPffcw7/+9S+uvfZabrjhhsr9tm3z85//nE2bNuH1esnOzubee+9lwIABDYpXo2lxfBW4HCGICEJ1gajeQV1/57TqXwjn9CY47CjC3buS1i+HEq+nhkjIjPQmNy9pGo4WlCjS5s4l8+abMXw+ANz5+WTefDNAQkRFCMEvfvELjj/+eADuuusu7rnnHv72t78BcN9997F27VpWrFjBNddcg9fr5Ve/+lXl+eeffz4nn3wyhmHw5JNPcvPNN2NZVr3xajQJJ9I5vb/QEYnoWkRhlUhE1yzq65x2PqGjBxGINCdFRCGqdmF3q+qcjsWbnY2vgy290proMILS5Y478GzYUGeYlDVraoyWMHw+sm68kfTnn6/1vODgwRTPmVOvDV27dq0s9AFGjRrF008/DcBDDz3EN998wzPPPENKSgrPP/8811xzDY8//jhXXnklhmEwderUynNHjx7NE088UW+8mo6NsbtAta2vWcfeRS/UHjDSOR3ThFRrB/X+wto7p9NSq9UQQocfWjWaKarWEOl7kJmdwdCLdrQHOoygNIjaht4
lYUiebds8/fTTlSIRXRMBSE1N5fHHH6/1/CeffJJTTjml3ng1HRPjhwJcN91Nr1dfV7WJUJiMR56u2UEdqUUUl9Yal52VSdgRg9Ch/bCPHa5qDNU6qKs6qmVaWjPmVNOa6DCC0pAaRM+xY3Hn59fYH87JYd8rryTUnttvv52MjAwuu+yyRp/7yCOP8PXXX/Pyyy8nNF5N28f11bdk3Xw3KavX1eh/yLz770iP26klqBpEoF/fKkHoFqeDOqsLuDtMMaE5SJJyp5im+R9gOrDHsqyhcY7/Brg4yoajgR6WZe03TfM7oAQIAyHLso5Nho3xKJk9u1ofCoCdlkbJ7MSOnpgzZw5bt27lqaeewmhkVf/JJ59k3rx5vPTSS6TFvAkeTLyaNoyUpHy0hvQX5pM2761aO7J3bVqG7JShO6c1SSNZrx5PAf8E4jbkW5b1V+CvAKZp/gi43rKs/VFBJluW1ew9a5GO92SN8gK49957WbduHc888wxer7dR5z777LM8++yzWJZF165dExavpm1i7NlL+suvk/78fNzfbcfu0gnf+dOh3Efau+/XmEEtO3dqQWs1HYGkCIplWctN0xzQwOAXAnX0FjYvvnPOSaiARLN582YefPBBBg4cyJlnnglA//79+fe//13vuaWlpcyePZvc3FxmzpwJgNfr5fXXXz+oeDVtjHAY79KPSH9hPqnvLEeEwvjHjaTk+iupOGNKZf9F8Z69rWoGtSY57C4wuOByNw/ea9Czh93S5rRsH4ppmunAacCsqN0SWGSapgT+ZVnWYy1iXBI48sgjyY/TR9MQOnXqxI4dOxIer6Zt4Nqxi/QXF5D+4mu4du0m3L0rZVdeTNnMswgfPqBG+NY2g1qTWGwbiksEd/21Cx+uFDzwaCfu+V1xS5vV4p3yPwI+jGnuOsGyrJ2mafYE3jFNc5NlWcvjnWya5lXAVQCWZdVY8NAX1ReiqcIwDAzD6HALRLrd7raV50AA8ca7uP79IuJd9QjIkycS/L/fI884mZSUFOpaN9rtdtNt8FHw2H0AtKGcN5k2d42BigrYdwD27xfq/wHYd0Cwfz/sLxRqe7+o2n8ADhRCOFzVF2bNz2DObSn07tmCGaHlBWUmMc1dlmXtdP7vMU1zHjAWiCsoTu0lUoORsX6V9fIi8bFtG9u2te/tVopry3dkvDCftJdfx7XvAOE+vSi77grKZ55FOLePClRc/9toW8lvImnJPIfDUFQiOFBoVPsUFjnfi6rvV9uCioraB9CkpdlkZUq6ZobpmmVzxGE2XbNsumbarPgkhc/WpxAOC8JhuOOPgUbXUhLtU77FBMU0zUxgEnBJ1L4MwLAsq8T5PhWof7yvRtPW8VWQ9sZ7pD8/D+/Kz5BuFxWnTKT8whn488a3qXWl2jpSgs/nCEORqCkEUeJQGHWsqFggZfwRdIYhycqMiIEkp0+YoUcH6ZplR+13/jufrEy71vUmdxcY/Ou/nSprKYGg4KV5GVx3dWmL9qUka9jwC0AekG2a5g7gTsADYFnWo06ws4FFlmVFL/fZC5hnmmbEtucty3orGTZqNK0B95dfkfH8PNLmLsQoLiU0oB/Ft15D+fnTsXu2raab1kgoBEXF0WIQIxBFcWoShQb+QO1DqzPSqxf8uX3DNcQgWiCyMm26dJYJXQzggUc6IWN0w7Zp8b4UUZ83wzaE3LlzZ7UdJSUldO7cuYXMab1s3bqVzZs3c9ppp7W0Kc1Ka2kCEiWlpM1/m/QX5pOydgPSm4Jv2hTKLzqbwPjRCZsn0lrymwikhLLyms1JBwqFEgJHDErLU9mzJ1S5XVxSeynudku6ZkbVELKqahCxNYXo795W4PB06rnZfLmppiFDjgqw6NWGX3OnySthE5Naug9Fo+kYSIlnzXoynp9H6muLMHwVBI86jKI5N1F+zjRk18yWtrDZCAaptdmoRg0i6ngwVHu517mTKvB7ZENWps2hh4SiREDGrUF0ypBtdo5ntGi0phcHLSgaTRIR+wtJf3Uh6S/Mx7P5G+z0NHxnn0b5hTMIjhzapmetK39Worog1OhvqF6DOFBoUFpWe60hxVO9dnD4oaEatYSuWdUFIrOLTcQxqypc99cavya5aEGJYnr2VDamfFlj/9GBIby+d9FBx58sfygAP/vZz9i2bRuGYZCRkcFdd93F0KE1Vr3RNAe2TcqHq9VSKG8uRgSCBEYOpfCvt+M7c6pa/iTJNHbCmz9AnOYkJQRVgiFq1Cyih67GktXFJssp9LO72Rw+MBS3ptAtqhaRnt52aw0aLSjVGBUYzRbP1wRF1XIVHpnCqEBilhNLlj8UgAceeIAuXboA8Pbbb3PjjTfy9ttvJ8RuTcMwfigg3fof6S/Ox/19PnZWF8p+fC7lM2cQGjyoWW25/6FOfLhScOMdWZx1ui/uCKXoGkS5r/ZaQ2pq9WajIw8PEa/juUogJJldbD0wrQPSYQTlri53sNFTtz+UAAFCVF+iIkSIDZ4vuKj7ebWed3RwML8rbjl/KEClmAAUFxfrxSGbi1AI7+IVaimU9z5AhMP4x4+m5De/wHfa5Gb1M27b8MmnKTz/ajqvvpYGCBYvT2XxcmWDYaiCvmuWalbq0yvM4CNrCkJsLaIFXKVr2igdRlAaQgop9Aj3pMC1BykkQgp6hHtQ93zkppEMfyg33XQTy5YtQ0rJc889l3CbNVW4tuWT/sIC0q3XcP1QQLhHd0qvvoTymTMID+zfbHZICes3eJi/MI3X3kpj1w8uXC7VbCSlGsn0o1N93HVrEZldEjt0VaOJpcMISkNqEAB7jN3k9RqPHz9evLy29y162IlfzyAZ/lDuu08tsfHKK69w991388wzzyTEVo2DP0Dq20vJeH4+3vdXIg0Df954iu6+mYqTJ1DZM9wMbPnWzfyFacxfmMbW79143JJJJ/iZdXkJc+7LxO9XHRGhkODNd9O44zfFGEa7mSKgaaV0GEFpKD3tXpxXdgHPZzzDuWUXJEVMkuUPJcJ5553Hb3/7W/bv30+3bt0SYXKHxv31VtKfn0faK2/g2l9IKKc3xTf9nHLzTOyc3s1mR/5OFwveVCLy5SYPQkjGjwnwi8tKmXaKj65ZklvmdGmVE940HQMtKHGYVXodX3u+4prS6xIedzL8oZSVlVFYWEhOTg4AixYtIisrq4bPFE3DEeU+Uv/3DukvzMe7aq1aCuXUPMovmoF/wrhmWwpl7z6D199OZf7CNFZ9pu6XkcMD/GF2EdNP9dG7Z3X1WLM2hUCw+jCpQFCw+vNWMBtP0+7RM+Wbkc2bNzNlyhQGDhxIaqrq6WyMP5SjjjqK3NzcyjxF/KEUFBRw2WWX4fP5MAyDrKws7rjjDoYNGxY3Lj1TvnY86zeS/tw80ua/hVFSRmjgIZRdPAPfedOxs5untldcInjz3VQWLEzjg5VewmHBUYOCnDXNx1mn+zikX7hB8bSmCW/Nhc5z49Az5dswyfKH0qNHD15//fWDMa1DI4pLSJv7ploK5YvNyFQvvjNOpvziswmMHdEskw99FfDuslTmv5HG4u
WpBIKC/rkhfnl5KTOm+ThqkF45W9P60YKi6ZhIScqqz0l/bh6pr7+LUeEnOPgICv/4W3xnn47MTH7NNhiEZSu8LFiYxtuLUykrN+iZHebHF5Qx4wwfI4cF9SQ/TZtCC4qmQ2HsO0Day6+rpVC2fIfdKQPfedMpv2gGweFHJ702Ytvw8eoU5i9M441FaRQWGWR1sTnrdB9nTfMxfkxATwjUtFm0oGjaP7aN9/2VuF59k16vvY0IhgiMHs6B+++kYvrJyIz0pCYvJaz9Qs0V+d9bafywx0Vams2pkys4a5qPvBP8pOg+c007QAuKpt1i7NxN+kuvkf7iAtw7diG7d6XspxdQfuFZhI48LOnpf7VFzRVZ8GYa321Tc0UmT6jgjt/4OCXPT3p6uxkQo9EAWlA07Y1gkNTFH5L+3Dy8S1YgbBv/iWMpvvXXdLrkXIpLSpKa/PZ8FwucCYcbv/JgGJLjxwaYdUUpp5/sIytTi4im/aIFRdMucH23vWoplD37CPfKpvRXP6X8wrMIH5ILQCevF5IgKAV7Df73thKRNc58j1HHBJhzSxE/OtXXoi5ZNZrmRAtKHIAUD88AACAASURBVHYXGPzypq48ct+BhBYGyVy+PsL999/P3/72N9577z2OOuqohNneKqnwk/bWElUbWbFaLYVy0okUXjQD/5QTwJ2827uoWM0Vmb8wnQ9XpmDbgqOPCDL7umLOOt1H/9yGzRXRaNoTWlDi8MAjnVi5JiXhy1Ukc/l6gPXr1/Ppp59Wzphvr7g3bSH9+fmkv7oQo7CIUP8cim/+JeXmj7D7JH6pnAg+n2DRUjXMd8n7aq7IIf1CzLqilBln+DjycD1XRNOx6TCCcsefurBhc/2L9wUC8Om6FKQUPPNSBl9s8pBSz2mDjwwy55b6hSeZy9f7/X5uvfVWHnroIc4///x6bWlriLJy0l5bRPrz80n5dD3S46bitMmUXXQ2gRPHkKxldAOB6nNFyn0GvXqE+cnMMmZM8zFCzxXRaCpJiqCYpvkfYDqwx7KsGm4DTdPMAxYAW51dcy3LmuMcOw34O+ACnrAs695k2FgbO3ZWTQKQwI58FwMHJL75ItHL1993332ce+659O/ffEunJx0p8azdoBZmnP82Rlk5wUGHUnTnDfjOm4bdLTlrlYXDaq7IgoVpvPFO1VyRs89Qc0WOO1bPFdFo4pGsGspTwD+Bp+sI875lWdOjd5im6QIeAk4BdgCrTNN8zbKsuj1jNYCG1CB2FxiMP7UXUqpXTikFRcUuHv7r3oR3rCZy+frVq1fz+eefc+uttybUxpZCFBaTNu9NMp6bh2fj19ipXirOnErZRWcTPHZ4UiYfSgmfr6+aK7K7wEV6ms2pU9RckUnH67kimtZDDXflfdW/RLkrbypJERTLspabpjmgCaeOBbZYlvUtgGmaLwJnAQctKA3hgUc6NcvS34levv7jjz/mm2++4bjjjgNg165dXHzxxdx///1MmjQpYXYnFSlJ+fhT0p+fT9rC9xAVfgLDj6bwT7fgm3EqsktylkLZvMXN/DfSeO3NNL7b7ibFo+aKnDXNx9Q8P2lpbWuYb2staDSJJdnuyptKS/ahjDdNcy2wE7jJsqwvgRxge1SYHcC42iIwTfMq4CoAy7LIzs6udtzn8zXKoOZY+jsZy9fPmjWLWbNmVW6PGzeO//73v7WO8jIMA8MwavxeLcLuAoxnXsX15IuILVuRmV2wLzWxfzYTRgwlHUjUPHa32012djZbv4eX5xu8NM/gi40GhiGZPEFyyw0hZpxhk5XpAjo5n7bF8a4T+EZ+TSC2oHGNIpwdwsZG1vNnY0PcIyBFVZi64qABYeqKh3rDVLfQFiqMYQjCPcONSqthea8tnpqhq+VdNC7vsbHVZnOIEDbVm+JduLgzZU6LPtctJSifAodYllVqmuY0YD4wiPjLKNf6imhZ1mPAY5FwsUs4h0KNG3Wz6NXkLnu9efNmHnzwQQYOHMiZZ54JNG75+tmzZ5Obm8vMmTOBquXrG4tt29i23XLLfIfDeJd9rPywL1qGCIXxjx1B+QN/oGL6SciI47AE2renwGDxB9k89zJ8ula9IBw7IsDdt5Yz/dQKemSrqmkomNBkmxU/fo7xjuCpbtULmqAI8JzrGZ5zdRAPnq3UzbGQgug/AwMiWxJEzB9OmMotWbVfIHBLD2HCINRLw7llJq5iN3tp+A3sLF+fMFpEUCzLKo76vtA0zYdN08xG1Uj6RQXNRdVg2gXJWr4+lpUrVzYpjWTjyt9F+ouvkfbiAtw7dxPulkXZ5RdSftEMQocfmvD0CouU+9v5C9NY8YmaKzL4yCC3Xl/Mmaf76JfT9ueK7HTls9S7mGXexazwfkC5UY4hDYQUSCFxSRcjA8fyI9+ZlcVYbMFVVVBRWdDFFmwqTFWhFwlDjXii98RLi5gQ8dKqXrhWK3jjplOVVresrhQWFlXGUWdacQrxmmnVDFMjXw1JKwlEuyt3YSTFIWBjaRFBMU2zN7DbsixpmuZY1DvFPqAQGGSa5qFAPjATuKglbNQkiGCQ1HfeJ/35eXiXfgSAf+I4iu+4nopT86h3THYjKS8XvLPUy3xnrkgwJBjQP8Svryrlpxd56dG9jVY/HIIEWZOyiqWpi1nqXczXns0A5Ib6cY7vfPIqpnBYaBCn9ZyMHz8ePPzzwKNJcWXdGskmm72htn2NG0rEXfkLGc8mzV15Y0nWsOEXgDwg2zTNHcCdgAfAsqxHgfOAX5imGQJ8wEzLsiQQMk1zFvA2atjwf5y+FU0bw/XN96S/uIB063+49u4n3LsnpddeTvnMswj3S2w1OxCApR96mf9GGouWpuLzGfTuFeayi9VckeFD1FyR7Gxvm2zO2m38wLLUJSz1LuZD73JKjVI80sOYwDjOL7qASf4pHBY6vNqbcGsraDTJYVbpdWxN/7ZV1E5AuwDukCTNBbCvgrSFi5Uf9o/WIF0uKk6ZQPmFM/BPPj6hftjDYVixSs0VWfhOGkXFBl2zwpwxtYIZ03yMGx2oMdexrbiHDRHi85RPWep9j6Wpi9noUYMce4f7kFcxhTz/SYz3n0AnWfuggT3Gbm7s+Wvu3/NghxKUtnKNE4l2AaxpV7g3fE368/NIn7sQo6iE0IBcim+ZRfn507F79UhYOlLCp+s8LHDmiuzZ6yIj3ebUk5SITBzvx5PYFrRmY69RwDLvEpamLuYD73KKjSJc0sWxgbHcXHwbeRWTOSJ0VIPb43vavXgz9B577Y5VuGpaFi0omiYhSstIW/C2Wgrl8y+RKR5806ZQftHZBMaPTuhSKBu/Un5FXnszjW073HhTJFMmqrkiJ09se3NFAMKEWef5nKWpi1nmXcL6lLUA9Aj35FTf6eT5p3CCfwKdZZcWtlSjaThaUDQNR0o8n36hlkJ5bRFGuY/gUYdRNOcmys8+HdktK2FJfb/dpZxTLUxj8xYPLpfkxHF+rru6hNNPrqBL57YnIvuN/bzvXcoy7xKWe5dwwHUAQxqMDIzmhuKbyas4iaNDg52RRhpN20MLiqZex
P5C0ucuVH7YN32DnZ6G76yplF94NsFRQxO2FMruAoPX3lQi8tl6NVdkzEg/f7ytkOmnVpDdvW35FbGx+cKznmXexSxLXcznns+QQtIt3J08/0lMqpjCBP9EsmRy1iTTaJobLSh1YOwuoNMDT5CyZh17F71w0PEl0x/KuHHj8Hq9lbPvb7vtNvLy8ppurG2TsmI16S/MJ+3NJQh/gMDIIRT+5TZ8Z52K7JTR9LijOFAoWPiOmivy0Sq1yvOQo4LcdoOaK5Lbt23NFSkShbzvXcay1CUs8y5hn2svQgqGB0fw65IbyPNPYWhwuK6FaNolWlDiEBGSjJf+B9JGBIIJiTfZ/lAee+yxg3aqZewuIN36n/LD/t0O7MzOlF18NuUXnk1o8KCDijtCWZlg0ZJU5i9MY9mHXoIhwaGHhLju6lJmTPNx+MC241dEItno/rJyWO9nKWsIizBZdhYTKvLI809hgj+P7nb3+iPTaNo4HUZQutxxH54NX9UdKBDEtWMnrj37ABBRQ6q7n3dVracFBx9B8Zyb6rUhmf5QmkJlDWz1Okp++0vSn59H6rsfIMJh/ONHU3Ljz/GdPgXSUg8qHQB/AJZ+oETknaVefD6DPr3D/OwSNVdk2OC241ekRJSwwvu+mqGeuoTdrh8AGBIYxtWls8irmMIxwZG40GvcazoWHUZQGoLnq28RJaVJWiihOon2hwJULhA5ZswYZs+eTWZmZvyTw2HEm4vp9Ys/QCiEsG26X3od4exulP78EjX58LBDDiJ3lcnw4Sdqrsib71bNFTnvTB8zpvkYO6rmXJHWiETylXuzM6z3PdakrCIkQnS2uzDBP5FJFVOY5J/coeZ7aDTx6DCC0pAahLFnr9PU9RrY1Zu69r3yWB1nNp5E+kMBmDt3Ljk5Ofj9fu68805uv/12Hnzwwbjnu3btQXy2HhGoWpF2/xN/peLkCRzsRA4pYc3aqrkiBftcdMqwOe0kNcx3wnFtY65ImSjjo5QPWZr6Hku9i9nlVpNmjwoezRWlVzPJP5mRgdF4aAOZ0WiaiQ4jKA3B7plN8T2zKb3uilqFJREk2h8KUOlH3uv1cumll9YtVFI6i/RVUXH6lEbZERMdG79ys2BhGgveTGN7vporctLECmac4WPKxIpEtJolFYlkq+sbtUZW6mJWpawkIAJk2Bmc4J/INaXXM7Eijz52YpeN0WjaE1pQ4hArLCmr1yYs7mT4QykvLycUCtGlSxeklCxYsIAhQ4bUHpEQSNfBtzVt/d7FAmeY71ffqLkiE8f7ufFXJZw6pfXPFfEJHx+nrGCZs9Didvc2AAYFj+AnZT9jUsVkjg2MJQXtqlGjaQhaUOogIiyJIln+UAoKCrjyyiuxbZtwOMygQYO45557ao0r3KcncsRQ5PYDja6B/bCnaq7I51+ognbcaD/3/K6Q6VMr6N6tdc8V+d71XeVKvSu9H+EXFaTZaRwfOJErS69mkn8KueF+9Uek0WhqoBeH7IBEFoecNurYyhpYXfNs9hcKFi5Sc0U+Xq3migwbHGDGNB8/Os1HTp/WKyJ+KvjEu5Kl3vf4IGM5W8TXABwaGsikisnk+U9irH8cXlp5m1wT0Asldgz04pCaVkFdNbCyMsHbUXNFQiHBYYcGueGXJZx5uo/DD229Ew7zXTtY6n2PZalLWJHyAT7Dh1emMkFO5KLinzCpYjIDwol36KXRdHS0oGgqqfDDkvdTWfCmmitSUWHQt3eIK39cxowzyhlyVKhVzhUJEGB1yifO7PTFfO1R8436hfpznu8CJlVM4bjA8fTr3o+9ZR3r7VWjaU60oHRAQiHJMy8ajBpt0K2rzYpPvMx7I4233kuluMSge7cwF8xQc0WOHdk654rsMnZWLm/yoXc5ZUYZHulhbOA4zKILyauYwqHhw5LmflWj0dSkXQuKEIJAIEBKih6lAyClpLCwkDWfF7MtH2Ze0Z19Bwz27nPRuZOaKzJjmo8Tj/PjbmV3RpAgn6WsqZxcuMmzEYA+ob6c6TubvIqTGB84gQyZmDXGNBpN42llxUZiycjIoKioiG3btuGqw1ugYRjYdjN0LNs2osJf+SHs9EN43EivF5nqRXpTEupLJEIoBCWlYd565wfefKcC2/aweYubkyZVMPNsNVcktXGjmJNOgbGHZd4lLEtdzPve5ZQYxbilm9GBMfy26HYm+SdzROhIXQvRaFoJ7VpQhBBkZmayadMmvv32W9xuNyJOJ0BaWho+ny/xBoTCuHbtxrU9H9e2XbgK1BphMtVLOLcP4f45hPv1xe5SuyvXplJSKsjf5WbHToP8XS6KS5RIuVw2bpfNrsJJeDyQ2yfMtFMqEp5+UwgTZq3ns8phvV+mrAegZ7gXp/vOYJJ/snY6pdG0YpIiKKZp/geYDuyxLGtonOMXA791NkuBX1iWtdY59h1QAoSBkGVZxx6MLUIIxo0bR79+/SgsLIxbE8nMzKSoqOhgklFIiSt/F551m0hZvxH3xi2IQADpchE6YiDBkycTHH40oYH9E+ZDJML+QoONX3nYsMnNxk0p/LBXCUinDMnRRwSZNDFIv75h/vyPLIpKcvEH1bpTL83L4LqrS+nZo2WG/u4z9vG+dylLvYt5P3UphUYhhjQYFTiWG4t/S17FFI4ODdG1EI2mDZCsGspTwD+Bp2s5vhWYZFnWAdM0TwceA8ZFHZ9sWVbChuMIIcjJyalcniSWgxnHbRTsw/v+SrzLPsb7/kpcu1U8wcMH4D/jVPwTxhEYPzph/kMi7C4w+OgTLys+SWHFKi9bv1eXMrOLzXHH+jHPD3D8WD9HHxGqbEG7ZU4XCksyCASrCmfbhgce7cQ9vytOqH21oZxOrWOpVy1xss7zOVJIuoezmVxxMnkVUzhRO53SaNokSREUy7KWm6Y5oI7jK6I2PwZyk2FHUvBV4F31Od7lSkQiS+KHu2YSmDCOiknH4Z8wDjund0KT3VNg8NHqFFZ84uWjVSl8s1UtStils8240QF+bJZxwjglILV1F61Zm1JNTAACQcHqz5M7aKFQHOB973KWpr7Hcu9S9rv2IaRgRHAk15bcyCT/FIYGh2mnUxpNG6c19KFcDrwZtS2BRaZpSuBflmXVusyvaZpXAVcBWJZFdnZ2kwxwu921nysl4otNiHeWY7z3AeKDlYgKP9LjQZ5wLKGLfos8aQJyxBBchkEGkIi6yJ4CeP8jwdIPDZavMNj0lRKCzp0kJx4nueInIfJOkBwzVOJyGUCa86mdT5cBBCrzHApFO7Jq2m8XD4lknVjLIvEW7xpv84n4GFvYdJPdOck+hamh0zjJPoXuZEMqNNck9Tqvczuko+UXdJ5bmhYVFNM0J6ME5cSo3SdYlrXTNM2ewDumaW6yLGt5vPMdsYkIjmxqs1V2djb7v9xY6e53/zP/wPv+J1XNWE5nevCIgfgvORf/pOMIHDcKmR5VgO/f36S0K08/YPDRqqoayOYtqgaSkW4zbrSfc6erJqyhRwerDek9cKBp6SV6iYoSUcwH3uUsTV3Mcu9S9rh2AzA0MJxf+n9NXsUU
hgdHVDqdksBemneSYUdblqOj5Rd0nhuLs/RKwmgxQTFNczjwBHC6ZVn7Ivsty9rp/N9jmuY8YCwQV1ASgbG7ANfv/kqvZ15Vjqdsm96jTgMg3L0r/onj8E8Yh3/icdh9EudAaX+hYOVq1Qfy0SovG79SApKWZjN2ZIBzpvsYP9bP8MHBVuk/RDmd2sTS1MUs8y6JcTo1ibyKKUz052mnUxpNB6JFBMU0zf7AXODHlmV9FbU/AzAsyypxvk8F5iTDBmN3AZ3u+xcZL84HW1YbQ1R826+pmDiO0OAjEjYnpLBIsHKNlw9XRgTEjZSC1FSbMSOD/PbaYsaP8TNiaOsUEIBSUcoK7weO69vF/ODaBcDRwcFcWXo1ef6TGBEYhbtVtKRqNJrmJlnDhl8A8oBs0zR3AHeCcm1nWdajwB1Ad+Bh0zShanhwL2Ces88NPG9Z1lvJsLHrL28lZeVn1fzGRyj95aUHHX9RsWDlmqomrC83eZSAeCWjRwS4aVYJx48JMGJYgNY6kV8i+ca9hWXeKqdTQRGkk92JE/wTudZ/IxMr8uht92lpUzUaTSugXS9fXxfV3f3Kau5wd+avaXTixSWCTz6tEpAvNnqwbYE3RTLqmAAnjPUzfmyAkcMDeFuBgNTW7louyvk45UOWpS5hqXcxO9zbARgUPJI8/xQmVUxmdGBMm3Q61dHa1ztafkHnubHo5esTRLRXxh6PPovxX6tRzqZKyxwBWakEZN0GJSApHiUg111dyvgxfkYdE2g1S5pMz57KxpQvq3Y4/XGHBQ/novKfVDqdCgg/aXYaJ/gn8PPSXzLJP4WccNsZ2a3RaFqGDisoEeye2YT/cTcFV19Sp7vfsjLBqs9SKicSrvvSQzgs8LglI4cH+PVVSkBGjwi0Wv/powKj2eL5mqCoqo0h4RvPFu7KvIOBwcO4uOzH5PlPYox/HF5aiRJqNJo2QYcXlN0FBhdc7ubBe3vSM8rZVHm5mvD3oTMKa+0XHkIhgdstGTE0yK8uL2X8WD9jRgRJS2u9zYYhQnzj3sJ6z1p8ho8Q1WtgBgbXF/2GMyrO5JDwgJYxUqPRtAs6vKA88EgnVnwiuO+fnfnR6b7KJqzP16cQDAlcLskxQ4NcfVkpx48JMGZkgPT01ikgNjbfub5lfco61nk+Z71nHRs8X+Az1MKXGXYGPcI92esqwBY2HunBLLuIX5b9uoUt12g07YEOLSjbdhg890oGti147pV0nnslA8OQHDMkyFWXljJ+TIAxowJ0ymh9AiKRbHdtY71nbaWAfOlZT6lRCkCqncrg0FDM8osYHjyGYcFjODQ0kL1GAXm9xuPHjwsX15Re18I50Wg07YUOLSgP/6dTpUsSw4DJJ1bw0F8P0LlT6xIQiWSXsZP1KetY71nLF551rE9ZS6FRCECKTOGo4GDO8p3DsMAxDA8ew2GhQXHng/S0e3Fe2QW8kPEs55ZdoCceajSahNFhBWV3gYE1P4PIiDnbFny40ovPJ1pcUPYaBarJyhGQ9Z517HUVAOCSLo4MHcVU3zSGBYczPDiCI4JHNmoY76zS69ia/q2unWg0moTSaEExTVNYltW6XuGbwAOPdELGuABp7qXcAQ6I/dWEY33K2soZ6EIKDg8dwUR/HsMCxzAsOJyjg4NJrWcRyProaffizdB77LU71nh9jUaTXJpSQ7kfuD7RhjQ3LbGUe4ko5gvPetalrOULR0C2u7dVHj80NJCx/uMYFhzOsOAxDA4O1T7SNY0mbe5cOt97L66dO+nZty8ls2fjO+ecljZL0wHosDPlo0nG7NpyUc4Gzxes96xlnWct61PWstX9beXxfqH+DAsOZ2jwGIYFhjMsOLxZXdvqGcXtk7S5c8m8+WaMKJfWdmoqRX/8I74ZM1RnoWGAy5Vwr6GtgY5wjSNEvziEm/jikOiZ8vUKimma/7Ys63LnuwAetyzrikQZkEBaTFD8VLDRs6GyyWqdZy3fuL/GFqpNrXe4N8MCIyprHkODw+lmd2tyeomgIz14EdpVnsNhjB9+wJ2fj2vHjspP2iuvYPj9DY5GRsTFMJBCVImNYYAQyMh35yOjvlfbjgiUYVSdIwS4XFVhYrej44g6vzKOKHtqTTd6n8tFekYG5RUVVefExuHYEDcvMTbgctX4TeKmG70d75x421G/a404a4sj6py0+fNrvjikpVH0l780SlRaYumVgZEvlmVJ0zQPS1TibZEAAb7ybK7s8/jCs5bNnk2EhHJW1S3cneHBYzi94gyGBYYzNDicnnavFrZa0+YIBHDt3KmEIj8f9/btVcKRn49r505ENQdpEO7eHVGLmEig5JZbVEdhOKwcx9l2ze1wWO2TEhH5HvUR8bad87HtqnNityOfUAijvjij43C+x023lu1O4XDcRV/bO4bPR+d7723R5s2GCMpe0zSvAFYA44F99YRvE9S2rtXRgSG8vncRUH2W+foUJSAbPRsICPXQZtpZDAsM58rSXzDMabrqY/dFJE7wNe0U4fNVCcT27UokduzA7ewzdu+uVihKIbB79SKcm0tg1CjCZ55JODe36pOTg0xPp+fYsbjz82ukF87JoXTWrObMYotQWQuVslLUKkUpSqDqFakmniNi0qwh1A1JtwHi3vlvf4tbyria2EqTKBoiKJei3Oz+CtgM/CSpFjUT8da1cksP3eyu3NXljhqzzDvZnRgSHMZPyi5jeEBNFOwX7q/FQxMXUVRUe+1i+3ZcMR4+pdtNuG9fwjk5+CdOJJybS8gRinBuLuG+fWmIn4OS2bPjNoWUzJ5dx1ntECGqmq8cF6exdZa2XIdJf/HF+C8OCfbA2FgaIih+4AcgDDwCjARWJdOo5mBW6XW8kvFStX0hEeTD1A9YY69mSHAYF5RfxPDgCIYFhjMgPBCDxDjb0rRxpMTYt6/W2oVrxw6MkpLqp6SmEsrJIdyvH8GhQ6vVLkK5udi9eqk28oMk0txxsJ21mtZNa31xaIigPAssAy60LOtB0zT/BJycXLOST0+7F+eWmTyf8QwIMKTBWP947iiew2Ghw7XXwY5MOIyxe3c1gahWu8jPx6ioqHaK3blzpUD4jzuushkq3K8f4dxc7O7dm21Ule+cc/Cdc077GoSgqUZrfXFoSKnZw7KsR03HjWJ74prS63k1w8KPnxRSeKDwn3opko5AIIBr165KcXDH9mXU0uEdzs0ldOSR+E86SX3v16+ySUpmZrZQZjQdldb44tAQQdljmuYFQJppmmcDu5JsU7Oh17Vqn1Tr8I76uHfvptfWrU3u8NZoNHXTEEH5GXAF8CmQC1yZVIuaGb2uVdujWod3nJFSrn3VByJGOrw59FD8EyYQ7tevSR3eGo2mbhoiKEdZlvVP0zR7Aj8FBgCb6jvJNM3/ANOBPZZlDY1zXAB/B6YB5cBPLcv61Dl2KXC7E/Ruy7L+2wA7m4Re16qVEd3hHV27iOrHMIqrr7VW2eGdm1uzwzsnB7t3b3C5yM7OprCVNA1oNO2RhgjK34CTgDmozvknUfNR6uMp4J/A07UcPx0Y5HzGoUaQjTN
NsxtwJ3AsamTfGtM0X7Ms60AD0tS0diId3pHaRGRIbdSM7wZ3eOfmEu7Xr1k7vDUaTe00RFDSTdP0Al7Lsl4wTfPqhkRsWdZy0zQH1BHkLOBpZ+Xij03TzDJNsw+QB7xjWdZ+ANM03wFOA15oSLqaFibS4R1bs4ie4R2s7oY43K2baoY64gj8U6ZUG06rO7w1mrZDQ4cNLwDuNE0zFdiaoLRzgO1R2zucfbXt17QChM9XVZuIqV24t2+vu8N75EjCP/pRteG0usNbo2k/1CsolmU9BDwUteunCUo7XhuFrGN/DUzTvAo1ix/LssjOzm6SIW63u8nntjWMF17AdccdsH07ffr1IzxnDvaFF1YFKCpCbNsG332H2Lat8sO2bYjvv0cUFFSLT7rdkJuLPOQQ5CmnYPfvr77374/s3x/69avs8HbTsh7dOtJ1ho6XX9B5bmla8vneAfSL2s4Fdjr782L2L40XgWVZjwGPOZuyqWOxW9M47mSSNncumb/5DSLSR7FtG67LL8e+7z5EOFxvh3d46tRaO7xrpbj5nJXVR0e5zhE6Wn5B57mx9E3wUi21Coppml2T3BH+GjDLNM0XUZ3yRZZl7TJN823gHtM0uzrhpgK3JNGO9oPfr/ovIk1Szmq17vx8te/bb2tU/0Q4jGfTJvx5ebrDW6PRHBR11VD+5BTqXwOLgBWWZYXqCF8N0zRfQNU0sk3T3IEaueUBsCzrUWAhasjwFtSw4cucY/tN07yLqvXC5kQ66Ds0UiIKC9Xci4hARITD6ew29uypsWx3uFcvwn37EhwyBNe338aPOxxm/3+TNjJbo9F0EBriYGsQcApwPKp/YyUw17KsHck386ctywAAEAhJREFUr1G0Ko+NjSYUwvXDD9VFIuZjlJVVO0WmplauUFs5Ua9v36paRp8+4PVWhq9tafNQTg57Pvkk6VlsaVrFdW5GOlp+Qee5sTS7gy3Lsr5G1VIeNk3ThWqe6oPq69A0EFFSUr1WsXNndfH44QflAyGKcPfuSiwOO0wtaZ6TU9UclZPT6Oao1rpCqUajaR80qlPesqwwytGWJhrbxti9u1Iw3PGao4qKqp0iPR5Vm+jbl8Dxx1fruwj17Yudk4NMS0uoma11hVKNRtM+6NBrtKfNnVtZuPaso3CtnHsRKxSRz65dNSbr2ZmZlQLhP+64Gs1Rdo8eCfF/0Vha4wqlGo2mfVCvoJimeZplWW+Zpnk4cD3wkmVZy5NvWnJJmzu3WvOPOz+frBtvxLt4MXa3btX8edfwrmcYhPv0IZyTQ2D06KqmqKjFBmXnzi2RLY1Go2kxGlJDuQl4C7gVeBz4BzAmmUY1B53vvbdaXwKACARInzcPOyOjsiYRPOaYquaoiGD06lXpVlSj0Wg0ioaUip1N0+wPhC3L+sg0zbJ6z2gDuGoZESaF4IfNm/XcC41Go2kkDXGS/ifgbuCvzlpeHyfXpOYhXMsM0XDfvlpMNBqNpgk0pIZyKDDbsqzIK327GGOqh9BqNBpNYmmIoHwL/MU0zUzgf8Ar7WHmuh5Cq9FoNIml3pnyERzHV4+ifJO8DfzdsqwPkmhbY2nbM+WbGZ3n9k9Hyy/oPDeWZp8pb5rm6cBMoCuqhnKVY8ACYGKiDNFoNBpN26YhTV7DgFsty6q2CJRpmlcmxySNRqPRtEUaIigvAb8zTbMTcClwmWVZT1iWtTm5pmk0Go2mLdGQYcP/Bh4A+jpreV1YT3iNRqPRdEAaIiguy7I2NfIcjUaj0XQwGiIOi03TfBToa5rm34F3kmyTRqPRaNog9QqKZVl3Af8Efodq/no42UZpNBqNpu1Rq6CYpvmyaZopAJZlfWFZ1suAH+UOWKPRaDSaatRVQ3kRWGiaZhaAaZqnAs8AP2kOwzQajUbTtqhVUCzLehW4A3jDNM07UL5QpsZ00Gs0Go1GA9QxD8U0zbsACeQDNwCPADeYpollWXfUF7FpmqcBfwdcwBOWZd0bc/z/gMnOZjrQ07KsSG0oDKx3jm2zLOvMRuVKo9FoNM1OXRMb33X+vwc81JhITdN0OeecAuwAVpmm+ZplWRsiYSzLuj4q/DXAyKgofJZljWhMmhqNRqNpWWoVFMuylh1EvGOBLZZlfQtgmuaLwFnAhlrCXwjceRDpaTQajaaFSZYf2xxge9T2DmBcvICmaR6C8rmyOGp3qmmaq4EQcK9lWfNrOfcq1GKVWJZFdnZ2k4x1u91NPretovPc/ulo+QWd55YmWYISbznk2tbJn4nysRKO2tffsqydpmkORE2sXG9Z1jexJ1qW9RjwWCT+pi7hrJe87hh0tDx3tPyCznNj6VuL59qmkqxlVHYA/aK2c4HanJXMBF6I3hHxDuk0mS2lev+KRqPRaFohyaqhrAIGmaZ5KGqU2EzgothApmkeifKz8lHUvq5AuWVZftM0s4ETgL8kyU6NRqPRJIik1FAsywoBs1CeHTeqXdaXpmnOMU0zegjwhcCLlmVFN4cdDaw2TXMtsATVh1JbZ75Go9FoWgkNdgHcBtAugBuBznP7p6PlF3SeG0uiXQDrpeg1Go1GkxC0oGg0Go0mIWhB0Wg0Gk1C+P/27j/Ys7qu4/hz22tSagFep7yAIwqVpMOPNnCiAUSgrZzFyXqLSCNBs2NBWNpUFA1BPwZ1RqMJy22x1hGkV1sWNQQ5AxtOSi4kRuxWs6LF5eLAgiEpQYu3P87Z+Hq70O7dz9nv3vt9Pma+s99zzudz7vsz++O153zO9/M1UCRJTRgokqQmDBRJUhMGiiSpCQNFktSEgSJJasJAkSQ1YaBIkpowUCRJTRgokqQmDBRJUhMGiiSpCQNFktSEgSJJasJAkSQ1YaBIkpqYGurEVbUWuBpYDWxMctWC4+cD7wUe6Hf9bpKN/bG3AZf1+38jyaah6pQktTFIoFTVauAa4ExgFthaVTcm2bag6R8nuXhB30OBy4E1wDxwV9/3S0PUKklqY6hbXicCO5Lcl+Qp4Abg7D3s+wPAx5M82ofIx4G1A9UpSWpkqFtehwH3j2zPAict0u5NVXUK8K/AzyW5/1n6HrbYD6mq9cB6gCRMT08vqdipqakl912uHPPKN2njBcc8bkMFyqpF9s0v2P5L4KNJnqyqtwObgNP3sC8ASTYAG3a32blz55KKnZ6eZql9lyvHvPJN2njBMe+tmZmZprUMFSizwBEj24cDc6MNkjwysvkHwLtH+p62oO+W5hVKkpoaKlC2AkdX1ZF0T3GdA5w72qCqXprkwX5zHbC9f38L8FtVdUi/fRZw6UB1SpIaGWRSPsku4GK6cNje7cq9VXVlVa3rm11SVfdW1WeBS4Dz+76PAr9OF0pbgSv7fZKkA9iq+flFpyeWo/m5ubn/v9UivO86GSZtzJM2XnDMe6ufQ1ls3npJ/KS8JKkJA0WS1ISBIklqwkCRJDVhoEiSmjBQJElNGCiSpCYMFElSEwaKJKkJA0WS1ISBIklqwkCRJDVhoEiSmjBQJElNGCiSpCYMFElSEwaKJKkJA0WS1ISBIklqYmqoE1fVWuBqYDWwMc
lVC46/E/hJYBfwMHBBkn/rjz0N3NM3/fck64aqU5LUxiCBUlWrgWuAM4FZYGtV3Zhk20izzwBrkny1qn4KeA/w5v7YE0mOG6I2SdIwhrpCORHYkeQ+gKq6ATgb+N9ASXLbSPs7gPMGqkWStB8MFSiHAfePbM8CJz1H+wuBvx7ZPqiq7qS7HXZVkj9frFNVrQfWAyRhenp6ScVOTU0tue9y5ZhXvkkbLzjmcRsqUFYtsm9+sYZVdR6wBjh1ZPfLksxV1SuAW6vqniSfW9g3yQZgw+7z79y5c0nFTk9Ps9S+y5VjXvkmbbzgmPfWzMxM01qGesprFjhiZPtwYG5ho6o6A/gVYF2SJ3fvTzLX/3ofsAU4fqA6JUmNDHWFshU4uqqOBB4AzgHOHW1QVccDHwTWJnloZP8hwFeTPFlV08DJdBP2kqQD2CBXKEl2ARcDtwDbu125t6qurKrdjwC/F3gh8CdVdXdV3djvfxVwZ1V9FriNbg5lG5KkA9qq+flFpzaWo/m5uf9zV22PeN91MkzamCdtvOCY91Y/h7LYnPeS+El5SVITBookqQkDRZLUhIEiSWrCQJEkNWGgSJKaMFAkSU0YKJKkJgwUSVITBookqQkDRZLUhIEiSWrCQJEkNWGgSJKaMFAkSU0YKJKkJgwUSVITBookqQkDRZLUhIEiSWpiaqgTV9Va4GpgNbAxyVULjj8f+DDwPcAjwJuTfKE/dilwIfA0cEmSW4aqU5LUxiBXKFW1GrgG+EHgGOAtVXXMgmYXAl9KchTwfuDdfd9jgHOA7wbWAh/ozydJOoANdcvrRGBHkvuSPAXcAJy9oM3ZwKb+/Wbg9VW1qt9/Q5Ink3we2NGfT5J0ABvqltdhwP0j27PASc/WJsmuqnoMeHG//44FfQ9b7IdU1XpgfX8OZmZmllzwvvRdrhzzyjdp4wXHPE5DXaGsWmTf/B622ZO+ACTZkGRNkjV9vyW9ququfem/HF+OeeW/Jm28jnnJr2aGCpRZ4IiR7cOBuWdrU1VTwLcCj+5hX0nSAWaoW15bgaOr6kjgAbpJ9nMXtLkReBvwKeBHgVuTzFfVjcD1VfU+YAY4Gvj0QHVKkhoZ5AolyS7gYuAWYHu3K/dW1ZVVta5vdi3w4qraAbwT+KW+771AgG3AzcBFSZ4eos4RGwY+/4HIMa98kzZecMxjtWp+ftHpCUmS9oqflJckNWGgSJKaGGzpleWgqj4EvAF4KMmrx13P0KrqCLrlbr4d+BqwIcnV461qWFV1EHA78Hy6P++bk1w+3qr2j36FiTuBB5K8Ydz1DK2qvgA8Trdk067+4wQrWlUdDGwEXk338YoLknxqXPVM+hXKH9Et7zIpdgHvSvIq4LXARYssibPSPAmcnuRY4DhgbVW9dsw17S/voHsoZpK8LslxkxAmvauBm5N8F3AsY/79nugrlCS3V9XLx13H/pLkQeDB/v3jVbWdbhWCbWMtbEBJ5oH/7Def179W/JMoVXU48MPAb9I9RakVpqq+BTgFOB+gX+bqqXHWNNGBMsn6ID0e+PsxlzK4/tbPXcBRwDVJVvyYgd8GfgF40bgL2Y/mgb+pqnngg0kOmMdpB/IK4GHgD6vqWLo/4+9I8pVxFTTpt7wmUlW9EPhT4GeTfHnc9QwtydNJjqNbdeHEqlrR82VVtXte8K5x17KfnZzkBLpVzi+qqlPGXdDApoATgN9LcjzwFfrP842LgTJhqup5dGFyXZI/G3c9+1OS/wC2sPLnzU4G1vWT1DcAp1fVR8Zb0vCSzPW/PgR8jJW/SvksMDtyxb2ZLmDGxkCZIP3XA1wLbE/yvnHXsz9U1Uv6J2Goqm8CzgD+ebxVDSvJpUkOT/JyumWPbk1y3pjLGlRVvaCqXrT7PXAW8E/jrWpYSb4I3F9V39nvej1jng+d6DmUqvoocBowXVWzwOVJrh1vVYM6Gfhx4J6qurvf98tJbhpjTUN7KbCpn0f5BrplgP5qzDWpvW8DPlZV0P27dn2Sm8db0n7xM8B1VfWNwH3AT4yzGJdekSQ14S0vSVITBookqQkDRZLUhIEiSWrCQJEkNTHRjw1LC1XVqcDldP/Zehr41SSfrKrHgH+gWwvsArqvpz4jyWV9v18DtiTZMnKub6ZbAuU7+n4bkmzah9oOplvocqI+kKrlwysUqVdV08AVwBuTnAa8EXiiP3xPktcB76JbI2tPXA78bX+u7wc+v48lHgz8yD6eQxqMVyjSM34I+Mju9c2SPA58ZkGbu+nWBNsT35fkF/tzzdN9LwtV9Tt0S+l/GXgr3SKdZyS5rKrO7/tuAT4EPAocCZwNrAfOrKotwI8leXjvhygNx0CRnjED3ANQVecCPw3ckeTnR9qcAvzLUn9AVX0v8IIkp1TVecDbefYVnw+hWyrmLcCbgA3Ay1b6MipavrzlJT3jQbpQIcn1wHnAdH/sNVV1G13IXAX8F923QO52EM/cHnsur6Sbi4Hu2xSP4uu/n2XVyPttSb4GPEB3u0s6oHmFIj3jJmBzVSXJY3z934/dcygAVNVTwPFVtfs/ZScA71lwvk9W1VuTXNcvzHky3XpLZ/XH1wCfAx6jW3MM4DXAP/bvFwbNfwOr92WA0pC8QpF6/ZzEFcBfVNWtwAeADz9L20fovgbgduATdN9V/+iCZlcAp/ZzHn8HvDLJp4EnquoTwLnA79MFyExV3QS85DlK/CJwaFVtrqpDlzhMaTAuDilJasIrFElSEwaKJKkJA0WS1ISBIklqwkCRJDVhoEiSmjBQJElN/A/+VEKtOoWvCAAAAABJRU5ErkJggg==\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "print(\"Unmanaged Memory Plots\")\n", + "system = \"summit\"\n", + "for exp_type in EXP_TYPES:\n", + " for exp in EXPERIMENTS:\n", + " if exp == \"duplicate_keys\":\n", + " continue\n", + " plotThroughput(system, exp, exp_type, \"noindex_nomanaged\")\n", + " # plotSpeedup(system, exp, exp_type, \"index_nomanaged\")\n", + " \n", + "print(\"Managed Memory Plots\")\n", + "for exp_type in EXP_TYPES:\n", + " for exp in EXPERIMENTS:\n", + " if exp == \"duplicate_keys\":\n", + " continue\n", + " plotThroughput(system, exp, exp_type, \"noindex_managed\")\n", + " # plotSpeedup(system, exp, exp_type, \"index_managed\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/hash-graph-dehornetify/experiments/snsg/experiment.sh b/hash-graph-dehornetify/experiments/snsg/experiment.sh new file mode 100755 index 0000000..a4ee179 --- /dev/null +++ b/hash-graph-dehornetify/experiments/snsg/experiment.sh @@ -0,0 +1 @@ +make ../../build/sing-hash && ../../build/sing-hash 268435456 268435456 16000 diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/duplicate_keys.sh b/hash-graph-dehornetify/experiments/summit/duplicate_keys/duplicate_keys.sh new file mode 100755 index 0000000..ede4c4d --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/duplicate_keys/duplicate_keys.sh @@ -0,0 +1,73 @@ +keycount=29 +gpucount=6 +tablesizes=($(seq 20 1 30)) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' ../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +# rm $resultsfile +# echo "keycount,tablesize,gpucount,time" >> $resultsfile +echo "duplicate_keys" +echo "keycount,tablesize,gpucount,time" +echo "build tests" +# echo "build tests" >> $resultsfile + +for i in "${tablesizes[@]}" + do + let kc=$((echo 2^$keycount) | bc) + # let gc=$((echo 2^$gpucount) | bc) + let gc=$gpucount + let ts=$((echo 2^$i) | bc) + + # echo "tableSize: ${ts}" + # internal cuda malloc + keys + hashes + keyBinBuff + temp space + let gigs=$((echo "((($kc * $1) + ($kc * 8) + (2 * $kc * $1) + (2 * $ts * 8)) + ($kc * 8) + ($kc * 8) + ($kc * $1) + ($ts * 8)) / 2^30") | bc) + let gpureq=$((echo "($gigs + 16) / 16") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${ts},${gc},oom" + continue + fi + + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc build | grep "time") + + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${ts},${gc},${time}" >> $resultsfile + echo "${kc},${ts},${gc},${time}" + done + +echo "intersect tests" +# echo "intersect tests" >> $resultsfile + +keycount=$((echo $keycount - 1) | bc) +for i in "${tablesizes[@]}" + do + let kc=$((echo 2^$keycount) | bc) + # let gc=$((echo 2^$gpucount) | bc) + let gc=$gpucount + let ts=$((echo 2^$i) | bc) + + # echo "tableSize: ${ts}" + # internal cuda malloc + keys + hashes + keyBinBuff + temp space + let gigs=$((echo "((($kc * $1) + ($kc * 8) + (2 * $kc 
* $1) + (2 * $ts * 8)) + ($kc * 8) + ($kc * 8) + ($kc * $1) + ($ts * 8)) / 2^30") | bc) + let gpureq=$((echo "($gigs * 2 + 16) / 16") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${ts},${gc},oom" + continue + fi + ans=$(./$execpath/multi-hash $kc $ts $bincount $gc $bincount nocheck $kc intersect | grep "time") + + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${ts},${gc},${time}" >> $resultsfile + echo "${kc},${ts},${gc},${time}" + done diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/duplicate_keys_index_managed.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/duplicate_keys_index_managed.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/duplicate_keys_index_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/duplicate_keys_index_nomanaged.txt new file mode 100644 index 0000000..f4661c0 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/duplicate_keys_index_nomanaged.txt @@ -0,0 +1,11 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +67108864,2097152,6,0.121083 +67108864,4194304,6,0.126351 +67108864,8388608,6,0.129461 +67108864,16777216,6,0.136148 +67108864,33554432,6,0.13262 +67108864,67108864,6,0.137031 +67108864,134217728,6,0.162362 +67108864,268435456,6,0.210479 diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/duplicate_keys_noindex_managed.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/duplicate_keys_noindex_managed.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/duplicate_keys_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/duplicate_keys_noindex_nomanaged.txt new file mode 100644 index 0000000..cccaac3 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/duplicate_keys_noindex_nomanaged.txt @@ -0,0 +1,11 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +67108864,2097152,6,0.0649321 +67108864,4194304,6,0.0677294 +67108864,8388608,6,0.0694576 +67108864,16777216,6,0.0796526 +67108864,33554432,6,0.0865636 +67108864,67108864,6,0.0947823 +67108864,134217728,6,0.123947 +67108864,268435456,6,0.169846 diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/index_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/index_nomanaged.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/noindex_nomanaged.txt new file mode 100644 index 0000000..2a7e132 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/build/noindex_nomanaged.txt @@ -0,0 +1,8 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +67108864,2097152,6, +67108864,4194304,6, +67108864,8388608,6, +67108864,16777216,6, +67108864,33554432,6, diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/duplicate_keys_index_managed.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/duplicate_keys_index_managed.txt new file mode 100644 index 0000000..e69de29 diff --git 
a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/duplicate_keys_index_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/duplicate_keys_index_nomanaged.txt new file mode 100644 index 0000000..fe037f7 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/duplicate_keys_index_nomanaged.txt @@ -0,0 +1,26 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +67108864,2097152,6,0.121083 +67108864,4194304,6,0.126351 +67108864,8388608,6,0.129461 +67108864,16777216,6,0.136148 +67108864,33554432,6,0.13262 +67108864,67108864,6,0.137031 +67108864,134217728,6,0.162362 +67108864,268435456,6,0.210479 +67108864,536870912,6,0.280999 +67108864,1073741824,6, +67108864,2147483648,6, +intersect tests +33554432,2097152,6,0.174217 +33554432,4194304,6,0.16194 +33554432,8388608,6,0.164696 +33554432,16777216,6,0.166058 +33554432,33554432,6,0.163939 +33554432,67108864,6,0.193824 +33554432,134217728,6,0.260464 +33554432,268435456,6,0.374244 +33554432,536870912,6, +33554432,1073741824,6, +33554432,2147483648,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/duplicate_keys_noindex_managed.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/duplicate_keys_noindex_managed.txt new file mode 100644 index 0000000..bdc721e --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/duplicate_keys_noindex_managed.txt @@ -0,0 +1,12 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +67108864,2097152,6,0.137518 +67108864,4194304,6,0.126625 +67108864,8388608,6,0.135282 +67108864,16777216,6,0.138432 +67108864,33554432,6,0.150138 +67108864,67108864,6,0.0921288 +67108864,134217728,6,0.116294 +67108864,268435456,6,0.154318 +67108864,536870912,6,0.239541 diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/duplicate_keys_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/duplicate_keys_noindex_nomanaged.txt new file mode 100644 index 0000000..0c6ee65 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/duplicate_keys_noindex_nomanaged.txt @@ -0,0 +1,26 @@ +duplicate_keys +keycount,tablesize,gpucount,time +build tests +67108864,2097152,6,0.0649321 +67108864,4194304,6,0.0677294 +67108864,8388608,6,0.0694576 +67108864,16777216,6,0.0796526 +67108864,33554432,6,0.0865636 +67108864,67108864,6,0.0947823 +67108864,134217728,6,0.123947 +67108864,268435456,6,0.169846 +67108864,536870912,6,0.260308 +67108864,1073741824,6, +67108864,2147483648,6, +intersect tests +33554432,2097152,6,0.0965066 +33554432,4194304,6,0.0906821 +33554432,8388608,6,0.0876623 +33554432,16777216,6,0.0932613 +33554432,33554432,6,0.120785 +33554432,67108864,6,0.151709 +33554432,134217728,6,0.205898 +33554432,268435456,6,0.328846 +33554432,536870912,6, +33554432,1073741824,6, +33554432,2147483648,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/duplicate_keys_index_managed.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/duplicate_keys_index_managed.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/duplicate_keys_index_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/duplicate_keys_index_nomanaged.txt new file mode 100644 index 0000000..550c952 --- /dev/null +++ 
b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/duplicate_keys_index_nomanaged.txt @@ -0,0 +1,12 @@ +intersect tests +33554432,2097152,6,0.174217 +33554432,4194304,6,0.16194 +33554432,8388608,6,0.164696 +33554432,16777216,6,0.166058 +33554432,33554432,6,0.163939 +33554432,67108864,6,0.193824 +33554432,134217728,6,0.260464 +33554432,268435456,6,0.374244 +33554432,536870912,6, +33554432,1073741824,6, +33554432,2147483648,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/duplicate_keys_noindex_managed.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/duplicate_keys_noindex_managed.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/duplicate_keys_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/duplicate_keys_noindex_nomanaged.txt new file mode 100644 index 0000000..3399f60 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/duplicate_keys_noindex_nomanaged.txt @@ -0,0 +1,12 @@ +intersect tests +33554432,2097152,6,0.0965066 +33554432,4194304,6,0.0906821 +33554432,8388608,6,0.0876623 +33554432,16777216,6,0.0932613 +33554432,33554432,6,0.120785 +33554432,67108864,6,0.151709 +33554432,134217728,6,0.205898 +33554432,268435456,6,0.328846 +33554432,536870912,6, +33554432,1073741824,6, +33554432,2147483648,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/index_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/index_nomanaged.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/noindex_nomanaged.txt new file mode 100644 index 0000000..d53e0c5 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/duplicate_keys/results/intersect/noindex_nomanaged.txt @@ -0,0 +1,9 @@ +intersect tests +33554432,2097152,6, +33554432,4194304,6, +33554432,8388608,6, +33554432,16777216,6, +33554432,33554432,6, +33554432,67108864,6, +33554432,134217728,6, +33554432,268435456,6, diff --git a/hash-graph-dehornetify/experiments/summit/experiments.lsf b/hash-graph-dehornetify/experiments/summit/experiments.lsf new file mode 100755 index 0000000..147cca6 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/experiments.lsf @@ -0,0 +1,146 @@ +#!/bin/bash +#BSUB -P BIF115 +#BSUB -W 2:00 +#BSUB -nnodes 1 +#BSUB -alloc_flags gpumps +#BSUB -J snmg-hg-experiments +#BSUB -o snmg-hg-experiments.%J +#BSUB -e snmg-hg-experiments.%J + +module load cuda +module load gcc + +buildpath="../../build" +includepath="../../include" + +declare -a modes=("noindex_nomanaged" "index_nomanaged" "noindex_managed" "index_managed") +# declare -a exps=("strong_scaling" "weak_scaling" "duplicate_keys") +declare -a exps=("strong_scaling" "weak_scaling") + +keysize=8 +for mode in "${modes[@]}" + do + bytes=8 + if [ "$mode" == "noindex_nomanaged" ]; then + sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + elif [ "$mode" == "index_nomanaged" ]; then + sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh + 
bytes=16 + elif [ "$mode" == "noindex_managed" ]; then + sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh + sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh + elif [ "$mode" == "index_managed" ]; then + sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh + sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh + bytes=16 + fi + + make -C $buildpath multi-hash + + echo $mode + + for exp in "${exps[@]}" + do + echo $exp + jsrun -n 1 -g 6 ./$exp/$exp.sh $bytes $keysize > ./$exp/results/$exp\_$mode.txt + head -n -$(cat ./$exp/results/$exp\_$mode.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode.txt > ./$exp/results/build/$exp\_$mode.txt + tail -n +$(cat ./$exp/results/$exp\_$mode.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode.txt > ./$exp/results/intersect/$exp\_$mode.txt + done + done + +# #!/bin/bash +# #BSUB -P BIF115 +# #BSUB -W 2:00 +# #BSUB -nnodes 1 +# #BSUB -alloc_flags gpumps +# #BSUB -J snmg-hg-experiments +# #BSUB -o snmg-hg-experiments.%J +# #BSUB -e snmg-hg-experiments.%J +# +# buildpath="../../build" +# includepath="../../include" +# +# module load cuda +# module load gcc +# +# declare -a modes=("noindex_nomanaged" "index_nomanaged" "noindex_managed" "index_managed") +# # declare -a exps=("strong_scaling" "weak_scaling" "duplicate_keys") +# declare -a exps=("strong_scaling" "weak_scaling") +# +# # 64-bit section +# sed -i 's/^#define B32/\/\/&/' $includepath/MultiHashGraph.cuh +# keysize=8 +# echo "64-bit keys" +# +# for mode in "${modes[@]}" +# do +# bytes=8 +# if [ "$mode" == "noindex_nomanaged" ]; then +# sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh +# sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh +# elif [ "$mode" == "index_nomanaged" ]; then +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh +# sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh +# bytes=16 +# elif [ "$mode" == "noindex_managed" ]; then +# sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh +# sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh +# elif [ "$mode" == "index_managed" ]; then +# sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh +# bytes=16 +# fi +# +# make -C $buildpath multi-hash +# +# echo $mode +# +# for exp in "${exps[@]}" +# do +# echo $exp +# jsrun -n 1 -g 6 ./$exp/$exp.sh $bytes $keysize > ./$exp/results/$exp\_$mode.txt +# head -n -$(cat ./$exp/results/$exp\_$mode.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode.txt > ./$exp/results/build/$exp\_$mode.txt +# tail -n +$(cat ./$exp/results/$exp\_$mode.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode.txt > ./$exp/results/intersect/$exp\_$mode.txt +# done +# done +# +# # 32-bit section +# # sed -i 's/^\/\/.*#define B32/#define B32/' $includepath/MultiHashGraph.cuh +# # keysize=4 +# # echo "32-bit keys" +# # +# # for mode in "${modes[@]}" +# # do +# # bytes=8 +# # if [ "$mode" == "noindex_nomanaged" ]; then +# # sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh +# # sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh +# # elif [ "$mode" == "index_nomanaged" ]; then +# # sed -i 's/^\/\/.*#define 
INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh +# # sed -i 's/^#define MANAGED_MEM/\/\/&/' $includepath/MultiHashGraph.cuh +# # bytes=12 +# # continue +# # elif [ "$mode" == "noindex_managed" ]; then +# # sed -i 's/^#define INDEX_TRACK/\/\/&/' $includepath/MultiHashGraph.cuh +# # sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh +# # elif [ "$mode" == "index_managed" ]; then +# # sed -i 's/^\/\/.*#define MANAGED_MEM/#define MANAGED_MEM/' $includepath/MultiHashGraph.cuh +# # sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' $includepath/MultiHashGraph.cuh +# # bytes=12 +# # continue +# # fi +# # +# # make -C $buildpath multi-hash +# # +# # echo $mode +# # +# # for exp in "${exps[@]}" +# # do +# # echo $exp +# # jsrun -n 1 -g 6 ./$exp/$exp.sh $bytes $keysize > ./$exp/results/$exp\_$mode\_32.txt +# # head -n -$(cat ./$exp/results/$exp\_$mode\_32.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode\_32.txt > ./$exp/results/build/$exp\_$mode\_32.txt +# # tail -n +$(cat ./$exp/results/$exp\_$mode\_32.txt | grep -n intersect | cut -f1 -d:) ./$exp/results/$exp\_$mode\_32.txt > ./$exp/results/intersect/$exp\_$mode\_32.txt +# # done +# # done diff --git a/hash-graph-dehornetify/experiments/summit/snmg-hg-experiments.41036 b/hash-graph-dehornetify/experiments/summit/snmg-hg-experiments.41036 new file mode 100644 index 0000000..dbe88e8 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/snmg-hg-experiments.41036 @@ -0,0 +1,242 @@ +64-bit keys +make: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[2]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +Scanning dependencies of target alg +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 20%] Building CUDA object CMakeFiles/alg.dir/src/MultiHashGraph.cu.o +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphDeviceOperators.cuh(311): warning: variable "key" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphHostOperators.cuh(154): warning: variable "_d_temp_storage" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphHostOperators.cuh(286): warning: variable "size" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(83): warning: variable "seed" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(409): warning: variable "start" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(409): warning: variable "stop" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(411): warning: variable "buildTime" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(412): warning: variable "t1" was declared but never 
referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(413): warning: variable "t2" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(548): warning: variable "gpuCount" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(554): warning: variable "d_counterA" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(558): warning: variable "d_counterB" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(561): warning: variable "d_GlobalCounter" was declared but never referenced + +[ 40%] Linking CUDA static library libalg.a +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 60%] Built target alg +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +Scanning dependencies of target multi-hash +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 80%] Building CUDA object CMakeFiles/multi-hash.dir/test/MultiHashGraphTest.cu.o +[100%] Linking CUDA executable multi-hash +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[100%] Built target multi-hash +make[2]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +noindex_nomanaged +strong_scaling +weak_scaling +make: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[2]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +Scanning dependencies of target alg +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 20%] Building CUDA object CMakeFiles/alg.dir/src/MultiHashGraph.cu.o +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphDeviceOperators.cuh(311): warning: variable "key" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphHostOperators.cuh(154): warning: variable "_d_temp_storage" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphHostOperators.cuh(286): warning: variable "size" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(83): warning: variable "seed" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(409): warning: variable "start" was declared but never referenced + 
+/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(409): warning: variable "stop" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(411): warning: variable "buildTime" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(412): warning: variable "t1" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(413): warning: variable "t2" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(548): warning: variable "gpuCount" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(554): warning: variable "d_counterA" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(558): warning: variable "d_counterB" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(561): warning: variable "d_GlobalCounter" was declared but never referenced + +[ 40%] Linking CUDA static library libalg.a +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 60%] Built target alg +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +Scanning dependencies of target multi-hash +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 80%] Building CUDA object CMakeFiles/multi-hash.dir/test/MultiHashGraphTest.cu.o +[100%] Linking CUDA executable multi-hash +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[100%] Built target multi-hash +make[2]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +index_nomanaged +strong_scaling +head: : invalid number of lines +tail: +: invalid number of lines +weak_scaling +Error: Remote JSM server is not responding on host batch504-17-2020 03:47:33:637 128487 main: Error initializing RM connection. Exiting. 
+head: : invalid number of lines +tail: +: invalid number of lines +make: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[2]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +Scanning dependencies of target alg +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 20%] Building CUDA object CMakeFiles/alg.dir/src/MultiHashGraph.cu.o +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphDeviceOperators.cuh(311): warning: variable "key" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphHostOperators.cuh(154): warning: variable "_d_temp_storage" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphHostOperators.cuh(232): warning: variable "keyCount" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphHostOperators.cuh(233): warning: variable "hashRange" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(83): warning: variable "seed" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(409): warning: variable "start" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(409): warning: variable "stop" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(411): warning: variable "buildTime" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(412): warning: variable "t1" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(413): warning: variable "t2" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(548): warning: variable "gpuCount" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(554): warning: variable "d_counterA" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(558): warning: variable "d_counterB" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(561): warning: variable "d_GlobalCounter" was declared but never referenced + +[ 40%] Linking CUDA static library libalg.a +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 60%] Built target alg +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +Scanning dependencies of target multi-hash +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory 
`/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 80%] Building CUDA object CMakeFiles/multi-hash.dir/test/MultiHashGraphTest.cu.o +[100%] Linking CUDA executable multi-hash +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[100%] Built target multi-hash +make[2]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +noindex_managed +strong_scaling +Error: Remote JSM server is not responding on host batch504-17-2020 03:47:56:532 128803 main: Error initializing RM connection. Exiting. +head: : invalid number of lines +tail: +: invalid number of lines +weak_scaling +Error: Remote JSM server is not responding on host batch504-17-2020 03:47:58:077 128862 main: Error initializing RM connection. Exiting. +head: : invalid number of lines +tail: +: invalid number of lines +make: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[2]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +Scanning dependencies of target alg +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 20%] Building CUDA object CMakeFiles/alg.dir/src/MultiHashGraph.cu.o +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphDeviceOperators.cuh(311): warning: variable "key" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphHostOperators.cuh(154): warning: variable "_d_temp_storage" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphHostOperators.cuh(232): warning: variable "keyCount" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphHostOperators.cuh(233): warning: variable "hashRange" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(83): warning: variable "seed" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(409): warning: variable "start" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(409): warning: variable "stop" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(411): warning: variable "buildTime" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(412): warning: variable "t1" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(413): warning: variable "t2" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(548): warning: variable "gpuCount" 
was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(554): warning: variable "d_counterA" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(558): warning: variable "d_counterB" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(561): warning: variable "d_GlobalCounter" was declared but never referenced + +[ 40%] Linking CUDA static library libalg.a +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 60%] Built target alg +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +Scanning dependencies of target multi-hash +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 80%] Building CUDA object CMakeFiles/multi-hash.dir/test/MultiHashGraphTest.cu.o +[100%] Linking CUDA executable multi-hash +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[100%] Built target multi-hash +make[2]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +index_managed +strong_scaling +Error: Remote JSM server is not responding on host batch504-17-2020 03:48:20:744 129186 main: Error initializing RM connection. Exiting. +head: : invalid number of lines +tail: +: invalid number of lines +weak_scaling +Error: Remote JSM server is not responding on host batch504-17-2020 03:48:22:282 129251 main: Error initializing RM connection. Exiting. +head: : invalid number of lines +tail: +: invalid number of lines + +------------------------------------------------------------ +Sender: LSF System +Subject: Job 41036: in cluster Exited + +Job was submitted from host by user in cluster at Fri Apr 17 03:40:08 2020 +Job was executed on host(s) <1*batch5>, in queue , as user in cluster at Fri Apr 17 03:40:38 2020 + <42*h50n08> + was used as the home directory. + was used as the working directory. +Started at Fri Apr 17 03:40:38 2020 +Terminated at Fri Apr 17 03:48:23 2020 +Results reported at Fri Apr 17 03:48:23 2020 + +The output (if any) is above this job summary. 
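The "head: : invalid number of lines" and "tail: +: invalid number of lines" messages in the job log above come from the post-processing step of experiments.lsf: when the jsrun launch (or the rebuild) fails, the per-run results file has no "intersect" marker line, so the grep returns an empty line number and head/tail are invoked without a count. Below is a minimal bash sketch of a guarded version of that step. The guards (the make check and the marker check) are additions for illustration only and are not part of the committed script; the grep/head/tail pipeline mirrors experiments.lsf, and the example values for buildpath/exp/mode reuse its variable names.

#!/bin/bash
# Sketch only: guard the result-splitting step from experiments.lsf.
# Example values, reusing the names from the script; the results file
# itself is produced by the script's jsrun call.
buildpath="../../build"
exp="strong_scaling"
mode="index_nomanaged"
result="./$exp/results/${exp}_${mode}.txt"

# Abort if the rebuild fails, instead of silently running a stale binary.
make -C "$buildpath" multi-hash || exit 1

# Only attempt the build/intersect split if the marker line is present;
# the head/tail arithmetic below mirrors the original script.
marker=$(grep -n -m 1 intersect "$result" | cut -f1 -d:)
if [ -n "$marker" ]; then
  head -n -"$marker" "$result" > "./$exp/results/build/${exp}_${mode}.txt"
  tail -n +"$marker" "$result" > "./$exp/results/intersect/${exp}_${mode}.txt"
else
  echo "no 'intersect' marker in $result; skipping split" >&2
fi

Bailing out (or skipping to the next mode) when make fails would also matter for job 45251 below: there the sed-toggled rebuild dies on the -std=c++14 / C++11 host-compiler error, so the multi-hash binary from an earlier build is what actually runs, and it fails at load time with the repeated CXXABI_1.3.8 errors.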
+ diff --git a/hash-graph-dehornetify/experiments/summit/snmg-hg-experiments.45251 b/hash-graph-dehornetify/experiments/summit/snmg-hg-experiments.45251 new file mode 100644 index 0000000..c70efa3 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/snmg-hg-experiments.45251 @@ -0,0 +1,416 @@ +make: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[2]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +Scanning dependencies of target alg +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 20%] Building CUDA object CMakeFiles/alg.dir/src/MultiHashGraph.cu.o +nvcc warning : The -std=c++14 flag is not supported with the configured host compiler. Flag will be ignored. +In file included from /usr/include/c++/4.8.2/chrono:35:0, + from /autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu:24: +/usr/include/c++/4.8.2/bits/c++0x_warning.h:32:2: error: #error This file requires compiler and library support for the ISO C++ 2011 standard. This support is currently experimental, and must be enabled with the -std=c++11 or -std=gnu++11 compiler options. + #error This file requires compiler and library support for the \ + ^ +make[3]: *** [CMakeFiles/alg.dir/src/MultiHashGraph.cu.o] Error 1 +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[2]: *** [CMakeFiles/alg.dir/all] Error 2 +make[2]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: *** [CMakeFiles/multi-hash.dir/rule] Error 2 +make[1]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make: *** [multi-hash] Error 2 +make: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +noindex_nomanaged +strong_scaling +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required 
by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by 
./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +weak_scaling +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by 
./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +make: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[2]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +Scanning dependencies of target alg +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 20%] Building CUDA object CMakeFiles/alg.dir/src/MultiHashGraph.cu.o +nvcc warning : The -std=c++14 flag is not supported with the configured host compiler. Flag will be ignored. +In file included from /usr/include/c++/4.8.2/chrono:35:0, + from /autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu:24: +/usr/include/c++/4.8.2/bits/c++0x_warning.h:32:2: error: #error This file requires compiler and library support for the ISO C++ 2011 standard. This support is currently experimental, and must be enabled with the -std=c++11 or -std=gnu++11 compiler options. 
+ #error This file requires compiler and library support for the \ + ^ +make[3]: *** [CMakeFiles/alg.dir/src/MultiHashGraph.cu.o] Error 1 +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[2]: *** [CMakeFiles/alg.dir/all] Error 2 +make[2]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: *** [CMakeFiles/multi-hash.dir/rule] Error 2 +make[1]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make: *** [multi-hash] Error 2 +make: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +index_nomanaged +strong_scaling +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: 
/lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +weak_scaling +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: 
/lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +make: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[2]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +Scanning dependencies of target alg +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 20%] Building CUDA object CMakeFiles/alg.dir/src/MultiHashGraph.cu.o +nvcc warning : The -std=c++14 flag is not supported with the configured host compiler. Flag will be ignored. 
+In file included from /usr/include/c++/4.8.2/chrono:35:0, + from /autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu:24: +/usr/include/c++/4.8.2/bits/c++0x_warning.h:32:2: error: #error This file requires compiler and library support for the ISO C++ 2011 standard. This support is currently experimental, and must be enabled with the -std=c++11 or -std=gnu++11 compiler options. + #error This file requires compiler and library support for the \ + ^ +make[3]: *** [CMakeFiles/alg.dir/src/MultiHashGraph.cu.o] Error 1 +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[2]: *** [CMakeFiles/alg.dir/all] Error 2 +make[2]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: *** [CMakeFiles/multi-hash.dir/rule] Error 2 +make[1]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make: *** [multi-hash] Error 2 +make: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +noindex_managed +strong_scaling +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by 
./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +weak_scaling +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by 
./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +make: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[2]: Entering directory 
`/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +Scanning dependencies of target alg +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 20%] Building CUDA object CMakeFiles/alg.dir/src/MultiHashGraph.cu.o +nvcc warning : The -std=c++14 flag is not supported with the configured host compiler. Flag will be ignored. +In file included from /usr/include/c++/4.8.2/chrono:35:0, + from /autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu:24: +/usr/include/c++/4.8.2/bits/c++0x_warning.h:32:2: error: #error This file requires compiler and library support for the ISO C++ 2011 standard. This support is currently experimental, and must be enabled with the -std=c++11 or -std=gnu++11 compiler options. + #error This file requires compiler and library support for the \ + ^ +make[3]: *** [CMakeFiles/alg.dir/src/MultiHashGraph.cu.o] Error 1 +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[2]: *** [CMakeFiles/alg.dir/all] Error 2 +make[2]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: *** [CMakeFiles/multi-hash.dir/rule] Error 2 +make[1]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make: *** [multi-hash] Error 2 +make: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +index_managed +strong_scaling +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) +./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) 
+weak_scaling 
+./../../build/multi-hash: /lib64/libstdc++.so.6: version `CXXABI_1.3.8' not found (required by ./../../build/multi-hash) + +------------------------------------------------------------ +Sender: LSF System +Subject: Job 45251: in cluster Done + +Job was submitted from host by user in cluster at Sun Apr 19 23:28:37 2020 +Job was executed on host(s) <1*batch5>, in queue , as user in cluster at Sun Apr 19 23:40:54 2020 + <42*h36n13> + was used as the home directory. + was used as the working directory. +Started at Sun Apr 19 23:40:54 2020 +Terminated at Sun Apr 19 23:41:34 2020 +Results reported at Sun Apr 19 23:41:34 2020 + +The output (if any) is above this job summary. + diff --git a/hash-graph-dehornetify/experiments/summit/snmg-hg-experiments.45259 b/hash-graph-dehornetify/experiments/summit/snmg-hg-experiments.45259 new file mode 100644 index 0000000..53f0db2 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/snmg-hg-experiments.45259 @@ -0,0 +1,76 @@ + +Lmod is automatically replacing "xl/16.1.1-5" with "gcc/6.4.0". + + +Due to MODULEPATH changes, the following have been reloaded: + 1) spectrum-mpi/10.3.1.2-20200121 + +make: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[2]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +Scanning dependencies of target alg +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 20%] Building CUDA object CMakeFiles/alg.dir/src/MultiHashGraph.cu.o +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphDeviceOperators.cuh(311): warning: variable "key" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphHostOperators.cuh(154): warning: variable "_d_temp_storage" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/include/MultiHashGraphHostOperators.cuh(286): warning: variable "size" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(83): warning: variable "seed" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(409): warning: variable "start" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(409): warning: variable "stop" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(411): warning: variable "buildTime" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(412): warning: variable "t1" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(413): warning: variable "t2" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(548): warning: variable "gpuCount" was declared but never referenced + 
+/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(554): warning: variable "d_counterA" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(558): warning: variable "d_counterB" was declared but never referenced + +/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/src/MultiHashGraph.cu(561): warning: variable "d_GlobalCounter" was declared but never referenced + +[ 40%] Linking CUDA static library libalg.a +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 60%] Built target alg +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +Scanning dependencies of target multi-hash +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[3]: Entering directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[ 80%] Building CUDA object CMakeFiles/multi-hash.dir/test/MultiHashGraphTest.cu.o +[100%] Linking CUDA executable multi-hash +make[3]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +[100%] Built target multi-hash +make[2]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make[1]: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +make: Leaving directory `/autofs/nccs-svm1_home1/alokt/code-share/hash-graph-dehornetify/build' +noindex_nomanaged +strong_scaling +weak_scaling +User defined signal 2 +ERROR: One or more process (first noticed rank 0) terminated with signal 12 + +------------------------------------------------------------ +Sender: LSF System +Subject: Job 45259: in cluster Exited + +Job was submitted from host by user in cluster at Sun Apr 19 23:42:21 2020 +Job was executed on host(s) <1*batch2>, in queue , as user in cluster at Sun Apr 19 23:58:28 2020 + <42*h28n05> + was used as the home directory. + was used as the working directory. +Started at Sun Apr 19 23:58:28 2020 +Terminated at Mon Apr 20 01:58:50 2020 +Results reported at Mon Apr 20 01:58:50 2020 + +The output (if any) is above this job summary. 
+ diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/index_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/index_nomanaged.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/noindex_nomanaged.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_index_managed.txt new file mode 100644 index 0000000..730f1bf --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_index_managed.txt @@ -0,0 +1,40 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1, +16777216,2, +16777216,4, +16777216,6, +33554432,1, +33554432,2, +33554432,4, +33554432,6, +67108864,1, +67108864,2, +67108864,4, +67108864,6, +134217728,1, +134217728,2, +134217728,4, +134217728,6, +268435456,1,oom +268435456,2, +268435456,4, +268435456,6, +536870912,1,oom +536870912,2,oom +536870912,4, +536870912,6, +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6,oom +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom +8589934592,1,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_index_nomanaged.txt new file mode 100644 index 0000000..730f1bf --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_index_nomanaged.txt @@ -0,0 +1,40 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1, +16777216,2, +16777216,4, +16777216,6, +33554432,1, +33554432,2, +33554432,4, +33554432,6, +67108864,1, +67108864,2, +67108864,4, +67108864,6, +134217728,1, +134217728,2, +134217728,4, +134217728,6, +268435456,1,oom +268435456,2, +268435456,4, +268435456,6, +536870912,1,oom +536870912,2,oom +536870912,4, +536870912,6, +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6,oom +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom +8589934592,1,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_index_nomanaged_32.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_index_nomanaged_32.txt new file mode 100644 index 0000000..902c111 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_index_nomanaged_32.txt @@ -0,0 +1,40 @@ +strong_scaling +keycount,gpucount,time +build tests +16777215,1, +16777215,2, +16777215,4, +16777215,6, +33554431,1, +33554431,2, +33554431,4, +33554431,6, +67108863,1, +67108863,2, +67108863,4, +67108863,6, +134217727,1, +134217727,2, +134217727,4, +134217727,6, +268435455,1,oom +268435455,2, +268435455,4, +268435455,6, +536870911,1,oom 
+536870911,2,oom +536870911,4, +536870911,6, +1073741823,1,oom +1073741823,2,oom +1073741823,4,oom +1073741823,6, +2147483647,1,oom +2147483647,2,oom +2147483647,4,oom +2147483647,6,oom +4294967295,1,oom +4294967295,2,oom +4294967295,4,oom +4294967295,6,oom +8589934591,1,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_noindex_managed.txt new file mode 100644 index 0000000..57e9085 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_noindex_managed.txt @@ -0,0 +1,40 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1, +16777216,2, +16777216,4, +16777216,6, +33554432,1, +33554432,2, +33554432,4, +33554432,6, +67108864,1, +67108864,2, +67108864,4, +67108864,6, +134217728,1, +134217728,2, +134217728,4, +134217728,6, +268435456,1,oom +268435456,2, +268435456,4, +268435456,6, +536870912,1,oom +536870912,2,oom +536870912,4, +536870912,6, +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6, +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom +8589934592,1,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_noindex_managed_32.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_noindex_managed_32.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..240edd9 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_noindex_nomanaged.txt @@ -0,0 +1,40 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0944883 +16777216,2,0.0952142 +16777216,4,0.139944 +16777216,6,0.203342 +33554432,1,0.154184 +33554432,2,0.128598 +33554432,4,0.172594 +33554432,6,0.228132 +67108864,1,0.262018 +67108864,2,0.203599 +67108864,4,0.216038 +67108864,6,0.284226 +134217728,1,0.552984 +134217728,2,0.337325 +134217728,4,0.321773 +134217728,6,0.393368 +268435456,1,oom +268435456,2,0.676583 +268435456,4,0.529026 +268435456,6,0.575875 +536870912,1,oom +536870912,2,oom +536870912,4,1.05184 +536870912,6,1.14013 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6,1.94676 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom +8589934592,1,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_noindex_nomanaged_32.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_noindex_nomanaged_32.txt new file mode 100644 index 0000000..f247f67 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_noindex_nomanaged_32.txt @@ -0,0 +1,40 @@ +strong_scaling +keycount,gpucount,time +build tests +16777215,1,0.0201001 +16777215,2,0.0187013 +16777215,4,0.0216812 +16777215,6,0.0272865 +33554431,1,0.0398787 +33554431,2,0.0357919 +33554431,4,0.0446525 +33554431,6,0.0536556 +67108863,1,0.0798239 +67108863,2,0.0672225 +67108863,4,0.067372 +67108863,6,0.0937596 
+134217727,1,0.146654 +134217727,2,0.131143 +134217727,4,0.128811 +134217727,6,0.174467 +268435455,1,0.281261 +268435455,2,0.25106 +268435455,4,0.245207 +268435455,6,0.322238 +536870911,1,oom +536870911,2,0.492784 +536870911,4,0.544543 +536870911,6,0.652678 +1073741823,1,oom +1073741823,2,oom +1073741823,4,1.19908 +1073741823,6,1.25135 +2147483647,1,oom +2147483647,2,oom +2147483647,4,oom +2147483647,6,oom +4294967295,1,oom +4294967295,2,oom +4294967295,4,oom +4294967295,6,oom +8589934591,1,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_noindex_nomanaged_4.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/build/strong_scaling_noindex_nomanaged_4.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/index_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/index_nomanaged.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/noindex_nomanaged.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_index_managed.txt new file mode 100644 index 0000000..ee1aa25 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_index_managed.txt @@ -0,0 +1,41 @@ +intersect tests +8388608,1, +8388608,2, +8388608,4, +8388608,6, +16777216,1, +16777216,2, +16777216,4, +16777216,6, +33554432,1, +33554432,2, +33554432,4, +33554432,6, +67108864,1, +67108864,2, +67108864,4, +67108864,6, +134217728,1,oom +134217728,2, +134217728,4, +134217728,6, +268435456,1,oom +268435456,2,oom +268435456,4, +268435456,6, +536870912,1,oom +536870912,2,oom +536870912,4,oom +536870912,6,oom +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6,oom +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_index_nomanaged.txt new file mode 100644 index 0000000..ee1aa25 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_index_nomanaged.txt @@ -0,0 +1,41 @@ +intersect tests +8388608,1, +8388608,2, +8388608,4, +8388608,6, +16777216,1, +16777216,2, +16777216,4, +16777216,6, +33554432,1, +33554432,2, +33554432,4, +33554432,6, +67108864,1, +67108864,2, +67108864,4, +67108864,6, +134217728,1,oom +134217728,2, +134217728,4, +134217728,6, +268435456,1,oom +268435456,2,oom +268435456,4, +268435456,6, +536870912,1,oom +536870912,2,oom +536870912,4,oom +536870912,6,oom +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6,oom +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom 
+2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_index_nomanaged_32.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_index_nomanaged_32.txt new file mode 100644 index 0000000..7736235 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_index_nomanaged_32.txt @@ -0,0 +1,41 @@ +intersect tests +8388607,1, +8388607,2, +8388607,4, +8388607,6, +16777215,1, +16777215,2, +16777215,4, +16777215,6, +33554431,1, +33554431,2, +33554431,4, +33554431,6, +67108863,1, +67108863,2, +67108863,4, +67108863,6, +134217727,1,oom +134217727,2, +134217727,4, +134217727,6, +268435455,1,oom +268435455,2,oom +268435455,4, +268435455,6, +536870911,1,oom +536870911,2,oom +536870911,4,oom +536870911,6, +1073741823,1,oom +1073741823,2,oom +1073741823,4,oom +1073741823,6,oom +2147483647,1,oom +2147483647,2,oom +2147483647,4,oom +2147483647,6,oom +4294967295,1,oom +4294967295,2,oom +4294967295,4,oom +4294967295,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_noindex_managed.txt new file mode 100644 index 0000000..b232037 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_noindex_managed.txt @@ -0,0 +1,41 @@ +intersect tests +8388608,1, +8388608,2, +8388608,4, +8388608,6, +16777216,1, +16777216,2, +16777216,4, +16777216,6, +33554432,1, +33554432,2, +33554432,4, +33554432,6, +67108864,1, +67108864,2, +67108864,4, +67108864,6, +134217728,1,oom +134217728,2, +134217728,4, +134217728,6, +268435456,1,oom +268435456,2,oom +268435456,4, +268435456,6, +536870912,1,oom +536870912,2,oom +536870912,4,oom +536870912,6, +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6,oom +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_noindex_managed_32.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_noindex_managed_32.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..91bad75 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged.txt @@ -0,0 +1,41 @@ +intersect tests +8388608,1,0.10551 +8388608,2,0.100897 +8388608,4,0.153937 +8388608,6,0.199881 +16777216,1,0.180962 +16777216,2,0.146225 +16777216,4,0.181449 +16777216,6,0.240525 +33554432,1,0.328701 +33554432,2,0.236525 +33554432,4,0.25259 +33554432,6,0.301977 +67108864,1,0.647717 +67108864,2,0.441698 +67108864,4,0.374241 +67108864,6,0.446872 +134217728,1,oom +134217728,2,0.795704 +134217728,4,0.648686 +134217728,6,0.727995 +268435456,1,oom +268435456,2,oom +268435456,4,1.15039 +268435456,6,1.31486 +536870912,1,oom +536870912,2,oom +536870912,4,oom +536870912,6,2.16924 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom 
+1073741824,6,oom +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged_32.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged_32.txt new file mode 100644 index 0000000..940aa99 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged_32.txt @@ -0,0 +1,41 @@ +intersect tests +8388607,1,0.0251873 +8388607,2,0.0234701 +8388607,4,0.0310979 +8388607,6,0.0396134 +16777215,1,0.0442276 +16777215,2,0.0415968 +16777215,4,0.0570452 +16777215,6,0.0638299 +33554431,1,0.0865465 +33554431,2,0.0792638 +33554431,4,0.0953404 +33554431,6,0.110044 +67108863,1,0.171829 +67108863,2,0.163637 +67108863,4,0.155304 +67108863,6,0.191941 +134217727,1,0.342287 +134217727,2,0.305476 +134217727,4,0.3514 +134217727,6,0.414157 +268435455,1,oom +268435455,2,0.606768 +268435455,4,0.679074 +268435455,6,0.787238 +536870911,1,oom +536870911,2,oom +536870911,4,1.24203 +536870911,6,1.48642 +1073741823,1,oom +1073741823,2,oom +1073741823,4,oom +1073741823,6,oom +2147483647,1,oom +2147483647,2,oom +2147483647,4,oom +2147483647,6,oom +4294967295,1,oom +4294967295,2,oom +4294967295,4,oom +4294967295,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged_4.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/intersect/strong_scaling_noindex_nomanaged_4.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_.txt new file mode 100644 index 0000000..1347ced --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_.txt @@ -0,0 +1,14 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0885921 +16777216,2,0.0898107 +16777216,4,0.135021 +16777216,6,0.197105 +33554432,1,0.13882 +33554432,2,0.122418 +33554432,4,0.153932 +33554432,6,0.222969 +67108864,1,0.242643 +67108864,2,0.18116 +67108864,4,0.206763 diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_index_managed.txt new file mode 100644 index 0000000..079733a --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_index_managed.txt @@ -0,0 +1,84 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1, +16777216,2, +16777216,4, +16777216,6, +33554432,1, +33554432,2, +33554432,4, +33554432,6, +67108864,1, +67108864,2, +67108864,4, +67108864,6, +134217728,1, +134217728,2, +134217728,4, +134217728,6, +268435456,1,oom +268435456,2, +268435456,4, +268435456,6, +536870912,1,oom +536870912,2,oom +536870912,4, +536870912,6, +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6,oom +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,6,oom +intersect tests +8388608,1, +8388608,2, +8388608,4, +8388608,6, +16777216,1, +16777216,2, +16777216,4, +16777216,6, +33554432,1, +33554432,2, 
+33554432,4, +33554432,6, +67108864,1, +67108864,2, +67108864,4, +67108864,6, +134217728,1,oom +134217728,2, +134217728,4, +134217728,6, +268435456,1,oom +268435456,2,oom +268435456,4, +268435456,6, +536870912,1,oom +536870912,2,oom +536870912,4,oom +536870912,6,oom +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6,oom +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_index_nomanaged.txt new file mode 100644 index 0000000..079733a --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_index_nomanaged.txt @@ -0,0 +1,84 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1, +16777216,2, +16777216,4, +16777216,6, +33554432,1, +33554432,2, +33554432,4, +33554432,6, +67108864,1, +67108864,2, +67108864,4, +67108864,6, +134217728,1, +134217728,2, +134217728,4, +134217728,6, +268435456,1,oom +268435456,2, +268435456,4, +268435456,6, +536870912,1,oom +536870912,2,oom +536870912,4, +536870912,6, +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6,oom +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,6,oom +intersect tests +8388608,1, +8388608,2, +8388608,4, +8388608,6, +16777216,1, +16777216,2, +16777216,4, +16777216,6, +33554432,1, +33554432,2, +33554432,4, +33554432,6, +67108864,1, +67108864,2, +67108864,4, +67108864,6, +134217728,1,oom +134217728,2, +134217728,4, +134217728,6, +268435456,1,oom +268435456,2,oom +268435456,4, +268435456,6, +536870912,1,oom +536870912,2,oom +536870912,4,oom +536870912,6,oom +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6,oom +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_noindex_managed.txt new file mode 100644 index 0000000..7c1308f --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_noindex_managed.txt @@ -0,0 +1,84 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1, +16777216,2, +16777216,4, +16777216,6, +33554432,1, +33554432,2, +33554432,4, +33554432,6, +67108864,1, +67108864,2, +67108864,4, +67108864,6, +134217728,1, +134217728,2, +134217728,4, +134217728,6, +268435456,1,oom +268435456,2, +268435456,4, +268435456,6, +536870912,1,oom +536870912,2,oom +536870912,4, +536870912,6, +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6, +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,6,oom +intersect tests +8388608,1, +8388608,2, +8388608,4, +8388608,6, +16777216,1, +16777216,2, +16777216,4, +16777216,6, +33554432,1, +33554432,2, +33554432,4, +33554432,6, +67108864,1, +67108864,2, +67108864,4, +67108864,6, +134217728,1,oom +134217728,2, +134217728,4, +134217728,6, +268435456,1,oom 
+268435456,2,oom +268435456,4, +268435456,6, +536870912,1,oom +536870912,2,oom +536870912,4,oom +536870912,6, +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6,oom +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_noindex_managed_32.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_noindex_managed_32.txt new file mode 100644 index 0000000..3b37afa --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_noindex_managed_32.txt @@ -0,0 +1,19 @@ +strong_scaling +keycount,gpucount,time +build tests +16777215,1,0.0102564 +16777215,2,0.0184064 +16777215,4,0.0220027 +16777215,6,0.0299407 +33554431,1,0.0209173 +33554431,2,0.0382484 +33554431,4,0.0418437 +33554431,6,0.0538255 +67108863,1,0.0422522 +67108863,2,0.0539505 +67108863,4,0.0814398 +67108863,6,0.0954061 +134217727,1,0.0753664 +134217727,2,0.134451 +134217727,4,0.139762 +134217727,6,0.161109 diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..09917ef --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_noindex_nomanaged.txt @@ -0,0 +1,84 @@ +strong_scaling +keycount,gpucount,time +build tests +16777216,1,0.0944883 +16777216,2,0.0952142 +16777216,4,0.139944 +16777216,6,0.203342 +33554432,1,0.154184 +33554432,2,0.128598 +33554432,4,0.172594 +33554432,6,0.228132 +67108864,1,0.262018 +67108864,2,0.203599 +67108864,4,0.216038 +67108864,6,0.284226 +134217728,1,0.552984 +134217728,2,0.337325 +134217728,4,0.321773 +134217728,6,0.393368 +268435456,1,oom +268435456,2,0.676583 +268435456,4,0.529026 +268435456,6,0.575875 +536870912,1,oom +536870912,2,oom +536870912,4,1.05184 +536870912,6,1.14013 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6,1.94676 +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom +8589934592,1,oom +8589934592,2,oom +8589934592,4,oom +8589934592,6,oom +intersect tests +8388608,1,0.10551 +8388608,2,0.100897 +8388608,4,0.153937 +8388608,6,0.199881 +16777216,1,0.180962 +16777216,2,0.146225 +16777216,4,0.181449 +16777216,6,0.240525 +33554432,1,0.328701 +33554432,2,0.236525 +33554432,4,0.25259 +33554432,6,0.301977 +67108864,1,0.647717 +67108864,2,0.441698 +67108864,4,0.374241 +67108864,6,0.446872 +134217728,1,oom +134217728,2,0.795704 +134217728,4,0.648686 +134217728,6,0.727995 +268435456,1,oom +268435456,2,oom +268435456,4,1.15039 +268435456,6,1.31486 +536870912,1,oom +536870912,2,oom +536870912,4,oom +536870912,6,2.16924 +1073741824,1,oom +1073741824,2,oom +1073741824,4,oom +1073741824,6,oom +2147483648,1,oom +2147483648,2,oom +2147483648,4,oom +2147483648,6,oom +4294967296,1,oom +4294967296,2,oom +4294967296,4,oom +4294967296,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_noindex_nomanaged_32.txt b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_noindex_nomanaged_32.txt new file mode 100644 index 0000000..62d767b --- /dev/null +++ 
b/hash-graph-dehornetify/experiments/summit/strong_scaling/results/strong_scaling_noindex_nomanaged_32.txt @@ -0,0 +1,84 @@ +strong_scaling +keycount,gpucount,time +build tests +16777215,1,0.0201001 +16777215,2,0.0187013 +16777215,4,0.0216812 +16777215,6,0.0272865 +33554431,1,0.0398787 +33554431,2,0.0357919 +33554431,4,0.0446525 +33554431,6,0.0536556 +67108863,1,0.0798239 +67108863,2,0.0672225 +67108863,4,0.067372 +67108863,6,0.0937596 +134217727,1,0.146654 +134217727,2,0.131143 +134217727,4,0.128811 +134217727,6,0.174467 +268435455,1,0.281261 +268435455,2,0.25106 +268435455,4,0.245207 +268435455,6,0.322238 +536870911,1,oom +536870911,2,0.492784 +536870911,4,0.544543 +536870911,6,0.652678 +1073741823,1,oom +1073741823,2,oom +1073741823,4,1.19908 +1073741823,6,1.25135 +2147483647,1,oom +2147483647,2,oom +2147483647,4,oom +2147483647,6,oom +4294967295,1,oom +4294967295,2,oom +4294967295,4,oom +4294967295,6,oom +8589934591,1,oom +8589934591,2,oom +8589934591,4,oom +8589934591,6,oom +intersect tests +8388607,1,0.0251873 +8388607,2,0.0234701 +8388607,4,0.0310979 +8388607,6,0.0396134 +16777215,1,0.0442276 +16777215,2,0.0415968 +16777215,4,0.0570452 +16777215,6,0.0638299 +33554431,1,0.0865465 +33554431,2,0.0792638 +33554431,4,0.0953404 +33554431,6,0.110044 +67108863,1,0.171829 +67108863,2,0.163637 +67108863,4,0.155304 +67108863,6,0.191941 +134217727,1,0.342287 +134217727,2,0.305476 +134217727,4,0.3514 +134217727,6,0.414157 +268435455,1,oom +268435455,2,0.606768 +268435455,4,0.679074 +268435455,6,0.787238 +536870911,1,oom +536870911,2,oom +536870911,4,1.24203 +536870911,6,1.48642 +1073741823,1,oom +1073741823,2,oom +1073741823,4,oom +1073741823,6,oom +2147483647,1,oom +2147483647,2,oom +2147483647,4,oom +2147483647,6,oom +4294967295,1,oom +4294967295,2,oom +4294967295,4,oom +4294967295,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/strong_scaling/strong_scaling.sh b/hash-graph-dehornetify/experiments/summit/strong_scaling/strong_scaling.sh new file mode 100755 index 0000000..1e67641 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/strong_scaling/strong_scaling.sh @@ -0,0 +1,81 @@ +keycounts=($(seq 24 1 33)) +# gpucounts=($(seq 0 1 4)) +gpucounts=(1 2 4 6) +# gpucounts=(1 2 4 8) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' ../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +# rm $resultsfile +# echo "keycount,gpucount,time" >> $resultsfile +echo "strong_scaling" +echo "keycount,gpucount,time" +echo "build tests" +# echo "build tests" >> $resultsfile + +# $1 is sizeof(keyval) +# $2 is 32-bit vs 64-bit +for i in "${keycounts[@]}" + do + let kc=$((echo 2^$i) | bc) + # echo "keycount: ${kc}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs + 16) / 16") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc build | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + +echo "intersect tests" +# echo "intersect tests" >> $resultsfile +for i in "${keycounts[@]}" + do + let kc=$((echo 2^$i) | bc) + kc=$((kc / 2)) + # echo 
"keycount: ${kc}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs * 2 + 16) / 16") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc intersect | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/index_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/index_nomanaged.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/noindex_nomanaged.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_index_managed.txt new file mode 100644 index 0000000..06c7061 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_index_managed.txt @@ -0,0 +1,40 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1, +33554432,2, +67108864,4, +100663296,6, +33554432,1, +67108864,2, +134217728,4, +201326592,6, +67108864,1, +134217728,2, +268435456,4, +402653184,6, +134217728,1, +268435456,2, +536870912,4, +805306368,6, +268435456,1,oom +536870912,2,oom +1073741824,4,oom +1610612736,6,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +3221225472,6,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +6442450944,6,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +12884901888,6,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +25769803776,6,oom +8589934592,1,oom diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_index_nomanaged.txt new file mode 100644 index 0000000..06c7061 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_index_nomanaged.txt @@ -0,0 +1,40 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1, +33554432,2, +67108864,4, +100663296,6, +33554432,1, +67108864,2, +134217728,4, +201326592,6, +67108864,1, +134217728,2, +268435456,4, +402653184,6, +134217728,1, +268435456,2, +536870912,4, +805306368,6, +268435456,1,oom +536870912,2,oom +1073741824,4,oom +1610612736,6,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +3221225472,6,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +6442450944,6,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +12884901888,6,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +25769803776,6,oom +8589934592,1,oom diff --git 
a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_index_nomanaged_32.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_index_nomanaged_32.txt new file mode 100644 index 0000000..3af1fbd --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_index_nomanaged_32.txt @@ -0,0 +1,12 @@ +weak_scaling +keycount,gpucount,time +build tests +16777215,1, +33554430,2, +67108860,4,0.243323 +100663290,6,0.380083 +33554431,1, +67108862,2, +134217724,4, +201326586,6, +67108863,1, diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_noindex_managed.txt new file mode 100644 index 0000000..06c7061 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_noindex_managed.txt @@ -0,0 +1,40 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1, +33554432,2, +67108864,4, +100663296,6, +33554432,1, +67108864,2, +134217728,4, +201326592,6, +67108864,1, +134217728,2, +268435456,4, +402653184,6, +134217728,1, +268435456,2, +536870912,4, +805306368,6, +268435456,1,oom +536870912,2,oom +1073741824,4,oom +1610612736,6,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +3221225472,6,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +6442450944,6,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +12884901888,6,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +25769803776,6,oom +8589934592,1,oom diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_noindex_managed_32.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_noindex_managed_32.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..06c7061 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_noindex_nomanaged.txt @@ -0,0 +1,40 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1, +33554432,2, +67108864,4, +100663296,6, +33554432,1, +67108864,2, +134217728,4, +201326592,6, +67108864,1, +134217728,2, +268435456,4, +402653184,6, +134217728,1, +268435456,2, +536870912,4, +805306368,6, +268435456,1,oom +536870912,2,oom +1073741824,4,oom +1610612736,6,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +3221225472,6,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +6442450944,6,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +12884901888,6,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +25769803776,6,oom +8589934592,1,oom diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_noindex_nomanaged_32.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_noindex_nomanaged_32.txt new file mode 100644 index 0000000..71fe3b5 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_noindex_nomanaged_32.txt @@ -0,0 +1,40 @@ +weak_scaling +keycount,gpucount,time +build tests +16777215,1,0.0177664 +33554430,2,0.0319191 +67108860,4,0.0732232 +100663290,6,0.114922 +33554431,1,0.0352295 
+67108862,2,0.0624845 +134217724,4,0.118617 +201326586,6,0.254482 +67108863,1,0.0702833 +134217726,2,0.123615 +268435452,4,0.240805 +402653178,6,0.481772 +134217727,1,0.140661 +268435454,2,0.246276 +536870908,4,0.471656 +805306362,6,0.874932 +268435455,1,0.281388 +536870910,2,0.502079 +1073741820,4,1.2029 +1610612730,6,1.93455 +536870911,1,oom +1073741822,2,oom +2147483644,4,oom +3221225466,6,oom +1073741823,1,oom +2147483646,2,oom +4294967292,4,oom +6442450938,6,oom +2147483647,1,oom +4294967294,2,oom +8589934588,4,oom +12884901882,6,oom +4294967295,1,oom +8589934590,2,oom +17179869180,4,oom +25769803770,6,oom +8589934591,1,oom diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_noindex_nomanaged_4.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/build/weak_scaling_noindex_nomanaged_4.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/index_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/index_nomanaged.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/noindex_nomanaged.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_index_managed.txt new file mode 100644 index 0000000..4ba0238 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_index_managed.txt @@ -0,0 +1,41 @@ +intersect tests +8388608,1, +16777216,2, +33554432,4, +50331648,6, +16777216,1, +33554432,2, +67108864,4, +100663296,6, +33554432,1, +67108864,2, +134217728,4, +201326592,6, +67108864,1, +134217728,2, +268435456,4, +402653184,6, +134217728,1,oom +268435456,2,oom +536870912,4,oom +805306368,6,oom +268435456,1,oom +536870912,2,oom +1073741824,4,oom +1610612736,6,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +3221225472,6,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +6442450944,6,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +12884901888,6,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +25769803776,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_index_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_index_nomanaged.txt new file mode 100644 index 0000000..4ba0238 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_index_nomanaged.txt @@ -0,0 +1,41 @@ +intersect tests +8388608,1, +16777216,2, +33554432,4, +50331648,6, +16777216,1, +33554432,2, +67108864,4, +100663296,6, +33554432,1, +67108864,2, +134217728,4, +201326592,6, +67108864,1, +134217728,2, +268435456,4, +402653184,6, +134217728,1,oom +268435456,2,oom +536870912,4,oom +805306368,6,oom +268435456,1,oom +536870912,2,oom +1073741824,4,oom +1610612736,6,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom 
+3221225472,6,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +6442450944,6,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +12884901888,6,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +25769803776,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_index_nomanaged_32.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_index_nomanaged_32.txt new file mode 100644 index 0000000..a0d4ca2 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_index_nomanaged_32.txt @@ -0,0 +1,13 @@ +intersect tests +8388607,1, +16777214,2, +33554428,4, +50331642,6, +16777215,1, +33554430,2, +67108860,4,0.459645 +100663290,6, +33554431,1, +67108862,2, +134217724,4, +201326586,6, diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_noindex_managed.txt new file mode 100644 index 0000000..4ba0238 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_noindex_managed.txt @@ -0,0 +1,41 @@ +intersect tests +8388608,1, +16777216,2, +33554432,4, +50331648,6, +16777216,1, +33554432,2, +67108864,4, +100663296,6, +33554432,1, +67108864,2, +134217728,4, +201326592,6, +67108864,1, +134217728,2, +268435456,4, +402653184,6, +134217728,1,oom +268435456,2,oom +536870912,4,oom +805306368,6,oom +268435456,1,oom +536870912,2,oom +1073741824,4,oom +1610612736,6,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +3221225472,6,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +6442450944,6,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +12884901888,6,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +25769803776,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_noindex_managed_32.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_noindex_managed_32.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..4ba0238 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged.txt @@ -0,0 +1,41 @@ +intersect tests +8388608,1, +16777216,2, +33554432,4, +50331648,6, +16777216,1, +33554432,2, +67108864,4, +100663296,6, +33554432,1, +67108864,2, +134217728,4, +201326592,6, +67108864,1, +134217728,2, +268435456,4, +402653184,6, +134217728,1,oom +268435456,2,oom +536870912,4,oom +805306368,6,oom +268435456,1,oom +536870912,2,oom +1073741824,4,oom +1610612736,6,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +3221225472,6,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +6442450944,6,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +12884901888,6,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +25769803776,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged_32.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged_32.txt new file mode 100644 index 0000000..11b75e7 --- 
/dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged_32.txt @@ -0,0 +1,41 @@ +intersect tests +8388607,1,0.0233174 +16777214,2,0.0444959 +33554428,4,0.10146 +50331642,6,0.161902 +16777215,1,0.0491827 +33554430,2,0.084906 +67108860,4,0.174984 +100663290,6,0.292526 +33554431,1,0.0971552 +67108862,2,0.183764 +134217724,4,0.312764 +201326586,6,0.558104 +67108863,1,0.171735 +134217726,2,0.30579 +268435452,4,0.676962 +402653178,6,1.03634 +134217727,1,0.342328 +268435454,2,0.608354 +536870908,4,1.33508 +805306362,6,2.23539 +268435455,1,oom +536870910,2,oom +1073741820,4,oom +1610612730,6,oom +536870911,1,oom +1073741822,2,oom +2147483644,4,oom +3221225466,6,oom +1073741823,1,oom +2147483646,2,oom +4294967292,4,oom +6442450938,6,oom +2147483647,1,oom +4294967294,2,oom +8589934588,4,oom +12884901882,6,oom +4294967295,1,oom +8589934590,2,oom +17179869180,4,oom +25769803770,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged_4.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/intersect/weak_scaling_noindex_nomanaged_4.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_.txt new file mode 100644 index 0000000..3de6e47 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_.txt @@ -0,0 +1,3 @@ +weak_scaling +keycount,gpucount,time +build tests diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_index_managed.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_index_managed.txt new file mode 100644 index 0000000..fc7b102 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_index_managed.txt @@ -0,0 +1,84 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1, +33554432,2, +67108864,4, +100663296,6, +33554432,1, +67108864,2, +134217728,4, +201326592,6, +67108864,1, +134217728,2, +268435456,4, +402653184,6, +134217728,1, +268435456,2, +536870912,4, +805306368,6, +268435456,1,oom +536870912,2,oom +1073741824,4,oom +1610612736,6,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +3221225472,6,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +6442450944,6,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +12884901888,6,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +25769803776,6,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +51539607552,6,oom +intersect tests +8388608,1, +16777216,2, +33554432,4, +50331648,6, +16777216,1, +33554432,2, +67108864,4, +100663296,6, +33554432,1, +67108864,2, +134217728,4, +201326592,6, +67108864,1, +134217728,2, +268435456,4, +402653184,6, +134217728,1,oom +268435456,2,oom +536870912,4,oom +805306368,6,oom +268435456,1,oom +536870912,2,oom +1073741824,4,oom +1610612736,6,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +3221225472,6,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +6442450944,6,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +12884901888,6,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +25769803776,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_index_nomanaged.txt 
b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_index_nomanaged.txt new file mode 100644 index 0000000..fc7b102 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_index_nomanaged.txt @@ -0,0 +1,84 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1, +33554432,2, +67108864,4, +100663296,6, +33554432,1, +67108864,2, +134217728,4, +201326592,6, +67108864,1, +134217728,2, +268435456,4, +402653184,6, +134217728,1, +268435456,2, +536870912,4, +805306368,6, +268435456,1,oom +536870912,2,oom +1073741824,4,oom +1610612736,6,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +3221225472,6,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +6442450944,6,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +12884901888,6,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +25769803776,6,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +51539607552,6,oom +intersect tests +8388608,1, +16777216,2, +33554432,4, +50331648,6, +16777216,1, +33554432,2, +67108864,4, +100663296,6, +33554432,1, +67108864,2, +134217728,4, +201326592,6, +67108864,1, +134217728,2, +268435456,4, +402653184,6, +134217728,1,oom +268435456,2,oom +536870912,4,oom +805306368,6,oom +268435456,1,oom +536870912,2,oom +1073741824,4,oom +1610612736,6,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +3221225472,6,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +6442450944,6,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +12884901888,6,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +25769803776,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_index_nomanaged_32.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_index_nomanaged_32.txt new file mode 100644 index 0000000..3d96b16 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_index_nomanaged_32.txt @@ -0,0 +1,56 @@ +weak_scaling +keycount,gpucount,time +build tests +16777215,1, +33554430,2, +67108860,4,0.243323 +100663290,6,0.380083 +33554431,1, +67108862,2, +134217724,4, +201326586,6, +67108863,1, +134217726,2,0.441943 +268435452,4, +402653178,6, +134217727,1, +268435454,2, +536870908,4, +805306362,6, +268435455,1,oom +536870910,2,oom +1073741820,4,oom +1610612730,6,oom +536870911,1,oom +1073741822,2,oom +2147483644,4,oom +3221225466,6,oom +1073741823,1,oom +2147483646,2,oom +4294967292,4,oom +6442450938,6,oom +2147483647,1,oom +4294967294,2,oom +8589934588,4,oom +12884901882,6,oom +4294967295,1,oom +8589934590,2,oom +17179869180,4,oom +25769803770,6,oom +8589934591,1,oom +17179869182,2,oom +34359738364,4,oom +51539607546,6,oom +intersect tests +8388607,1, +16777214,2, +33554428,4, +50331642,6, +16777215,1, +33554430,2, +67108860,4,0.459645 +100663290,6, +33554431,1, +67108862,2, +134217724,4, +201326586,6, diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_noindex_managed.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_noindex_managed.txt new file mode 100644 index 0000000..fc7b102 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_noindex_managed.txt @@ -0,0 +1,84 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1, +33554432,2, +67108864,4, +100663296,6, +33554432,1, +67108864,2, +134217728,4, +201326592,6, +67108864,1, +134217728,2, +268435456,4, +402653184,6, +134217728,1, 
+268435456,2, +536870912,4, +805306368,6, +268435456,1,oom +536870912,2,oom +1073741824,4,oom +1610612736,6,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +3221225472,6,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +6442450944,6,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +12884901888,6,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +25769803776,6,oom +8589934592,1,oom +17179869184,2,oom +34359738368,4,oom +51539607552,6,oom +intersect tests +8388608,1, +16777216,2, +33554432,4, +50331648,6, +16777216,1, +33554432,2, +67108864,4, +100663296,6, +33554432,1, +67108864,2, +134217728,4, +201326592,6, +67108864,1, +134217728,2, +268435456,4, +402653184,6, +134217728,1,oom +268435456,2,oom +536870912,4,oom +805306368,6,oom +268435456,1,oom +536870912,2,oom +1073741824,4,oom +1610612736,6,oom +536870912,1,oom +1073741824,2,oom +2147483648,4,oom +3221225472,6,oom +1073741824,1,oom +2147483648,2,oom +4294967296,4,oom +6442450944,6,oom +2147483648,1,oom +4294967296,2,oom +8589934592,4,oom +12884901888,6,oom +4294967296,1,oom +8589934592,2,oom +17179869184,4,oom +25769803776,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_noindex_managed_32.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_noindex_managed_32.txt new file mode 100644 index 0000000..e69de29 diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_noindex_nomanaged.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_noindex_nomanaged.txt new file mode 100644 index 0000000..db836c3 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_noindex_nomanaged.txt @@ -0,0 +1,18 @@ +weak_scaling +keycount,gpucount,time +build tests +16777216,1,0.0851772 +33554432,2,0.121721 +67108864,4,0.211312 +100663296,6,0.351578 +33554432,1,0.143053 +67108864,2,0.189815 +134217728,4,0.320321 +201326592,6,0.521299 +67108864,1,0.259133 +134217728,2,0.334976 +268435456,4,0.553515 +402653184,6,0.857243 +134217728,1,0.555585 +268435456,2,0.680266 +536870912,4,1.29227 diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_noindex_nomanaged_32.txt b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_noindex_nomanaged_32.txt new file mode 100644 index 0000000..01ecb45 --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/results/weak_scaling_noindex_nomanaged_32.txt @@ -0,0 +1,84 @@ +weak_scaling +keycount,gpucount,time +build tests +16777215,1,0.0177664 +33554430,2,0.0319191 +67108860,4,0.0732232 +100663290,6,0.114922 +33554431,1,0.0352295 +67108862,2,0.0624845 +134217724,4,0.118617 +201326586,6,0.254482 +67108863,1,0.0702833 +134217726,2,0.123615 +268435452,4,0.240805 +402653178,6,0.481772 +134217727,1,0.140661 +268435454,2,0.246276 +536870908,4,0.471656 +805306362,6,0.874932 +268435455,1,0.281388 +536870910,2,0.502079 +1073741820,4,1.2029 +1610612730,6,1.93455 +536870911,1,oom +1073741822,2,oom +2147483644,4,oom +3221225466,6,oom +1073741823,1,oom +2147483646,2,oom +4294967292,4,oom +6442450938,6,oom +2147483647,1,oom +4294967294,2,oom +8589934588,4,oom +12884901882,6,oom +4294967295,1,oom +8589934590,2,oom +17179869180,4,oom +25769803770,6,oom +8589934591,1,oom +17179869182,2,oom +34359738364,4,oom +51539607546,6,oom +intersect tests +8388607,1,0.0233174 +16777214,2,0.0444959 +33554428,4,0.10146 +50331642,6,0.161902 +16777215,1,0.0491827 +33554430,2,0.084906 
+67108860,4,0.174984 +100663290,6,0.292526 +33554431,1,0.0971552 +67108862,2,0.183764 +134217724,4,0.312764 +201326586,6,0.558104 +67108863,1,0.171735 +134217726,2,0.30579 +268435452,4,0.676962 +402653178,6,1.03634 +134217727,1,0.342328 +268435454,2,0.608354 +536870908,4,1.33508 +805306362,6,2.23539 +268435455,1,oom +536870910,2,oom +1073741820,4,oom +1610612730,6,oom +536870911,1,oom +1073741822,2,oom +2147483644,4,oom +3221225466,6,oom +1073741823,1,oom +2147483646,2,oom +4294967292,4,oom +6442450938,6,oom +2147483647,1,oom +4294967294,2,oom +8589934588,4,oom +12884901882,6,oom +4294967295,1,oom +8589934590,2,oom +17179869180,4,oom +25769803770,6,oom diff --git a/hash-graph-dehornetify/experiments/summit/weak_scaling/weak_scaling.sh b/hash-graph-dehornetify/experiments/summit/weak_scaling/weak_scaling.sh new file mode 100755 index 0000000..d38922a --- /dev/null +++ b/hash-graph-dehornetify/experiments/summit/weak_scaling/weak_scaling.sh @@ -0,0 +1,84 @@ +keycounts=($(seq 24 1 33)) +# gpucounts=($(seq 0 1 4)) +gpucounts=(1 2 4 6) +# gpucounts=(1 2 4 8) + +execpath="../../build" +resultsfile=$1 + +bincount=16000 + +# sed -i 's/^\/\/.*#define INDEX_TRACK/#define INDEX_TRACK/' ../../include/MultiHashGraph.cuh +# make -C $execpath multi-hash + +# rm $resultsfile +# echo "keycount,gpucount,time" >> $resultsfile +echo "weak_scaling" +echo "keycount,gpucount,time" +echo "build tests" +# echo "build tests" >> $resultsfile + +for i in "${keycounts[@]}" + do + let kcdev=$((echo 2^$i) | bc) + # echo "keycount / dev: ${kcdev}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + let kc=$(($kcdev * $gc)) + + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs + 16) / 16") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc build | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + +echo "intersect tests" +# echo "intersect tests" >> $resultsfile + +for i in "${keycounts[@]}" + do + let kcdev=$((echo 2^$i) | bc) + kcdev=$((kcdev / 2)) + # echo "keycount / dev : ${kcdev}" + for j in "${gpucounts[@]}" + do + # let gc=$((echo 2^$j) | bc) + let gc=$j + # echo "gpucount: ${gc}" + + let kc=$(($kcdev * $gc)) + + # internal cuda malloc + keys + hashes + keyBinBuff + let gigs=$((echo "((($kc * $1) + ($kc * $2) + (2 * $kc * $1) + (2 * $kc * 8)) + ($kc * $2) + ($kc * $2) + ($kc * $1)) / 2^30") | bc) + let gpureq=$((echo "($gigs * 2 + 16) / 16") | bc) + + if (( $gpureq > $gc )) ; then + echo "${kc},${gc},oom" + continue + fi + + ans=$(./$execpath/multi-hash $kc $kc $bincount $gc $bincount nocheck $kc intersect | grep "time") + tokens=( $ans ) + time=${tokens[3]} + + # echo "${kc},${gc},${time}" >> $resultsfile + echo "${kc},${gc},${time}" + done + done + diff --git a/hash-graph-dehornetify/experiments/summit_strong_scaling_build_noindex_managed.pdf b/hash-graph-dehornetify/experiments/summit_strong_scaling_build_noindex_managed.pdf new file mode 100644 index 0000000..36d2049 Binary files /dev/null and b/hash-graph-dehornetify/experiments/summit_strong_scaling_build_noindex_managed.pdf differ diff --git a/hash-graph-dehornetify/experiments/summit_strong_scaling_build_noindex_nomanaged.pdf 
b/hash-graph-dehornetify/experiments/summit_strong_scaling_build_noindex_nomanaged.pdf new file mode 100644 index 0000000..8f3e635 Binary files /dev/null and b/hash-graph-dehornetify/experiments/summit_strong_scaling_build_noindex_nomanaged.pdf differ diff --git a/hash-graph-dehornetify/experiments/summit_strong_scaling_intersect_noindex_managed.pdf b/hash-graph-dehornetify/experiments/summit_strong_scaling_intersect_noindex_managed.pdf new file mode 100644 index 0000000..620ddbd Binary files /dev/null and b/hash-graph-dehornetify/experiments/summit_strong_scaling_intersect_noindex_managed.pdf differ diff --git a/hash-graph-dehornetify/experiments/summit_strong_scaling_intersect_noindex_nomanaged.pdf b/hash-graph-dehornetify/experiments/summit_strong_scaling_intersect_noindex_nomanaged.pdf new file mode 100644 index 0000000..49f34f8 Binary files /dev/null and b/hash-graph-dehornetify/experiments/summit_strong_scaling_intersect_noindex_nomanaged.pdf differ diff --git a/hash-graph-dehornetify/experiments/summit_weak_scaling_build_noindex_managed.pdf b/hash-graph-dehornetify/experiments/summit_weak_scaling_build_noindex_managed.pdf new file mode 100644 index 0000000..37c491d Binary files /dev/null and b/hash-graph-dehornetify/experiments/summit_weak_scaling_build_noindex_managed.pdf differ diff --git a/hash-graph-dehornetify/experiments/summit_weak_scaling_build_noindex_nomanaged.pdf b/hash-graph-dehornetify/experiments/summit_weak_scaling_build_noindex_nomanaged.pdf new file mode 100644 index 0000000..8349f03 Binary files /dev/null and b/hash-graph-dehornetify/experiments/summit_weak_scaling_build_noindex_nomanaged.pdf differ diff --git a/hash-graph-dehornetify/experiments/summit_weak_scaling_intersect_noindex_managed.pdf b/hash-graph-dehornetify/experiments/summit_weak_scaling_intersect_noindex_managed.pdf new file mode 100644 index 0000000..0e6a7bd Binary files /dev/null and b/hash-graph-dehornetify/experiments/summit_weak_scaling_intersect_noindex_managed.pdf differ diff --git a/hash-graph-dehornetify/experiments/summit_weak_scaling_intersect_noindex_nomanaged.pdf b/hash-graph-dehornetify/experiments/summit_weak_scaling_intersect_noindex_nomanaged.pdf new file mode 100644 index 0000000..fa9a4c2 Binary files /dev/null and b/hash-graph-dehornetify/experiments/summit_weak_scaling_intersect_noindex_nomanaged.pdf differ diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/.cproject b/hash-graph-dehornetify/externals/cub-1.8.0/.cproject new file mode 100644 index 0000000..e76d1da --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/.cproject @@ -0,0 +1,1223 @@ [.cproject body: 1,223 lines of Eclipse CDT project XML; the markup did not survive extraction and is not reproduced here]
diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/.project b/hash-graph-dehornetify/externals/cub-1.8.0/.project new file mode 100644 index 0000000..7aca9e0 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/.project @@ -0,0 +1,27 @@ + + GIT_CUB + + + + + org.eclipse.cdt.managedbuilder.core.genmakebuilder + clean,full,incremental, + + + + + org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder + full,incremental, + + + + + + org.eclipse.cdt.core.cnature + org.eclipse.cdt.managedbuilder.core.managedBuildNature + org.eclipse.cdt.managedbuilder.core.ScannerConfigNature + org.eclipse.cdt.core.ccnature + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/.settings/.gitignore b/hash-graph-dehornetify/externals/cub-1.8.0/.settings/.gitignore new file mode 100644 index 0000000..d81d4c4 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/.settings/.gitignore @@ -0,0 +1 @@ +/language.settings.xml diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/.settings/org.eclipse.cdt.codan.core.prefs b/hash-graph-dehornetify/externals/cub-1.8.0/.settings/org.eclipse.cdt.codan.core.prefs new file mode 100644 index 0000000..64da777 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/.settings/org.eclipse.cdt.codan.core.prefs @@ -0,0 +1,72 @@ +eclipse.preferences.version=1 +org.eclipse.cdt.codan.checkers.errnoreturn=Warning +org.eclipse.cdt.codan.checkers.errnoreturn.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},implicit\=>false} +org.eclipse.cdt.codan.checkers.errreturnvalue=Error +org.eclipse.cdt.codan.checkers.errreturnvalue.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.checkers.nocommentinside=-Error
+org.eclipse.cdt.codan.checkers.nocommentinside.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.checkers.nolinecomment=-Error +org.eclipse.cdt.codan.checkers.nolinecomment.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.checkers.noreturn=Error +org.eclipse.cdt.codan.checkers.noreturn.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},implicit\=>false} +org.eclipse.cdt.codan.internal.checkers.AbstractClassCreation=Error +org.eclipse.cdt.codan.internal.checkers.AbstractClassCreation.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.AmbiguousProblem=Error +org.eclipse.cdt.codan.internal.checkers.AmbiguousProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.AssignmentInConditionProblem=Warning +org.eclipse.cdt.codan.internal.checkers.AssignmentInConditionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.AssignmentToItselfProblem=Error +org.eclipse.cdt.codan.internal.checkers.AssignmentToItselfProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.CaseBreakProblem=Warning +org.eclipse.cdt.codan.internal.checkers.CaseBreakProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},no_break_comment\=>"no break",last_case_param\=>true,empty_case_param\=>false} +org.eclipse.cdt.codan.internal.checkers.CatchByReference=Warning +org.eclipse.cdt.codan.internal.checkers.CatchByReference.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},unknown\=>false,exceptions\=>()} +org.eclipse.cdt.codan.internal.checkers.CircularReferenceProblem=Error +org.eclipse.cdt.codan.internal.checkers.CircularReferenceProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.ClassMembersInitialization=Warning +org.eclipse.cdt.codan.internal.checkers.ClassMembersInitialization.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},skip\=>true} +org.eclipse.cdt.codan.internal.checkers.FieldResolutionProblem=Error 
+org.eclipse.cdt.codan.internal.checkers.FieldResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.FunctionResolutionProblem=Error +org.eclipse.cdt.codan.internal.checkers.FunctionResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.InvalidArguments=Error +org.eclipse.cdt.codan.internal.checkers.InvalidArguments.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.InvalidTemplateArgumentsProblem=Error +org.eclipse.cdt.codan.internal.checkers.InvalidTemplateArgumentsProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.LabelStatementNotFoundProblem=Error +org.eclipse.cdt.codan.internal.checkers.LabelStatementNotFoundProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.MemberDeclarationNotFoundProblem=Error +org.eclipse.cdt.codan.internal.checkers.MemberDeclarationNotFoundProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.MethodResolutionProblem=Error +org.eclipse.cdt.codan.internal.checkers.MethodResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.NamingConventionFunctionChecker=-Info +org.eclipse.cdt.codan.internal.checkers.NamingConventionFunctionChecker.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},pattern\=>"^[a-z]",macro\=>true,exceptions\=>()} +org.eclipse.cdt.codan.internal.checkers.NonVirtualDestructorProblem=Warning +org.eclipse.cdt.codan.internal.checkers.NonVirtualDestructorProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.OverloadProblem=Error +org.eclipse.cdt.codan.internal.checkers.OverloadProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.RedeclarationProblem=Error +org.eclipse.cdt.codan.internal.checkers.RedeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.RedefinitionProblem=Error 
+org.eclipse.cdt.codan.internal.checkers.RedefinitionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.ReturnStyleProblem=-Warning +org.eclipse.cdt.codan.internal.checkers.ReturnStyleProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.ScanfFormatStringSecurityProblem=-Warning +org.eclipse.cdt.codan.internal.checkers.ScanfFormatStringSecurityProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.StatementHasNoEffectProblem=Warning +org.eclipse.cdt.codan.internal.checkers.StatementHasNoEffectProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},macro\=>true,exceptions\=>()} +org.eclipse.cdt.codan.internal.checkers.SuggestedParenthesisProblem=Warning +org.eclipse.cdt.codan.internal.checkers.SuggestedParenthesisProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},paramNot\=>false} +org.eclipse.cdt.codan.internal.checkers.SuspiciousSemicolonProblem=Warning +org.eclipse.cdt.codan.internal.checkers.SuspiciousSemicolonProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},else\=>false,afterelse\=>false} +org.eclipse.cdt.codan.internal.checkers.TypeResolutionProblem=Error +org.eclipse.cdt.codan.internal.checkers.TypeResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +org.eclipse.cdt.codan.internal.checkers.UnusedFunctionDeclarationProblem=Warning +org.eclipse.cdt.codan.internal.checkers.UnusedFunctionDeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},macro\=>true} +org.eclipse.cdt.codan.internal.checkers.UnusedStaticFunctionProblem=Warning +org.eclipse.cdt.codan.internal.checkers.UnusedStaticFunctionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},macro\=>true} +org.eclipse.cdt.codan.internal.checkers.UnusedVariableDeclarationProblem=Warning +org.eclipse.cdt.codan.internal.checkers.UnusedVariableDeclarationProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true},macro\=>true,exceptions\=>("@(\#)","$Id")} +org.eclipse.cdt.codan.internal.checkers.VariableResolutionProblem=Error +org.eclipse.cdt.codan.internal.checkers.VariableResolutionProblem.params={launchModes\=>{RUN_ON_FULL_BUILD\=>true,RUN_ON_INC_BUILD\=>true,RUN_ON_FILE_OPEN\=>false,RUN_ON_FILE_SAVE\=>false,RUN_AS_YOU_TYPE\=>true,RUN_ON_DEMAND\=>true}} +useParentScope=false diff --git 
a/hash-graph-dehornetify/externals/cub-1.8.0/.settings/org.eclipse.cdt.core.prefs b/hash-graph-dehornetify/externals/cub-1.8.0/.settings/org.eclipse.cdt.core.prefs new file mode 100644 index 0000000..80b8e65 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/.settings/org.eclipse.cdt.core.prefs @@ -0,0 +1,177 @@ +eclipse.preferences.version=1 +indexer/indexAllFiles=true +indexer/indexAllHeaderVersions=false +indexer/indexAllVersionsSpecificHeaders= +indexer/indexOnOpen=false +indexer/indexUnusedHeadersWithAlternateLang=false +indexer/indexUnusedHeadersWithDefaultLang=true +indexer/indexerId=org.eclipse.cdt.core.fastIndexer +indexer/skipFilesLargerThanMB=8 +indexer/skipImplicitReferences=false +indexer/skipIncludedFilesLargerThanMB=16 +indexer/skipMacroReferences=false +indexer/skipReferences=false +indexer/skipTypeReferences=false +indexer/useHeuristicIncludeResolution=true +org.eclipse.cdt.core.formatter.alignment_for_arguments_in_method_invocation=16 +org.eclipse.cdt.core.formatter.alignment_for_assignment=16 +org.eclipse.cdt.core.formatter.alignment_for_base_clause_in_type_declaration=48 +org.eclipse.cdt.core.formatter.alignment_for_binary_expression=16 +org.eclipse.cdt.core.formatter.alignment_for_compact_if=0 +org.eclipse.cdt.core.formatter.alignment_for_conditional_expression=48 +org.eclipse.cdt.core.formatter.alignment_for_conditional_expression_chain=18 +org.eclipse.cdt.core.formatter.alignment_for_constructor_initializer_list=0 +org.eclipse.cdt.core.formatter.alignment_for_declarator_list=16 +org.eclipse.cdt.core.formatter.alignment_for_enumerator_list=48 +org.eclipse.cdt.core.formatter.alignment_for_expression_list=0 +org.eclipse.cdt.core.formatter.alignment_for_expressions_in_array_initializer=16 +org.eclipse.cdt.core.formatter.alignment_for_member_access=0 +org.eclipse.cdt.core.formatter.alignment_for_overloaded_left_shift_chain=16 +org.eclipse.cdt.core.formatter.alignment_for_parameters_in_method_declaration=48 +org.eclipse.cdt.core.formatter.alignment_for_throws_clause_in_method_declaration=48 +org.eclipse.cdt.core.formatter.brace_position_for_array_initializer=next_line +org.eclipse.cdt.core.formatter.brace_position_for_block=next_line +org.eclipse.cdt.core.formatter.brace_position_for_block_in_case=end_of_line +org.eclipse.cdt.core.formatter.brace_position_for_method_declaration=next_line +org.eclipse.cdt.core.formatter.brace_position_for_namespace_declaration=end_of_line +org.eclipse.cdt.core.formatter.brace_position_for_switch=end_of_line +org.eclipse.cdt.core.formatter.brace_position_for_type_declaration=next_line +org.eclipse.cdt.core.formatter.comment.min_distance_between_code_and_line_comment=1 +org.eclipse.cdt.core.formatter.comment.never_indent_line_comments_on_first_column=true +org.eclipse.cdt.core.formatter.comment.preserve_white_space_between_code_and_line_comments=true +org.eclipse.cdt.core.formatter.compact_else_if=true +org.eclipse.cdt.core.formatter.continuation_indentation=1 +org.eclipse.cdt.core.formatter.continuation_indentation_for_array_initializer=1 +org.eclipse.cdt.core.formatter.format_guardian_clause_on_one_line=false +org.eclipse.cdt.core.formatter.indent_access_specifier_compare_to_type_header=false +org.eclipse.cdt.core.formatter.indent_access_specifier_extra_spaces=0 +org.eclipse.cdt.core.formatter.indent_body_declarations_compare_to_access_specifier=true +org.eclipse.cdt.core.formatter.indent_body_declarations_compare_to_namespace_header=false +org.eclipse.cdt.core.formatter.indent_breaks_compare_to_cases=true 
+org.eclipse.cdt.core.formatter.indent_declaration_compare_to_template_header=false +org.eclipse.cdt.core.formatter.indent_empty_lines=false +org.eclipse.cdt.core.formatter.indent_statements_compare_to_block=true +org.eclipse.cdt.core.formatter.indent_statements_compare_to_body=true +org.eclipse.cdt.core.formatter.indent_switchstatements_compare_to_cases=true +org.eclipse.cdt.core.formatter.indent_switchstatements_compare_to_switch=false +org.eclipse.cdt.core.formatter.indentation.size=4 +org.eclipse.cdt.core.formatter.insert_new_line_after_opening_brace_in_array_initializer=do not insert +org.eclipse.cdt.core.formatter.insert_new_line_after_template_declaration=do not insert +org.eclipse.cdt.core.formatter.insert_new_line_at_end_of_file_if_missing=do not insert +org.eclipse.cdt.core.formatter.insert_new_line_before_catch_in_try_statement=insert +org.eclipse.cdt.core.formatter.insert_new_line_before_closing_brace_in_array_initializer=do not insert +org.eclipse.cdt.core.formatter.insert_new_line_before_colon_in_constructor_initializer_list=do not insert +org.eclipse.cdt.core.formatter.insert_new_line_before_else_in_if_statement=insert +org.eclipse.cdt.core.formatter.insert_new_line_before_identifier_in_function_declaration=do not insert +org.eclipse.cdt.core.formatter.insert_new_line_before_while_in_do_statement=do not insert +org.eclipse.cdt.core.formatter.insert_new_line_in_empty_block=insert +org.eclipse.cdt.core.formatter.insert_space_after_assignment_operator=insert +org.eclipse.cdt.core.formatter.insert_space_after_binary_operator=insert +org.eclipse.cdt.core.formatter.insert_space_after_closing_angle_bracket_in_template_arguments=insert +org.eclipse.cdt.core.formatter.insert_space_after_closing_angle_bracket_in_template_parameters=insert +org.eclipse.cdt.core.formatter.insert_space_after_closing_brace_in_block=insert +org.eclipse.cdt.core.formatter.insert_space_after_closing_paren_in_cast=insert +org.eclipse.cdt.core.formatter.insert_space_after_colon_in_base_clause=insert +org.eclipse.cdt.core.formatter.insert_space_after_colon_in_case=insert +org.eclipse.cdt.core.formatter.insert_space_after_colon_in_conditional=insert +org.eclipse.cdt.core.formatter.insert_space_after_colon_in_labeled_statement=insert +org.eclipse.cdt.core.formatter.insert_space_after_comma_in_array_initializer=insert +org.eclipse.cdt.core.formatter.insert_space_after_comma_in_base_types=insert +org.eclipse.cdt.core.formatter.insert_space_after_comma_in_declarator_list=insert +org.eclipse.cdt.core.formatter.insert_space_after_comma_in_enum_declarations=insert +org.eclipse.cdt.core.formatter.insert_space_after_comma_in_expression_list=insert +org.eclipse.cdt.core.formatter.insert_space_after_comma_in_method_declaration_parameters=insert +org.eclipse.cdt.core.formatter.insert_space_after_comma_in_method_declaration_throws=insert +org.eclipse.cdt.core.formatter.insert_space_after_comma_in_method_invocation_arguments=insert +org.eclipse.cdt.core.formatter.insert_space_after_comma_in_template_arguments=insert +org.eclipse.cdt.core.formatter.insert_space_after_comma_in_template_parameters=insert +org.eclipse.cdt.core.formatter.insert_space_after_opening_angle_bracket_in_template_arguments=do not insert +org.eclipse.cdt.core.formatter.insert_space_after_opening_angle_bracket_in_template_parameters=do not insert +org.eclipse.cdt.core.formatter.insert_space_after_opening_brace_in_array_initializer=insert +org.eclipse.cdt.core.formatter.insert_space_after_opening_bracket=do not insert 
+org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_cast=do not insert +org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_catch=do not insert +org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_exception_specification=do not insert +org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_for=do not insert +org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_if=do not insert +org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_method_declaration=do not insert +org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_method_invocation=do not insert +org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_parenthesized_expression=do not insert +org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_switch=do not insert +org.eclipse.cdt.core.formatter.insert_space_after_opening_paren_in_while=do not insert +org.eclipse.cdt.core.formatter.insert_space_after_postfix_operator=do not insert +org.eclipse.cdt.core.formatter.insert_space_after_prefix_operator=do not insert +org.eclipse.cdt.core.formatter.insert_space_after_question_in_conditional=insert +org.eclipse.cdt.core.formatter.insert_space_after_semicolon_in_for=insert +org.eclipse.cdt.core.formatter.insert_space_after_unary_operator=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_assignment_operator=insert +org.eclipse.cdt.core.formatter.insert_space_before_binary_operator=insert +org.eclipse.cdt.core.formatter.insert_space_before_closing_angle_bracket_in_template_arguments=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_closing_angle_bracket_in_template_parameters=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_closing_brace_in_array_initializer=insert +org.eclipse.cdt.core.formatter.insert_space_before_closing_bracket=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_cast=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_catch=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_exception_specification=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_for=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_if=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_method_declaration=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_method_invocation=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_parenthesized_expression=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_switch=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_closing_paren_in_while=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_colon_in_base_clause=insert +org.eclipse.cdt.core.formatter.insert_space_before_colon_in_case=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_colon_in_conditional=insert +org.eclipse.cdt.core.formatter.insert_space_before_colon_in_default=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_colon_in_labeled_statement=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_comma_in_array_initializer=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_comma_in_base_types=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_comma_in_declarator_list=do not insert 
+org.eclipse.cdt.core.formatter.insert_space_before_comma_in_enum_declarations=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_comma_in_expression_list=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_comma_in_method_declaration_parameters=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_comma_in_method_declaration_throws=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_comma_in_method_invocation_arguments=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_comma_in_template_arguments=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_comma_in_template_parameters=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_angle_bracket_in_template_arguments=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_angle_bracket_in_template_parameters=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_array_initializer=insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_block=insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_method_declaration=insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_namespace_declaration=insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_switch=insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_brace_in_type_declaration=insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_bracket=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_catch=insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_exception_specification=insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_for=insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_if=insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_method_declaration=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_method_invocation=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_parenthesized_expression=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_switch=insert +org.eclipse.cdt.core.formatter.insert_space_before_opening_paren_in_while=insert +org.eclipse.cdt.core.formatter.insert_space_before_postfix_operator=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_prefix_operator=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_question_in_conditional=insert +org.eclipse.cdt.core.formatter.insert_space_before_semicolon=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_semicolon_in_for=do not insert +org.eclipse.cdt.core.formatter.insert_space_before_unary_operator=do not insert +org.eclipse.cdt.core.formatter.insert_space_between_empty_braces_in_array_initializer=do not insert +org.eclipse.cdt.core.formatter.insert_space_between_empty_brackets=do not insert +org.eclipse.cdt.core.formatter.insert_space_between_empty_parens_in_exception_specification=do not insert +org.eclipse.cdt.core.formatter.insert_space_between_empty_parens_in_method_declaration=do not insert +org.eclipse.cdt.core.formatter.insert_space_between_empty_parens_in_method_invocation=do not insert +org.eclipse.cdt.core.formatter.join_wrapped_lines=true +org.eclipse.cdt.core.formatter.keep_else_statement_on_same_line=false +org.eclipse.cdt.core.formatter.keep_empty_array_initializer_on_one_line=false 
+org.eclipse.cdt.core.formatter.keep_imple_if_on_one_line=true +org.eclipse.cdt.core.formatter.keep_then_statement_on_same_line=false +org.eclipse.cdt.core.formatter.lineSplit=80 +org.eclipse.cdt.core.formatter.number_of_empty_lines_to_preserve=1 +org.eclipse.cdt.core.formatter.put_empty_statement_on_new_line=true +org.eclipse.cdt.core.formatter.tabulation.char=space +org.eclipse.cdt.core.formatter.tabulation.size=4 +org.eclipse.cdt.core.formatter.use_tabs_only_for_leading_indentations=false diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/.settings/org.eclipse.cdt.ui.prefs b/hash-graph-dehornetify/externals/cub-1.8.0/.settings/org.eclipse.cdt.ui.prefs new file mode 100644 index 0000000..ca73f82 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/.settings/org.eclipse.cdt.ui.prefs @@ -0,0 +1,3 @@ +eclipse.preferences.version=1 +formatter_profile=_B40C +formatter_settings_version=1 diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/.settings/org.eclipse.core.runtime.prefs b/hash-graph-dehornetify/externals/cub-1.8.0/.settings/org.eclipse.core.runtime.prefs new file mode 100644 index 0000000..2e6330e --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/.settings/org.eclipse.core.runtime.prefs @@ -0,0 +1,4 @@ +content-types/enabled=true +content-types/org.eclipse.cdt.core.cxxHeader/file-extensions=cuh +content-types/org.eclipse.cdt.core.cxxSource/file-extensions=cu +eclipse.preferences.version=1 diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/CHANGE_LOG.TXT b/hash-graph-dehornetify/externals/cub-1.8.0/CHANGE_LOG.TXT new file mode 100644 index 0000000..ed7f395 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/CHANGE_LOG.TXT @@ -0,0 +1,403 @@ +1.8.0 02/15/2018 + - API change: change to the interfaces of cub::ShuffleIndex, cub::ShuffleUp, and + cub::ShuffleDown to better compute the PTX shfl control constant for + logical warps smaller than 32 threads + - Bug fixes: + - Issue #112: Bug in WarpScan's broadcast of warp-wide aggregate for + logical warps < 32 threads + + +//----------------------------------------------------------------------------- + +1.7.5 02/08/2018 + - Added radix-sorting support for __half keys + - Updated sorting policies for improved 8b-key performance + - Bug fixes: + - Syntax tweaks to mollify Clang + - Issue #127: DeviceRunLengthEncode::Encode returns wrong results + - Issue #128: 7-bit sorting passes fail for sm61 w/ large-values + + +//----------------------------------------------------------------------------- + +1.7.4 09/20/2017 + - Bug fixes: + - Issue #114: Can't pair non-trivially-constructible values in radix sort + - Issue #115: WarpReduce segmented reduction broken in CUDA 9 for logical warp sizes < 32 + +//----------------------------------------------------------------------------- + +1.7.3 08/28/2017 + - Bug fixes: + - Issue #110: DeviceHistogram null-pointer exception bug for iterator inputs + +//----------------------------------------------------------------------------- + +1.7.2 08/26/2017 + - Bug fixes: + - Issue #104: Device-wide reduction is now "run-to-run" deterministic for + pseudo-associative reduction operators (like floating point addition) + +//----------------------------------------------------------------------------- + +1.7.1 08/18/2017 + - Updated Volta radix sorting tuning policies + - Bug fixes: + - Issue #104 (uint64_t warp-reduce broken for cub 1.7.0 on cuda 8 and older) + - Issue #103 (Can't mix Thrust 9.0 and CUB) + - Issue #102 (CUB pulls in windows.h which defines min/max 
macros that conflict with std::min/std::max) + - Issue #99 (Radix sorting crashes NVCC on Windows 10 for SM52) + - Issue #98 (cuda-memcheck: --tool initcheck failed with lineOfSight) + - Issue #94 (Git clone size) + - Issue #93 (accept iterators for segment offsets) + - Issue #87 (CUB uses anonymous unions which is not valid C++) + - Issue #44 (Check for C++ 11 should be changed that Visual Studio 2013 is also recognized as C++ 11 capable) + +//----------------------------------------------------------------------------- + +1.7.0 06/07/2017 + - Compatible with CUDA9 and SM7.x (Volta) independent thread scheduling + - API change: remove cub::WarpAll() and cub::WarpAny(). These functions served to + emulate __all and __any functionality for SM1.x devices, which did not have those + operations. However, the SM1.x devices are now deprecated in CUDA, and the + interfaces of the these two functions are now lacking the lane-mask needed + for collectives to run on Volta SMs having independent thread scheduling. + - Bug fixes: + - Issue #86 Incorrect results with ReduceByKey + +//----------------------------------------------------------------------------- + +1.6.4 12/06/2016 + - Updated sm_5x, sm_6x tuning policies for radix sorting (3.5B and 3.4B + 32b keys/s on TitanX and GTX 1080, respectively) + - Bug fixes: + - Restore fence work-around for scan (reduce-by-key, etc.) hangs + in CUDA 8.5 + - Issue 65: DeviceSegmentedRadixSort should allow inputs to have + pointer-to-const type + - Mollify Clang device-side warnings + - Remove out-dated VC project files + +//----------------------------------------------------------------------------- + +1.6.3 11/20/2016 + - API change: BlockLoad and BlockStore are now templated by the local + data type, instead of the Iterator type. This allows for output iterators + having \p void as their \p value_type (e.g., discard iterators). + - Updated GP100 tuning policies for radix sorting (6.2B 32b keys/s) + - Bug fixes: + - Issue #74: Warpreduce executes reduction operator for out-of-bounds items + - Issue #72 (cub:InequalityWrapper::operator() should be non-const) + - Issue #71 (KeyVairPair won't work if Key has non-trivial ctor) + - Issue #70 1.5.3 breaks BlockScan API. Retroactively reversioned + from v1.5.3 -> v1.6 to appropriately indicate API change. + - Issue #69 cub::BlockStore::Store doesn't compile if OutputIteratorT::value_type != T + - Issue #68 (cub::TilePrefixCallbackOp::WarpReduce doesn't permit ptx + arch specialization) + - Improved support for Win32 platforms (warnings, alignment, etc) + +//----------------------------------------------------------------------------- + +1.6.2 (was 1.5.5) 10/25/2016 + - Updated Pascal tuning policies for radix sorting + - Bug fixes: + - Fix for arm64 compilation of caching allocator + +//----------------------------------------------------------------------------- + +1.6.1 (was 1.5.4) 10/14/2016 + - Bug fixes: + - Fix for radix sorting bug introduced by scan refactorization + +//----------------------------------------------------------------------------- + +1.6.0 (was 1.5.3) 10/11/2016 + - API change: Device/block/warp-wide exclusive scans have been revised to now + accept an "initial value" (instead of an "identity value") for seeding the + computation with an arbitrary prefix. 
+ - API change: Device-wide reductions and scans can now have input sequence types that are + different from output sequence types (as long as they are coercible) + value") for seeding the computation with an arbitrary prefix + - Reduce repository size (move doxygen binary to doc repository) + - Minor reductions in block-scan instruction count + - Bug fixes: + - Issue #55: warning in cub/device/dispatch/dispatch_reduce_by_key.cuh + - Issue #59: cub::DeviceScan::ExclusiveSum can't prefix sum of float into double + - Issue #58: Infinite loop in cub::CachingDeviceAllocator::NearestPowerOf + - Issue #47: Caching allocator needs to clean up cuda error upon successful retry + - Issue #46: Very high amount of needed memory from the cub::DeviceHistogram::HistogramEven routine + - Issue #45: Caching Device Allocator fails with debug output enabled + - Fix for generic-type reduce-by-key warpscan (sm3.x and newer) + +//----------------------------------------------------------------------------- + +1.5.2 03/21/2016 + - Improved medium-size scan performance for sm5x (Maxwell) + - Refactored caching allocator for device memory + - Spends less time locked + - Failure to allocate a block from the runtime will retry once after + freeing cached allocations + - Now respects max-bin (issue where blocks in excess of max-bin were + still being retained in free cache) + - Uses C++11 mutex when available + - Bug fixes: + - Fix for generic-type reduce-by-key warpscan (sm3.x and newer) + +//----------------------------------------------------------------------------- + +1.5.1 12/28/2015 + - Bug fixes: + - Fix for incorrect DeviceRadixSort output for some small problems on + Maxwell SM52 architectures + - Fix for macro redefinition warnings when compiling with Thrust sort + +//----------------------------------------------------------------------------- + +1.5.0 12/14/2015 + - New Features: + - Added new segmented device-wide operations for device-wide sort and + reduction primitives. + - Bug fixes: + - Fix for Git Issue 36 (Compilation error with GCC 4.8.4 nvcc 7.0.27) and + Forums thread (ThreadLoad generates compiler errors when loading from + pointer-to-const) + - Fix for Git Issue 29 (DeviceRadixSort::SortKeys yields compiler + errors) + - Fix for Git Issue 26 (CUDA error: misaligned address after + cub::DeviceRadixSort::SortKeys()) + - Fix for incorrect/crash on 0-length problems, e.g., Git Issue 25 (Floating + point exception (core dumped) during cub::DeviceRadixSort::SortKeys) + - Fix for CUDA 7.5 issues on SM 5.2 with SHFL-based warp-scan and warp-reduction + on non-primitive data types (e.g., user-defined structs) + - Fix for small radix sorting problems where 0 temporary bytes were + required and users code was invoking malloc(0) on some systems where + that returns NULL. (Impl assumed was asking for size again and was not + running the sort.) 
+ +//----------------------------------------------------------------------------- + +1.4.1 04/13/2015 + - Bug fixes: + - Fixes for CUDA 7.0 issues with SHFL-based warp-scan and warp-reduction + on non-primitive data types (e.g., user-defined structs) + - Fixes for minor CUDA 7.0 performance regressions in cub::DeviceScan, + DeviceReduceByKey + - Fixes to allow cub::DeviceRadixSort and cub::BlockRadixSort on bool types + - Remove requirement for callers to define the CUB_CDP macro + when invoking CUB device-wide rountines using CUDA dynamic parallelism + - Fix for headers not being included in the proper order (or missing includes) + for some block-wide functions + +//----------------------------------------------------------------------------- + +1.4.0 03/18/2015 + - New Features: + - Support and performance tuning for new Maxwell GPU architectures + - Updated cub::DeviceHistogram implementation that provides the same + "histogram-even" and "histogram-range" functionality as IPP/NPP. + Provides extremely fast and, perhaps more importantly, very + uniform performance response across diverse real-world datasets, + including pathological (homogeneous) sample distributions (resilience) + - New cub::DeviceSpmv methods for multiplying sparse matrices by + dense vectors, load-balanced using a merge-based parallel decomposition. + - New cub::DeviceRadixSort sorting entry-points that always return + the sorted output into the specified buffer (as opposed to the + cub::DoubleBuffer in which it could end up in either buffer) + - New cub::DeviceRunLengthEncode::NonTrivialRuns for finding the starting + offsets and lengths of all non-trivial runs (i.e., length > 1) of keys in + a given sequence. (Useful for top-down partitioning algorithms like + MSD sorting of very-large keys.) + +//----------------------------------------------------------------------------- + +1.3.2 07/28/2014 + - Bug fixes: + - Fix for cub::DeviceReduce where reductions of small problems + (small enough to only dispatch a single thread block) would run in + the default stream (stream zero) regardless of whether an alternate + stream was specified. + +//----------------------------------------------------------------------------- + +1.3.1 05/23/2014 + - Bug fixes: + - Workaround for a benign WAW race warning reported by cuda-memcheck + in BlockScan specialized for BLOCK_SCAN_WARP_SCANS algorithm. + - Fix for bug in DeviceRadixSort where the algorithm may sort more + key bits than the caller specified (up to the nearest radix digit). + - Fix for ~3% DeviceRadixSort performance regression on Kepler and + Fermi that was introduced in v1.3.0. + +//----------------------------------------------------------------------------- + +1.3.0 05/12/2014 + - New features: + - CUB's collective (block-wide, warp-wide) primitives underwent a minor + interface refactoring: + - To provide the appropriate support for multidimensional thread blocks, + The interfaces for collective classes are now template-parameterized + by X, Y, and Z block dimensions (with BLOCK_DIM_Y and BLOCK_DIM_Z being + optional, and BLOCK_DIM_X replacing BLOCK_THREADS). Furthermore, the + constructors that accept remapped linear thread-identifiers have been + removed: all primitives now assume a row-major thread-ranking for + multidimensional thread blocks. 
+ - To allow the host program (compiled by the host-pass) to + accurately determine the device-specific storage requirements for + a given collective (compiled for each device-pass), the interfaces + for collective classes are now (optionally) template-parameterized + by the desired PTX compute capability. This is useful when + aliasing collective storage to shared memory that has been + allocated dynamically by the host at the kernel call site. + - Most CUB programs having typical 1D usage should not require any + changes to accomodate these updates. + - Added new "combination" WarpScan methods for efficiently computing + both inclusive and exclusive prefix scans (and sums). + - Bug fixes: + - Fixed bug in cub::WarpScan (which affected cub::BlockScan and + cub::DeviceScan) where incorrect results (e.g., NAN) would often be + returned when parameterized for floating-point types (fp32, fp64). + - Workaround-fix for ptxas error when compiling with with -G flag on Linux + (for debug instrumentation) + - Misc. workaround-fixes for certain scan scenarios (using custom + scan operators) where code compiled for SM1x is run on newer + GPUs of higher compute-capability: the compiler could not tell + which memory space was being used collective operations and was + mistakenly using global ops instead of shared ops. + +//----------------------------------------------------------------------------- + +1.2.3 04/01/2014 + - Bug fixes: + - Fixed access violation bug in DeviceReduce::ReduceByKey for non-primitive value types + - Fixed code-snippet bug in ArgIndexInputIteratorT documentation + +//----------------------------------------------------------------------------- + +1.2.2 03/03/2014 + - New features: + - Added MS VC++ project solutions for device-wide and block-wide examples + - Performance: + - Added a third algorithmic variant of cub::BlockReduce for improved performance + when using commutative operators (e.g., numeric addition) + - Bug fixes: + - Fixed bug where inclusion of Thrust headers in a certain order prevented CUB device-wide primitives from working properly + +//----------------------------------------------------------------------------- + +1.2.0 02/25/2014 + - New features: + - Added device-wide reduce-by-key (DeviceReduce::ReduceByKey, DeviceReduce::RunLengthEncode) + - Performance + - Improved DeviceScan, DeviceSelect, DevicePartition performance + - Documentation and testing: + - Compatible with CUDA 6.0 + - Added performance-portability plots for many device-wide primitives to doc + - Update doc and tests to reflect iterator (in)compatibilities with CUDA 5.0 (and older) and Thrust 1.6 (and older). + - Bug fixes + - Revised the operation of temporary tile status bookkeeping for DeviceScan (and similar) to be safe for current code run on future platforms (now uses proper fences) + - Fixed DeviceScan bug where Win32 alignment disagreements between host and device regarding user-defined data types would corrupt tile status + - Fixed BlockScan bug where certain exclusive scans on custom data types for the BLOCK_SCAN_WARP_SCANS variant would return incorrect results for the first thread in the block + - Added workaround for TexRefInputIteratorTto work with CUDA 6.0 + +//----------------------------------------------------------------------------- + +1.1.1 12/11/2013 + - New features: + - Added TexObjInputIteratorT, TexRefInputIteratorT, CacheModifiedInputIteratorT, and CacheModifiedOutputIterator types for loading & storing arbitrary types through the cache hierarchy. 
      Compatible with Thrust API.
    - Added descending sorting to DeviceRadixSort and BlockRadixSort
    - Added min, max, arg-min, and arg-max to DeviceReduce
    - Added DeviceSelect (select-unique, select-if, and select-flagged)
    - Added DevicePartition (partition-if, partition-flagged)
    - Added generic cub::ShuffleUp(), cub::ShuffleDown(), and cub::ShuffleIndex() for warp-wide communication of arbitrary data types (SM3x+)
    - Added cub::MaxSmOccupancy() for accurately determining SM occupancy for any given kernel function pointer
  - Performance
    - Improved DeviceScan and DeviceRadixSort performance for older architectures (SM10-SM30)
  - Interface changes:
    - Refactored block-wide I/O (BlockLoad and BlockStore), removing cache-modifiers from their interfaces. The CacheModifiedInputIteratorT and CacheModifiedOutputIterator should now be used with BlockLoad and BlockStore to effect that behavior.
    - Renamed device-wide "stream_synchronous" param to "debug_synchronous" to avoid confusion about usage
  - Documentation and testing:
    - Added simple examples of device-wide methods
    - Improved doxygen documentation and example snippets
    - Improved test coverage to include up to 21,000 kernel variants and 851,000 unit tests (per architecture, per platform)
  - Bug fixes
    - Fixed misc. DeviceScan, BlockScan, DeviceReduce, and BlockReduce bugs when operating on non-primitive types for older architectures SM10-SM13
    - Fixed DeviceScan / WarpReduction bug: SHFL-based segmented reduction producing incorrect results for multi-word types (size > 4B) on Linux
    - Fixed BlockScan bug: for warpscan-based scans, not all threads in the first warp were entering the prefix callback functor
    - Fixed DeviceRadixSort bug: race condition with key-value pairs for pre-SM35 architectures
    - Fixed DeviceRadixSort bug: incorrect bitfield-extract behavior with long keys on 64-bit Linux
    - Fixed BlockDiscontinuity bug: compilation error for types other than int32/uint32
    - CDP (device-callable) versions of device-wide methods now report the same temporary storage allocation size requirement as their host-callable counterparts


//-----------------------------------------------------------------------------

1.0.2 08/23/2013
    - Corrections to code snippet examples for BlockLoad, BlockStore, and BlockDiscontinuity
    - Cleaned up unnecessary/missing header includes. You can now safely #include a specific .cuh (instead of cub.cuh)
    - Bug/compilation fixes for BlockHistogram

//-----------------------------------------------------------------------------

1.0.1 08/08/2013
    - New collective interface idiom (specialize::construct::invoke).
    - Added best-in-class DeviceRadixSort. Implements short-circuiting for homogeneous digit passes.
    - Added best-in-class DeviceScan. Implements single-pass "adaptive-lookback" strategy.
    - Significantly improved documentation (with example code snippets)
    - More extensive regression test suite for aggressively testing collective variants
    - Allow non-trivially-constructed types (previously unions had prevented aliasing temporary storage of those types)
    - Improved support for Kepler SHFL (collective ops now use SHFL for types larger than 32b)
    - Better code generation for 64-bit addressing within BlockLoad/BlockStore
    - DeviceHistogram now supports histograms of arbitrary bins
    - Misc.
fixes + - Workarounds for SM10 codegen issues in uncommonly-used WarpScan/Reduce specializations + - Updates to accommodate CUDA 5.5 dynamic parallelism + + +//----------------------------------------------------------------------------- + +0.9.4 05/07/2013 + + - Fixed compilation errors for SM10-SM13 + - Fixed compilation errors for some WarpScan entrypoints on SM30+ + - Added block-wide histogram (BlockHistogram256) + - Added device-wide histogram (DeviceHistogram256) + - Added new BlockScan algorithm variant BLOCK_SCAN_RAKING_MEMOIZE, which + trades more register consumption for less shared memory I/O) + - Updates to BlockRadixRank to use BlockScan (which improves performance + on Kepler due to SHFL instruction) + - Allow types other than C++ primitives to be used in WarpScan::*Sum methods + if they only have operator + overloaded. (Previously they also required + to support assignment from int(0).) + - Update BlockReduce's BLOCK_REDUCE_WARP_REDUCTIONS algorithm to work even + when block size is not an even multiple of warp size + - Added work management utility descriptors (GridQueue, GridEvenShare) + - Refactoring of DeviceAllocator interface and CachingDeviceAllocator + implementation + - Misc. documentation updates and corrections. + +//----------------------------------------------------------------------------- + +0.9.2 04/04/2013 + + - Added WarpReduce. WarpReduce uses the SHFL instruction when applicable. + BlockReduce now uses this WarpReduce instead of implementing its own. + - Misc. fixes for 64-bit Linux compilation warnings and errors. + - Misc. documentation updates and corrections. + +//----------------------------------------------------------------------------- + +0.9.1 03/09/2013 + + - Fix for ambiguity in BlockScan::Reduce() between generic reduction and + summation. Summation entrypoints are now called ::Sum(), similar to the + convention in BlockScan. + - Small edits to mainpage documentation and download tracking + +//----------------------------------------------------------------------------- + +0.9.0 03/07/2013 + + - Intial "preview" release. CUB is the first durable, high-performance library + of cooperative block-level, warp-level, and thread-level primitives for CUDA + kernel programming. More primitives and examples coming soon! + \ No newline at end of file diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/LICENSE.TXT b/hash-graph-dehornetify/externals/cub-1.8.0/LICENSE.TXT new file mode 100644 index 0000000..a678e64 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/LICENSE.TXT @@ -0,0 +1,24 @@ +Copyright (c) 2010-2011, Duane Merrill. All rights reserved. +Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the NVIDIA CORPORATION nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/README.md b/hash-graph-dehornetify/externals/cub-1.8.0/README.md new file mode 100644 index 0000000..d2a338e --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/README.md @@ -0,0 +1,128 @@ +
+

### About CUB

Current release: v1.8.0 (02/16/2018)

We recommend the [CUB Project Website](http://nvlabs.github.com/cub) for further information and examples.

CUB provides state-of-the-art, reusable software components for every layer
of the CUDA programming model:
- [Device-wide primitives](https://nvlabs.github.com/cub/group___device_module.html)
  - Sort, prefix scan, reduction, histogram, etc.
  - Compatible with CUDA dynamic parallelism
- [Block-wide "collective" primitives](https://nvlabs.github.com/cub/group___block_module.html)
  - I/O, sort, prefix scan, reduction, histogram, etc.
  - Compatible with arbitrary thread block sizes and types
- [Warp-wide "collective" primitives](https://nvlabs.github.com/cub/group___warp_module.html)
  - Warp-wide prefix scan, reduction, etc.
  - Safe and architecture-specific
- [Thread and resource utilities](https://nvlabs.github.com/cub/group___thread_module.html)
  - PTX intrinsics, device reflection, texture-caching iterators, caching memory allocators, etc.

![Orientation of collective primitives within the CUDA software stack](http://nvlabs.github.com/cub/cub_overview.png)
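The device-wide primitives follow a two-call temporary-storage idiom: a first call with a NULL workspace pointer only reports the number of workspace bytes required, and a second call performs the actual work. The sketch below illustrates this with cub::DeviceReduce::Sum; the wrapper `SumWithCub` and the pointers `d_in` / `d_sum` are illustrative names (not part of the library), and error checking is omitted for brevity.

```C++
#include <cuda_runtime.h>
#include <cub/cub.cuh>

// Illustrative host-side wrapper (not part of CUB): sums num_items ints
// from d_in into the single-int device output d_sum.
void SumWithCub(int *d_in, int *d_sum, int num_items)
{
    void   *d_temp_storage     = NULL;
    size_t  temp_storage_bytes = 0;

    // First call with a NULL workspace: CUB only computes the required
    // temporary-storage size and writes it to temp_storage_bytes.
    cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_sum, num_items);

    // Allocate the workspace, then run the actual reduction.
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_sum, num_items);

    cudaFree(d_temp_storage);
}
```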

+

### A Simple Example

```C++
#include <cub/cub.cuh>

// Block-sorting CUDA kernel
__global__ void BlockSortKernel(int *d_in, int *d_out)
{
    using namespace cub;

    // Specialize BlockRadixSort, BlockLoad, and BlockStore for 128 threads
    // owning 16 integer items each
    typedef BlockRadixSort<int, 128, 16>                     BlockRadixSort;
    typedef BlockLoad<int, 128, 16, BLOCK_LOAD_TRANSPOSE>    BlockLoad;
    typedef BlockStore<int, 128, 16, BLOCK_STORE_TRANSPOSE>  BlockStore;

    // Allocate shared memory
    __shared__ union {
        typename BlockRadixSort::TempStorage  sort;
        typename BlockLoad::TempStorage       load;
        typename BlockStore::TempStorage      store;
    } temp_storage;

    int block_offset = blockIdx.x * (128 * 16);  // OffsetT for this block's segment

    // Obtain a segment of 2048 consecutive keys that are blocked across threads
    int thread_keys[16];
    BlockLoad(temp_storage.load).Load(d_in + block_offset, thread_keys);
    __syncthreads();

    // Collectively sort the keys
    BlockRadixSort(temp_storage.sort).Sort(thread_keys);
    __syncthreads();

    // Store the sorted segment
    BlockStore(temp_storage.store).Store(d_out + block_offset, thread_keys);
}
```

Each thread block uses cub::BlockRadixSort to collectively sort
its own input segment. The class is specialized by the
data type being sorted, by the number of threads per block, by the number of
keys per thread, and implicitly by the targeted compilation architecture.

The cub::BlockLoad and cub::BlockStore classes are similarly specialized.
Furthermore, to provide coalesced accesses to device memory, these primitives are
configured to access memory using a striped access pattern (where consecutive threads
simultaneously access consecutive items) and then transpose the keys into
a [blocked arrangement](index.html#sec4sec3) of elements across threads.

Once specialized, these classes expose opaque `TempStorage` member types.
The thread block uses these storage types to statically allocate the union of
shared memory needed by the thread block. (Alternatively, these storage types
could be aliased to global memory allocations.)
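For illustration, a hypothetical host-side launch of the kernel above might look as follows. The problem size, the assumption that it is an exact multiple of the 2048-key tile, and the device buffers `d_in` / `d_out` (assumed to already hold `num_items` ints) are placeholders, not part of the original example.

```C++
// Hypothetical launch: each block of 128 threads sorts its own
// 128 * 16 = 2048-key segment, so the grid size follows from the input size
// (assumed here to be an exact multiple of the tile size).
int num_items = 1 << 20;
int tile_size = 128 * 16;
int grid_size = num_items / tile_size;

BlockSortKernel<<<grid_size, 128>>>(d_in, d_out);
cudaDeviceSynchronize();
```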

+

### Stable Releases

CUB releases are labeled using version identifiers having three fields:
*epoch.feature.update*. The *epoch* field corresponds to support for
a major change in the CUDA programming model. The *feature* field
corresponds to a stable set of features, functionality, and interface. The
*update* field corresponds to a bug-fix or performance update for that
feature set. At the moment, we do not publicly provide non-stable releases
such as development snapshots, beta releases, or rolling releases. (Feel free
to contact us if you would like such things.) See the
[CUB Project Website](http://nvlabs.github.com/cub) for more information.

+

### Contributors

+ +CUB is developed as an open-source project by [NVIDIA Research](http://research.nvidia.com). The primary contributor is [Duane Merrill](http://github.com/dumerrill). + +

+

### Open Source License

+ +CUB is available under the "New BSD" open-source license: + +``` +Copyright (c) 2010-2011, Duane Merrill. All rights reserved. +Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the NVIDIA CORPORATION nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +``` diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/common.mk b/hash-graph-dehornetify/externals/cub-1.8.0/common.mk new file mode 100644 index 0000000..82893ab --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/common.mk @@ -0,0 +1,233 @@ +#/****************************************************************************** +# * Copyright (c) 2011, Duane Merrill. All rights reserved. +# * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. +# * +# * Redistribution and use in source and binary forms, with or without +# * modification, are permitted provided that the following conditions are met: +# * * Redistributions of source code must retain the above copyright +# * notice, this list of conditions and the following disclaimer. +# * * Redistributions in binary form must reproduce the above copyright +# * notice, this list of conditions and the following disclaimer in the +# * documentation and/or other materials provided with the distribution. +# * * Neither the name of the NVIDIA CORPORATION nor the +# * names of its contributors may be used to endorse or promote products +# * derived from this software without specific prior written permission. +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY +# * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# * +#******************************************************************************/ + + +#------------------------------------------------------------------------------- +# Commandline Options +#------------------------------------------------------------------------------- + +# [sm=] Compute-capability to compile for, e.g., "sm=200,300,350" (SM20 by default). + +COMMA = , +ifdef sm + SM_ARCH = $(subst $(COMMA),-,$(sm)) +else + SM_ARCH = 200 +endif + +ifeq (700, $(findstring 700, $(SM_ARCH))) + SM_TARGETS += -gencode=arch=compute_70,code=\"sm_70,compute_70\" + SM_DEF += -DSM700 + TEST_ARCH = 700 +endif +ifeq (620, $(findstring 620, $(SM_ARCH))) + SM_TARGETS += -gencode=arch=compute_62,code=\"sm_62,compute_62\" + SM_DEF += -DSM620 + TEST_ARCH = 620 +endif +ifeq (610, $(findstring 610, $(SM_ARCH))) + SM_TARGETS += -gencode=arch=compute_61,code=\"sm_61,compute_61\" + SM_DEF += -DSM610 + TEST_ARCH = 610 +endif +ifeq (600, $(findstring 600, $(SM_ARCH))) + SM_TARGETS += -gencode=arch=compute_60,code=\"sm_60,compute_60\" + SM_DEF += -DSM600 + TEST_ARCH = 600 +endif +ifeq (520, $(findstring 520, $(SM_ARCH))) + SM_TARGETS += -gencode=arch=compute_52,code=\"sm_52,compute_52\" + SM_DEF += -DSM520 + TEST_ARCH = 520 +endif +ifeq (370, $(findstring 370, $(SM_ARCH))) + SM_TARGETS += -gencode=arch=compute_37,code=\"sm_37,compute_37\" + SM_DEF += -DSM370 + TEST_ARCH = 370 +endif +ifeq (350, $(findstring 350, $(SM_ARCH))) + SM_TARGETS += -gencode=arch=compute_35,code=\"sm_35,compute_35\" + SM_DEF += -DSM350 + TEST_ARCH = 350 +endif +ifeq (300, $(findstring 300, $(SM_ARCH))) + SM_TARGETS += -gencode=arch=compute_30,code=\"sm_30,compute_30\" + SM_DEF += -DSM300 + TEST_ARCH = 300 +endif +ifeq (210, $(findstring 210, $(SM_ARCH))) + SM_TARGETS += -gencode=arch=compute_20,code=\"sm_21,compute_20\" + SM_DEF += -DSM210 + TEST_ARCH = 210 +endif +ifeq (200, $(findstring 200, $(SM_ARCH))) + SM_TARGETS += -gencode=arch=compute_20,code=\"sm_20,compute_20\" + SM_DEF += -DSM200 + TEST_ARCH = 200 +endif +ifeq (130, $(findstring 130, $(SM_ARCH))) + SM_TARGETS += -gencode=arch=compute_13,code=\"sm_13,compute_13\" + SM_DEF += -DSM130 + TEST_ARCH = 130 +endif +ifeq (120, $(findstring 120, $(SM_ARCH))) + SM_TARGETS += -gencode=arch=compute_12,code=\"sm_12,compute_12\" + SM_DEF += -DSM120 + TEST_ARCH = 120 +endif +ifeq (110, $(findstring 110, $(SM_ARCH))) + SM_TARGETS += -gencode=arch=compute_11,code=\"sm_11,compute_11\" + SM_DEF += -DSM110 + TEST_ARCH = 110 +endif +ifeq (100, $(findstring 100, $(SM_ARCH))) + SM_TARGETS += -gencode=arch=compute_10,code=\"sm_10,compute_10\" + SM_DEF += -DSM100 + TEST_ARCH = 100 +endif + + +# [cdp=<0|1>] CDP enable option (default: no) +ifeq ($(cdp), 1) + DEFINES += -DCUB_CDP + CDP_SUFFIX = cdp + NVCCFLAGS += -rdc=true -lcudadevrt +else + CDP_SUFFIX = nocdp +endif + + +# [force32=<0|1>] Device addressing mode option (64-bit device pointers by default) +ifeq ($(force32), 1) + CPU_ARCH = -m32 + CPU_ARCH_SUFFIX = i386 +else + CPU_ARCH = -m64 + CPU_ARCH_SUFFIX = x86_64 + NPPI = 
-lnppist +endif + + +# [abi=<0|1>] CUDA ABI option (enabled by default) +ifneq ($(abi), 0) + ABI_SUFFIX = abi +else + NVCCFLAGS += -Xptxas -abi=no + ABI_SUFFIX = noabi +endif + + +# [open64=<0|1>] Middle-end compiler option (nvvm by default) +ifeq ($(open64), 1) + NVCCFLAGS += -open64 + PTX_SUFFIX = open64 +else + PTX_SUFFIX = nvvm +endif + + +# [verbose=<0|1>] Verbose toolchain output from nvcc option +ifeq ($(verbose), 1) + NVCCFLAGS += -v +endif + + +# [keep=<0|1>] Keep intermediate compilation artifacts option +ifeq ($(keep), 1) + NVCCFLAGS += -keep +endif + +# [debug=<0|1>] Generate debug mode code +ifeq ($(debug), 1) + NVCCFLAGS += -G +endif + + +#------------------------------------------------------------------------------- +# Compiler and compilation platform +#------------------------------------------------------------------------------- + +CUB_DIR = $(dir $(lastword $(MAKEFILE_LIST))) + +NVCC = "$(shell which nvcc)" +ifdef nvccver + NVCC_VERSION = $(nvccver) +else + NVCC_VERSION = $(strip $(shell nvcc --version | grep release | sed 's/.*release //' | sed 's/,.*//')) +endif + +# detect OS +OSUPPER = $(shell uname -s 2>/dev/null | tr [:lower:] [:upper:]) + +# Default flags: verbose kernel properties (regs, smem, cmem, etc.); runtimes for compilation phases +NVCCFLAGS += $(SM_DEF) -Xptxas -v -Xcudafe -\# + +ifeq (WIN_NT, $(findstring WIN_NT, $(OSUPPER))) + # For MSVC + # Enable more warnings and treat as errors + NVCCFLAGS += -Xcompiler /W3 -Xcompiler /WX + # Disable excess x86 floating point precision that can lead to results being labeled incorrectly + NVCCFLAGS += -Xcompiler /fp:strict + # Help the compiler/linker work with huge numbers of kernels on Windows + NVCCFLAGS += -Xcompiler /bigobj -Xcompiler /Zm500 + CC = cl + + # Multithreaded runtime + NVCCFLAGS += -Xcompiler /MT + +ifneq ($(force32), 1) + CUDART_CYG = "$(shell dirname $(NVCC))/../lib/Win32/cudart.lib" +else + CUDART_CYG = "$(shell dirname $(NVCC))/../lib/x64/cudart.lib" +endif + CUDART = "$(shell cygpath -w $(CUDART_CYG))" +else + # For g++ + # Disable excess x86 floating point precision that can lead to results being labeled incorrectly + NVCCFLAGS += -Xcompiler -ffloat-store + CC = g++ +ifneq ($(force32), 1) + CUDART = "$(shell dirname $(NVCC))/../lib/libcudart_static.a" +else + CUDART = "$(shell dirname $(NVCC))/../lib64/libcudart_static.a" +endif +endif + +# Suffix to append to each binary +BIN_SUFFIX = sm$(SM_ARCH)_$(PTX_SUFFIX)_$(NVCC_VERSION)_$(ABI_SUFFIX)_$(CDP_SUFFIX)_$(CPU_ARCH_SUFFIX) + + +#------------------------------------------------------------------------------- +# Dependency Lists +#------------------------------------------------------------------------------- + +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) + +CUB_DEPS = $(call rwildcard, $(CUB_DIR),*.cuh) \ + $(CUB_DIR)common.mk + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_histogram.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_histogram.cuh new file mode 100644 index 0000000..37b1ec9 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_histogram.cuh @@ -0,0 +1,787 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::AgentHistogram implements a stateful abstraction of CUDA thread blocks for participating in device-wide histogram . + */ + +#pragma once + +#include + +#include "../util_type.cuh" +#include "../block/block_load.cuh" +#include "../grid/grid_queue.cuh" +#include "../iterator/cache_modified_input_iterator.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Tuning policy + ******************************************************************************/ + +/** + * + */ +enum BlockHistogramMemoryPreference +{ + GMEM, + SMEM, + BLEND +}; + + +/** + * Parameterizable tuning policy type for AgentHistogram + */ +template < + int _BLOCK_THREADS, ///< Threads per thread block + int _PIXELS_PER_THREAD, ///< Pixels per thread (per tile of input) + BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use + CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements + bool _RLE_COMPRESS, ///< Whether to perform localized RLE to compress samples before histogramming + BlockHistogramMemoryPreference _MEM_PREFERENCE, ///< Whether to prefer privatized shared-memory bins (versus privatized global-memory bins) + bool _WORK_STEALING> ///< Whether to dequeue tiles from a global work queue +struct AgentHistogramPolicy +{ + enum + { + BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block + PIXELS_PER_THREAD = _PIXELS_PER_THREAD, ///< Pixels per thread (per tile of input) + IS_RLE_COMPRESS = _RLE_COMPRESS, ///< Whether to perform localized RLE to compress samples before histogramming + MEM_PREFERENCE = _MEM_PREFERENCE, ///< Whether to prefer privatized shared-memory bins (versus privatized global-memory bins) + IS_WORK_STEALING = _WORK_STEALING, ///< Whether to dequeue tiles from a global work queue + }; + + static const 
BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use + static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements +}; + + +/****************************************************************************** + * Thread block abstractions + ******************************************************************************/ + +/** + * \brief AgentHistogram implements a stateful abstraction of CUDA thread blocks for participating in device-wide histogram . + */ +template < + typename AgentHistogramPolicyT, ///< Parameterized AgentHistogramPolicy tuning policy type + int PRIVATIZED_SMEM_BINS, ///< Number of privatized shared-memory histogram bins of any channel. Zero indicates privatized counters to be maintained in device-accessible memory. + int NUM_CHANNELS, ///< Number of channels interleaved in the input data. Supports up to four channels. + int NUM_ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed + typename SampleIteratorT, ///< Random-access input iterator type for reading samples + typename CounterT, ///< Integer type for counting sample occurrences per histogram bin + typename PrivatizedDecodeOpT, ///< The transform operator type for determining privatized counter indices from samples, one for each channel + typename OutputDecodeOpT, ///< The transform operator type for determining output bin-ids from privatized counter indices, one for each channel + typename OffsetT, ///< Signed integer type for global offsets + int PTX_ARCH = CUB_PTX_ARCH> ///< PTX compute capability +struct AgentHistogram +{ + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + /// The sample type of the input iterator + typedef typename std::iterator_traits::value_type SampleT; + + /// The pixel type of SampleT + typedef typename CubVector::Type PixelT; + + /// The quad type of SampleT + typedef typename CubVector::Type QuadT; + + /// Constants + enum + { + BLOCK_THREADS = AgentHistogramPolicyT::BLOCK_THREADS, + + PIXELS_PER_THREAD = AgentHistogramPolicyT::PIXELS_PER_THREAD, + SAMPLES_PER_THREAD = PIXELS_PER_THREAD * NUM_CHANNELS, + QUADS_PER_THREAD = SAMPLES_PER_THREAD / 4, + + TILE_PIXELS = PIXELS_PER_THREAD * BLOCK_THREADS, + TILE_SAMPLES = SAMPLES_PER_THREAD * BLOCK_THREADS, + + IS_RLE_COMPRESS = AgentHistogramPolicyT::IS_RLE_COMPRESS, + + MEM_PREFERENCE = (PRIVATIZED_SMEM_BINS > 0) ? 
+ AgentHistogramPolicyT::MEM_PREFERENCE : + GMEM, + + IS_WORK_STEALING = AgentHistogramPolicyT::IS_WORK_STEALING, + }; + + /// Cache load modifier for reading input elements + static const CacheLoadModifier LOAD_MODIFIER = AgentHistogramPolicyT::LOAD_MODIFIER; + + + /// Input iterator wrapper type (for applying cache modifier) + typedef typename If::VALUE, + CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedInputIterator + SampleIteratorT>::Type // Directly use the supplied input iterator type + WrappedSampleIteratorT; + + /// Pixel input iterator type (for applying cache modifier) + typedef CacheModifiedInputIterator + WrappedPixelIteratorT; + + /// Qaud input iterator type (for applying cache modifier) + typedef CacheModifiedInputIterator + WrappedQuadIteratorT; + + /// Parameterized BlockLoad type for samples + typedef BlockLoad< + SampleT, + BLOCK_THREADS, + SAMPLES_PER_THREAD, + AgentHistogramPolicyT::LOAD_ALGORITHM> + BlockLoadSampleT; + + /// Parameterized BlockLoad type for pixels + typedef BlockLoad< + PixelT, + BLOCK_THREADS, + PIXELS_PER_THREAD, + AgentHistogramPolicyT::LOAD_ALGORITHM> + BlockLoadPixelT; + + /// Parameterized BlockLoad type for quads + typedef BlockLoad< + QuadT, + BLOCK_THREADS, + QUADS_PER_THREAD, + AgentHistogramPolicyT::LOAD_ALGORITHM> + BlockLoadQuadT; + + /// Shared memory type required by this thread block + struct _TempStorage + { + CounterT histograms[NUM_ACTIVE_CHANNELS][PRIVATIZED_SMEM_BINS + 1]; // Smem needed for block-privatized smem histogram (with 1 word of padding) + + int tile_idx; + + // Aliasable storage layout + union Aliasable + { + typename BlockLoadSampleT::TempStorage sample_load; // Smem needed for loading a tile of samples + typename BlockLoadPixelT::TempStorage pixel_load; // Smem needed for loading a tile of pixels + typename BlockLoadQuadT::TempStorage quad_load; // Smem needed for loading a tile of quads + + } aliasable; + }; + + + /// Temporary storage type (unionable) + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Per-thread fields + //--------------------------------------------------------------------- + + /// Reference to temp_storage + _TempStorage &temp_storage; + + /// Sample input iterator (with cache modifier applied, if possible) + WrappedSampleIteratorT d_wrapped_samples; + + /// Native pointer for input samples (possibly NULL if unavailable) + SampleT* d_native_samples; + + /// The number of output bins for each channel + int (&num_output_bins)[NUM_ACTIVE_CHANNELS]; + + /// The number of privatized bins for each channel + int (&num_privatized_bins)[NUM_ACTIVE_CHANNELS]; + + /// Reference to gmem privatized histograms for each channel + CounterT* d_privatized_histograms[NUM_ACTIVE_CHANNELS]; + + /// Reference to final output histograms (gmem) + CounterT* (&d_output_histograms)[NUM_ACTIVE_CHANNELS]; + + /// The transform operator for determining output bin-ids from privatized counter indices, one for each channel + OutputDecodeOpT (&output_decode_op)[NUM_ACTIVE_CHANNELS]; + + /// The transform operator for determining privatized counter indices from samples, one for each channel + PrivatizedDecodeOpT (&privatized_decode_op)[NUM_ACTIVE_CHANNELS]; + + /// Whether to prefer privatized smem counters vs privatized global counters + bool prefer_smem; + + + //--------------------------------------------------------------------- + // Initialize privatized bin counters + 
//--------------------------------------------------------------------- + + // Initialize privatized bin counters + __device__ __forceinline__ void InitBinCounters(CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]) + { + // Initialize histogram bin counts to zeros + #pragma unroll + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + { + for (int privatized_bin = threadIdx.x; privatized_bin < num_privatized_bins[CHANNEL]; privatized_bin += BLOCK_THREADS) + { + privatized_histograms[CHANNEL][privatized_bin] = 0; + } + } + + // Barrier to make sure all threads are done updating counters + CTA_SYNC(); + } + + + // Initialize privatized bin counters. Specialized for privatized shared-memory counters + __device__ __forceinline__ void InitSmemBinCounters() + { + CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]; + + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + privatized_histograms[CHANNEL] = temp_storage.histograms[CHANNEL]; + + InitBinCounters(privatized_histograms); + } + + + // Initialize privatized bin counters. Specialized for privatized global-memory counters + __device__ __forceinline__ void InitGmemBinCounters() + { + InitBinCounters(d_privatized_histograms); + } + + + //--------------------------------------------------------------------- + // Update final output histograms + //--------------------------------------------------------------------- + + // Update final output histograms from privatized histograms + __device__ __forceinline__ void StoreOutput(CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]) + { + // Barrier to make sure all threads are done updating counters + CTA_SYNC(); + + // Apply privatized bin counts to output bin counts + #pragma unroll + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + { + int channel_bins = num_privatized_bins[CHANNEL]; + for (int privatized_bin = threadIdx.x; + privatized_bin < channel_bins; + privatized_bin += BLOCK_THREADS) + { + int output_bin = -1; + CounterT count = privatized_histograms[CHANNEL][privatized_bin]; + bool is_valid = count > 0; + + output_decode_op[CHANNEL].template BinSelect((SampleT) privatized_bin, output_bin, is_valid); + + if (output_bin >= 0) + { + atomicAdd(&d_output_histograms[CHANNEL][output_bin], count); + } + + } + } + } + + + // Update final output histograms from privatized histograms. Specialized for privatized shared-memory counters + __device__ __forceinline__ void StoreSmemOutput() + { + CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]; + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + privatized_histograms[CHANNEL] = temp_storage.histograms[CHANNEL]; + + StoreOutput(privatized_histograms); + } + + + // Update final output histograms from privatized histograms. Specialized for privatized global-memory counters + __device__ __forceinline__ void StoreGmemOutput() + { + StoreOutput(d_privatized_histograms); + } + + + //--------------------------------------------------------------------- + // Tile accumulation + //--------------------------------------------------------------------- + + // Accumulate pixels. Specialized for RLE compression. 
+ __device__ __forceinline__ void AccumulatePixels( + SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], + bool is_valid[PIXELS_PER_THREAD], + CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS], + Int2Type is_rle_compress) + { + #pragma unroll + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + { + // Bin pixels + int bins[PIXELS_PER_THREAD]; + + #pragma unroll + for (int PIXEL = 0; PIXEL < PIXELS_PER_THREAD; ++PIXEL) + { + bins[PIXEL] = -1; + privatized_decode_op[CHANNEL].template BinSelect(samples[PIXEL][CHANNEL], bins[PIXEL], is_valid[PIXEL]); + } + + CounterT accumulator = 1; + + #pragma unroll + for (int PIXEL = 0; PIXEL < PIXELS_PER_THREAD - 1; ++PIXEL) + { + if (bins[PIXEL] != bins[PIXEL + 1]) + { + if (bins[PIXEL] >= 0) + atomicAdd(privatized_histograms[CHANNEL] + bins[PIXEL], accumulator); + + accumulator = 0; + } + accumulator++; + } + + // Last pixel + if (bins[PIXELS_PER_THREAD - 1] >= 0) + atomicAdd(privatized_histograms[CHANNEL] + bins[PIXELS_PER_THREAD - 1], accumulator); + } + } + + + // Accumulate pixels. Specialized for individual accumulation of each pixel. + __device__ __forceinline__ void AccumulatePixels( + SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], + bool is_valid[PIXELS_PER_THREAD], + CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS], + Int2Type is_rle_compress) + { + #pragma unroll + for (int PIXEL = 0; PIXEL < PIXELS_PER_THREAD; ++PIXEL) + { + #pragma unroll + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + { + int bin = -1; + privatized_decode_op[CHANNEL].template BinSelect(samples[PIXEL][CHANNEL], bin, is_valid[PIXEL]); + if (bin >= 0) + atomicAdd(privatized_histograms[CHANNEL] + bin, 1); + } + } + } + + + /** + * Accumulate pixel, specialized for smem privatized histogram + */ + __device__ __forceinline__ void AccumulateSmemPixels( + SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], + bool is_valid[PIXELS_PER_THREAD]) + { + CounterT* privatized_histograms[NUM_ACTIVE_CHANNELS]; + + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + privatized_histograms[CHANNEL] = temp_storage.histograms[CHANNEL]; + + AccumulatePixels(samples, is_valid, privatized_histograms, Int2Type()); + } + + + /** + * Accumulate pixel, specialized for gmem privatized histogram + */ + __device__ __forceinline__ void AccumulateGmemPixels( + SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS], + bool is_valid[PIXELS_PER_THREAD]) + { + AccumulatePixels(samples, is_valid, d_privatized_histograms, Int2Type()); + } + + + + //--------------------------------------------------------------------- + // Tile loading + //--------------------------------------------------------------------- + + // Load full, aligned tile using pixel iterator (multi-channel) + template + __device__ __forceinline__ void LoadFullAlignedTile( + OffsetT block_offset, + int valid_samples, + SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], + Int2Type<_NUM_ACTIVE_CHANNELS> num_active_channels) + { + typedef PixelT AliasedPixels[PIXELS_PER_THREAD]; + + WrappedPixelIteratorT d_wrapped_pixels((PixelT*) (d_native_samples + block_offset)); + + // Load using a wrapped pixel iterator + BlockLoadPixelT(temp_storage.aliasable.pixel_load).Load( + d_wrapped_pixels, + reinterpret_cast(samples)); + } + + // Load full, aligned tile using quad iterator (single-channel) + __device__ __forceinline__ void LoadFullAlignedTile( + OffsetT block_offset, + int valid_samples, + SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], + Int2Type<1> num_active_channels) + { + typedef QuadT 
AliasedQuads[QUADS_PER_THREAD]; + + WrappedQuadIteratorT d_wrapped_quads((QuadT*) (d_native_samples + block_offset)); + + // Load using a wrapped quad iterator + BlockLoadQuadT(temp_storage.aliasable.quad_load).Load( + d_wrapped_quads, + reinterpret_cast(samples)); + } + + // Load full, aligned tile + __device__ __forceinline__ void LoadTile( + OffsetT block_offset, + int valid_samples, + SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], + Int2Type is_full_tile, + Int2Type is_aligned) + { + LoadFullAlignedTile(block_offset, valid_samples, samples, Int2Type()); + } + + // Load full, mis-aligned tile using sample iterator + __device__ __forceinline__ void LoadTile( + OffsetT block_offset, + int valid_samples, + SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], + Int2Type is_full_tile, + Int2Type is_aligned) + { + typedef SampleT AliasedSamples[SAMPLES_PER_THREAD]; + + // Load using sample iterator + BlockLoadSampleT(temp_storage.aliasable.sample_load).Load( + d_wrapped_samples + block_offset, + reinterpret_cast(samples)); + } + + // Load partially-full, aligned tile using the pixel iterator + __device__ __forceinline__ void LoadTile( + OffsetT block_offset, + int valid_samples, + SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], + Int2Type is_full_tile, + Int2Type is_aligned) + { + typedef PixelT AliasedPixels[PIXELS_PER_THREAD]; + + WrappedPixelIteratorT d_wrapped_pixels((PixelT*) (d_native_samples + block_offset)); + + int valid_pixels = valid_samples / NUM_CHANNELS; + + // Load using a wrapped pixel iterator + BlockLoadPixelT(temp_storage.aliasable.pixel_load).Load( + d_wrapped_pixels, + reinterpret_cast(samples), + valid_pixels); + } + + // Load partially-full, mis-aligned tile using sample iterator + __device__ __forceinline__ void LoadTile( + OffsetT block_offset, + int valid_samples, + SampleT (&samples)[PIXELS_PER_THREAD][NUM_CHANNELS], + Int2Type is_full_tile, + Int2Type is_aligned) + { + typedef SampleT AliasedSamples[SAMPLES_PER_THREAD]; + + BlockLoadSampleT(temp_storage.aliasable.sample_load).Load( + d_wrapped_samples + block_offset, + reinterpret_cast(samples), + valid_samples); + } + + + //--------------------------------------------------------------------- + // Tile processing + //--------------------------------------------------------------------- + + // Consume a tile of data samples + template < + bool IS_ALIGNED, // Whether the tile offset is aligned (quad-aligned for single-channel, pixel-aligned for multi-channel) + bool IS_FULL_TILE> // Whether the tile is full + __device__ __forceinline__ void ConsumeTile(OffsetT block_offset, int valid_samples) + { + SampleT samples[PIXELS_PER_THREAD][NUM_CHANNELS]; + bool is_valid[PIXELS_PER_THREAD]; + + // Load tile + LoadTile( + block_offset, + valid_samples, + samples, + Int2Type(), + Int2Type()); + + // Set valid flags + #pragma unroll + for (int PIXEL = 0; PIXEL < PIXELS_PER_THREAD; ++PIXEL) + is_valid[PIXEL] = IS_FULL_TILE || (((threadIdx.x * PIXELS_PER_THREAD + PIXEL) * NUM_CHANNELS) < valid_samples); + + // Accumulate samples +#if CUB_PTX_ARCH >= 120 + if (prefer_smem) + AccumulateSmemPixels(samples, is_valid); + else + AccumulateGmemPixels(samples, is_valid); +#else + AccumulateGmemPixels(samples, is_valid); +#endif + + } + + + // Consume row tiles. 
Specialized for work-stealing from queue + template + __device__ __forceinline__ void ConsumeTiles( + OffsetT num_row_pixels, ///< The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< The number of rows in the region of interest + OffsetT row_stride_samples, ///< The number of samples between starts of consecutive rows in the region of interest + int tiles_per_row, ///< Number of image tiles per row + GridQueue tile_queue, + Int2Type is_work_stealing) + { + + int num_tiles = num_rows * tiles_per_row; + int tile_idx = (blockIdx.y * gridDim.x) + blockIdx.x; + OffsetT num_even_share_tiles = gridDim.x * gridDim.y; + + while (tile_idx < num_tiles) + { + int row = tile_idx / tiles_per_row; + int col = tile_idx - (row * tiles_per_row); + OffsetT row_offset = row * row_stride_samples; + OffsetT col_offset = (col * TILE_SAMPLES); + OffsetT tile_offset = row_offset + col_offset; + + if (col == tiles_per_row - 1) + { + // Consume a partially-full tile at the end of the row + OffsetT num_remaining = (num_row_pixels * NUM_CHANNELS) - col_offset; + ConsumeTile(tile_offset, num_remaining); + } + else + { + // Consume full tile + ConsumeTile(tile_offset, TILE_SAMPLES); + } + + CTA_SYNC(); + + // Get next tile + if (threadIdx.x == 0) + temp_storage.tile_idx = tile_queue.Drain(1) + num_even_share_tiles; + + CTA_SYNC(); + + tile_idx = temp_storage.tile_idx; + } + } + + + // Consume row tiles. Specialized for even-share (striped across thread blocks) + template + __device__ __forceinline__ void ConsumeTiles( + OffsetT num_row_pixels, ///< The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< The number of rows in the region of interest + OffsetT row_stride_samples, ///< The number of samples between starts of consecutive rows in the region of interest + int tiles_per_row, ///< Number of image tiles per row + GridQueue tile_queue, + Int2Type is_work_stealing) + { + for (int row = blockIdx.y; row < num_rows; row += gridDim.y) + { + OffsetT row_begin = row * row_stride_samples; + OffsetT row_end = row_begin + (num_row_pixels * NUM_CHANNELS); + OffsetT tile_offset = row_begin + (blockIdx.x * TILE_SAMPLES); + + while (tile_offset < row_end) + { + OffsetT num_remaining = row_end - tile_offset; + + if (num_remaining < TILE_SAMPLES) + { + // Consume partial tile + ConsumeTile(tile_offset, num_remaining); + break; + } + + // Consume full tile + ConsumeTile(tile_offset, TILE_SAMPLES); + tile_offset += gridDim.x * TILE_SAMPLES; + } + } + } + + + //--------------------------------------------------------------------- + // Parameter extraction + //--------------------------------------------------------------------- + + // Return a native pixel pointer (specialized for CacheModifiedInputIterator types) + template < + CacheLoadModifier _MODIFIER, + typename _ValueT, + typename _OffsetT> + __device__ __forceinline__ SampleT* NativePointer(CacheModifiedInputIterator<_MODIFIER, _ValueT, _OffsetT> itr) + { + return itr.ptr; + } + + // Return a native pixel pointer (specialized for other types) + template + __device__ __forceinline__ SampleT* NativePointer(IteratorT itr) + { + return NULL; + } + + + + //--------------------------------------------------------------------- + // Interface + //--------------------------------------------------------------------- + + + /** + * Constructor + */ + __device__ __forceinline__ AgentHistogram( + TempStorage &temp_storage, ///< Reference to temp_storage + SampleIteratorT d_samples, ///< Input data to reduce + 
int (&num_output_bins)[NUM_ACTIVE_CHANNELS], ///< The number bins per final output histogram + int (&num_privatized_bins)[NUM_ACTIVE_CHANNELS], ///< The number bins per privatized histogram + CounterT* (&d_output_histograms)[NUM_ACTIVE_CHANNELS], ///< Reference to final output histograms + CounterT* (&d_privatized_histograms)[NUM_ACTIVE_CHANNELS], ///< Reference to privatized histograms + OutputDecodeOpT (&output_decode_op)[NUM_ACTIVE_CHANNELS], ///< The transform operator for determining output bin-ids from privatized counter indices, one for each channel + PrivatizedDecodeOpT (&privatized_decode_op)[NUM_ACTIVE_CHANNELS]) ///< The transform operator for determining privatized counter indices from samples, one for each channel + : + temp_storage(temp_storage.Alias()), + d_wrapped_samples(d_samples), + num_output_bins(num_output_bins), + num_privatized_bins(num_privatized_bins), + d_output_histograms(d_output_histograms), + privatized_decode_op(privatized_decode_op), + output_decode_op(output_decode_op), + d_native_samples(NativePointer(d_wrapped_samples)), + prefer_smem((MEM_PREFERENCE == SMEM) ? + true : // prefer smem privatized histograms + (MEM_PREFERENCE == GMEM) ? + false : // prefer gmem privatized histograms + blockIdx.x & 1) // prefer blended privatized histograms + { + int blockId = (blockIdx.y * gridDim.x) + blockIdx.x; + + // Initialize the locations of this block's privatized histograms + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + this->d_privatized_histograms[CHANNEL] = d_privatized_histograms[CHANNEL] + (blockId * num_privatized_bins[CHANNEL]); + } + + + /** + * Consume image + */ + __device__ __forceinline__ void ConsumeTiles( + OffsetT num_row_pixels, ///< The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< The number of rows in the region of interest + OffsetT row_stride_samples, ///< The number of samples between starts of consecutive rows in the region of interest + int tiles_per_row, ///< Number of image tiles per row + GridQueue tile_queue) ///< Queue descriptor for assigning tiles of work to thread blocks + { + // Check whether all row starting offsets are quad-aligned (in single-channel) or pixel-aligned (in multi-channel) + int quad_mask = AlignBytes::ALIGN_BYTES - 1; + int pixel_mask = AlignBytes::ALIGN_BYTES - 1; + size_t row_bytes = sizeof(SampleT) * row_stride_samples; + + bool quad_aligned_rows = (NUM_CHANNELS == 1) && (SAMPLES_PER_THREAD % 4 == 0) && // Single channel + ((size_t(d_native_samples) & quad_mask) == 0) && // ptr is quad-aligned + ((num_rows == 1) || ((row_bytes & quad_mask) == 0)); // number of row-samples is a multiple of the alignment of the quad + + bool pixel_aligned_rows = (NUM_CHANNELS > 1) && // Multi channel + ((size_t(d_native_samples) & pixel_mask) == 0) && // ptr is pixel-aligned + ((row_bytes & pixel_mask) == 0); // number of row-samples is a multiple of the alignment of the pixel + + // Whether rows are aligned and can be vectorized + if ((d_native_samples != NULL) && (quad_aligned_rows || pixel_aligned_rows)) + ConsumeTiles(num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue, Int2Type()); + else + ConsumeTiles(num_row_pixels, num_rows, row_stride_samples, tiles_per_row, tile_queue, Int2Type()); + } + + + /** + * Initialize privatized bin counters. 
Specialized for privatized shared-memory counters + */ + __device__ __forceinline__ void InitBinCounters() + { + if (prefer_smem) + InitSmemBinCounters(); + else + InitGmemBinCounters(); + } + + + /** + * Store privatized histogram to device-accessible memory. Specialized for privatized shared-memory counters + */ + __device__ __forceinline__ void StoreOutput() + { + if (prefer_smem) + StoreSmemOutput(); + else + StoreGmemOutput(); + } + + +}; + + + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_radix_sort_downsweep.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_radix_sort_downsweep.cuh new file mode 100644 index 0000000..faea881 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_radix_sort_downsweep.cuh @@ -0,0 +1,789 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * AgentRadixSortDownsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort downsweep . 
+ */ + + +#pragma once + +#include + +#include "../thread/thread_load.cuh" +#include "../block/block_load.cuh" +#include "../block/block_store.cuh" +#include "../block/block_radix_rank.cuh" +#include "../block/block_exchange.cuh" +#include "../util_type.cuh" +#include "../iterator/cache_modified_input_iterator.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Tuning policy types + ******************************************************************************/ + +/** + * Radix ranking algorithm + */ +enum RadixRankAlgorithm +{ + RADIX_RANK_BASIC, + RADIX_RANK_MEMOIZE, + RADIX_RANK_MATCH +}; + +/** + * Parameterizable tuning policy type for AgentRadixSortDownsweep + */ +template < + int _BLOCK_THREADS, ///< Threads per thread block + int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use + CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading keys (and values) + RadixRankAlgorithm _RANK_ALGORITHM, ///< The radix ranking algorithm to use + BlockScanAlgorithm _SCAN_ALGORITHM, ///< The block scan algorithm to use + int _RADIX_BITS> ///< The number of radix bits, i.e., log2(bins) +struct AgentRadixSortDownsweepPolicy +{ + enum + { + BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block + ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + RADIX_BITS = _RADIX_BITS, ///< The number of radix bits, i.e., log2(bins) + }; + + static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use + static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading keys (and values) + static const RadixRankAlgorithm RANK_ALGORITHM = _RANK_ALGORITHM; ///< The radix ranking algorithm to use + static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use +}; + + +/****************************************************************************** + * Thread block abstractions + ******************************************************************************/ + + + + + +/** + * \brief AgentRadixSortDownsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort downsweep . 
+ */ +template < + typename AgentRadixSortDownsweepPolicy, ///< Parameterized AgentRadixSortDownsweepPolicy tuning policy type + bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low + typename KeyT, ///< KeyT type + typename ValueT, ///< ValueT type + typename OffsetT> ///< Signed integer type for global offsets +struct AgentRadixSortDownsweep +{ + //--------------------------------------------------------------------- + // Type definitions and constants + //--------------------------------------------------------------------- + + // Appropriate unsigned-bits representation of KeyT + typedef typename Traits::UnsignedBits UnsignedBits; + + static const UnsignedBits LOWEST_KEY = Traits::LOWEST_KEY; + static const UnsignedBits MAX_KEY = Traits::MAX_KEY; + + static const BlockLoadAlgorithm LOAD_ALGORITHM = AgentRadixSortDownsweepPolicy::LOAD_ALGORITHM; + static const CacheLoadModifier LOAD_MODIFIER = AgentRadixSortDownsweepPolicy::LOAD_MODIFIER; + static const RadixRankAlgorithm RANK_ALGORITHM = AgentRadixSortDownsweepPolicy::RANK_ALGORITHM; + static const BlockScanAlgorithm SCAN_ALGORITHM = AgentRadixSortDownsweepPolicy::SCAN_ALGORITHM; + + enum + { + BLOCK_THREADS = AgentRadixSortDownsweepPolicy::BLOCK_THREADS, + ITEMS_PER_THREAD = AgentRadixSortDownsweepPolicy::ITEMS_PER_THREAD, + RADIX_BITS = AgentRadixSortDownsweepPolicy::RADIX_BITS, + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + + RADIX_DIGITS = 1 << RADIX_BITS, + KEYS_ONLY = Equals::VALUE, + }; + + // Input iterator wrapper type (for applying cache modifier)s + typedef CacheModifiedInputIterator KeysItr; + typedef CacheModifiedInputIterator ValuesItr; + + // Radix ranking type to use + typedef typename If<(RANK_ALGORITHM == RADIX_RANK_BASIC), + BlockRadixRank, + typename If<(RANK_ALGORITHM == RADIX_RANK_MEMOIZE), + BlockRadixRank, + BlockRadixRankMatch + >::Type + >::Type BlockRadixRankT; + + enum + { + /// Number of bin-starting offsets tracked per thread + BINS_TRACKED_PER_THREAD = BlockRadixRankT::BINS_TRACKED_PER_THREAD + }; + + // BlockLoad type (keys) + typedef BlockLoad< + UnsignedBits, + BLOCK_THREADS, + ITEMS_PER_THREAD, + LOAD_ALGORITHM> BlockLoadKeysT; + + // BlockLoad type (values) + typedef BlockLoad< + ValueT, + BLOCK_THREADS, + ITEMS_PER_THREAD, + LOAD_ALGORITHM> BlockLoadValuesT; + + // Value exchange array type + typedef ValueT ValueExchangeT[TILE_ITEMS]; + + /** + * Shared memory storage layout + */ + union __align__(16) _TempStorage + { + typename BlockLoadKeysT::TempStorage load_keys; + typename BlockLoadValuesT::TempStorage load_values; + typename BlockRadixRankT::TempStorage radix_rank; + + struct + { + UnsignedBits exchange_keys[TILE_ITEMS]; + OffsetT relative_bin_offsets[RADIX_DIGITS]; + }; + + Uninitialized exchange_values; + + OffsetT exclusive_digit_prefix[RADIX_DIGITS]; + }; + + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Thread fields + //--------------------------------------------------------------------- + + // Shared storage for this CTA + _TempStorage &temp_storage; + + // Input and output device pointers + KeysItr d_keys_in; + ValuesItr d_values_in; + UnsignedBits *d_keys_out; + ValueT *d_values_out; + + // The global scatter base offset for each digit (valid in the first RADIX_DIGITS threads) + OffsetT bin_offset[BINS_TRACKED_PER_THREAD]; + + // The least-significant bit position of the current digit to extract + int current_bit; + + // 
Number of bits in current digit + int num_bits; + + // Whether to short-cirucit + int short_circuit; + + //--------------------------------------------------------------------- + // Utility methods + //--------------------------------------------------------------------- + + + /** + * Scatter ranked keys through shared memory, then to device-accessible memory + */ + template + __device__ __forceinline__ void ScatterKeys( + UnsignedBits (&twiddled_keys)[ITEMS_PER_THREAD], + OffsetT (&relative_bin_offsets)[ITEMS_PER_THREAD], + int (&ranks)[ITEMS_PER_THREAD], + OffsetT valid_items) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + temp_storage.exchange_keys[ranks[ITEM]] = twiddled_keys[ITEM]; + } + + CTA_SYNC(); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + UnsignedBits key = temp_storage.exchange_keys[threadIdx.x + (ITEM * BLOCK_THREADS)]; + UnsignedBits digit = BFE(key, current_bit, num_bits); + relative_bin_offsets[ITEM] = temp_storage.relative_bin_offsets[digit]; + + // Un-twiddle + key = Traits::TwiddleOut(key); + + if (FULL_TILE || + (static_cast(threadIdx.x + (ITEM * BLOCK_THREADS)) < valid_items)) + { + d_keys_out[relative_bin_offsets[ITEM] + threadIdx.x + (ITEM * BLOCK_THREADS)] = key; + } + } + } + + + /** + * Scatter ranked values through shared memory, then to device-accessible memory + */ + template + __device__ __forceinline__ void ScatterValues( + ValueT (&values)[ITEMS_PER_THREAD], + OffsetT (&relative_bin_offsets)[ITEMS_PER_THREAD], + int (&ranks)[ITEMS_PER_THREAD], + OffsetT valid_items) + { + CTA_SYNC(); + + ValueExchangeT &exchange_values = temp_storage.exchange_values.Alias(); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + exchange_values[ranks[ITEM]] = values[ITEM]; + } + + CTA_SYNC(); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + ValueT value = exchange_values[threadIdx.x + (ITEM * BLOCK_THREADS)]; + + if (FULL_TILE || + (static_cast(threadIdx.x + (ITEM * BLOCK_THREADS)) < valid_items)) + { + d_values_out[relative_bin_offsets[ITEM] + threadIdx.x + (ITEM * BLOCK_THREADS)] = value; + } + } + } + + /** + * Load a tile of keys (specialized for full tile, any ranking algorithm) + */ + template + __device__ __forceinline__ void LoadKeys( + UnsignedBits (&keys)[ITEMS_PER_THREAD], + OffsetT block_offset, + OffsetT valid_items, + UnsignedBits oob_item, + Int2Type is_full_tile, + Int2Type<_RANK_ALGORITHM> rank_algorithm) + { + BlockLoadKeysT(temp_storage.load_keys).Load( + d_keys_in + block_offset, keys); + + CTA_SYNC(); + } + + + /** + * Load a tile of keys (specialized for partial tile, any ranking algorithm) + */ + template + __device__ __forceinline__ void LoadKeys( + UnsignedBits (&keys)[ITEMS_PER_THREAD], + OffsetT block_offset, + OffsetT valid_items, + UnsignedBits oob_item, + Int2Type is_full_tile, + Int2Type<_RANK_ALGORITHM> rank_algorithm) + { + // Register pressure work-around: moving valid_items through shfl prevents compiler + // from reusing guards/addressing from prior guarded loads + valid_items = ShuffleIndex(valid_items, 0, 0xffffffff); + + BlockLoadKeysT(temp_storage.load_keys).Load( + d_keys_in + block_offset, keys, valid_items, oob_item); + + CTA_SYNC(); + } + + + /** + * Load a tile of keys (specialized for full tile, match ranking algorithm) + */ + __device__ __forceinline__ void LoadKeys( + UnsignedBits (&keys)[ITEMS_PER_THREAD], + OffsetT block_offset, + OffsetT valid_items, + UnsignedBits oob_item, + Int2Type is_full_tile, + 
Int2Type rank_algorithm) + { + LoadDirectWarpStriped(threadIdx.x, d_keys_in + block_offset, keys); + } + + + /** + * Load a tile of keys (specialized for partial tile, match ranking algorithm) + */ + __device__ __forceinline__ void LoadKeys( + UnsignedBits (&keys)[ITEMS_PER_THREAD], + OffsetT block_offset, + OffsetT valid_items, + UnsignedBits oob_item, + Int2Type is_full_tile, + Int2Type rank_algorithm) + { + // Register pressure work-around: moving valid_items through shfl prevents compiler + // from reusing guards/addressing from prior guarded loads + valid_items = ShuffleIndex(valid_items, 0, 0xffffffff); + + LoadDirectWarpStriped(threadIdx.x, d_keys_in + block_offset, keys, valid_items, oob_item); + } + + + /** + * Load a tile of values (specialized for full tile, any ranking algorithm) + */ + template + __device__ __forceinline__ void LoadValues( + ValueT (&values)[ITEMS_PER_THREAD], + OffsetT block_offset, + OffsetT valid_items, + Int2Type is_full_tile, + Int2Type<_RANK_ALGORITHM> rank_algorithm) + { + BlockLoadValuesT(temp_storage.load_values).Load( + d_values_in + block_offset, values); + + CTA_SYNC(); + } + + + /** + * Load a tile of values (specialized for partial tile, any ranking algorithm) + */ + template + __device__ __forceinline__ void LoadValues( + ValueT (&values)[ITEMS_PER_THREAD], + OffsetT block_offset, + OffsetT valid_items, + Int2Type is_full_tile, + Int2Type<_RANK_ALGORITHM> rank_algorithm) + { + // Register pressure work-around: moving valid_items through shfl prevents compiler + // from reusing guards/addressing from prior guarded loads + valid_items = ShuffleIndex(valid_items, 0, 0xffffffff); + + BlockLoadValuesT(temp_storage.load_values).Load( + d_values_in + block_offset, values, valid_items); + + CTA_SYNC(); + } + + + /** + * Load a tile of items (specialized for full tile, match ranking algorithm) + */ + __device__ __forceinline__ void LoadValues( + ValueT (&values)[ITEMS_PER_THREAD], + OffsetT block_offset, + OffsetT valid_items, + Int2Type is_full_tile, + Int2Type rank_algorithm) + { + LoadDirectWarpStriped(threadIdx.x, d_values_in + block_offset, values); + } + + + /** + * Load a tile of items (specialized for partial tile, match ranking algorithm) + */ + __device__ __forceinline__ void LoadValues( + ValueT (&values)[ITEMS_PER_THREAD], + OffsetT block_offset, + OffsetT valid_items, + Int2Type is_full_tile, + Int2Type rank_algorithm) + { + // Register pressure work-around: moving valid_items through shfl prevents compiler + // from reusing guards/addressing from prior guarded loads + valid_items = ShuffleIndex(valid_items, 0, 0xffffffff); + + LoadDirectWarpStriped(threadIdx.x, d_values_in + block_offset, values, valid_items); + } + + + /** + * Truck along associated values + */ + template + __device__ __forceinline__ void GatherScatterValues( + OffsetT (&relative_bin_offsets)[ITEMS_PER_THREAD], + int (&ranks)[ITEMS_PER_THREAD], + OffsetT block_offset, + OffsetT valid_items, + Int2Type /*is_keys_only*/) + { + ValueT values[ITEMS_PER_THREAD]; + + CTA_SYNC(); + + LoadValues( + values, + block_offset, + valid_items, + Int2Type(), + Int2Type()); + + ScatterValues( + values, + relative_bin_offsets, + ranks, + valid_items); + } + + + /** + * Truck along associated values (specialized for key-only sorting) + */ + template + __device__ __forceinline__ void GatherScatterValues( + OffsetT (&/*relative_bin_offsets*/)[ITEMS_PER_THREAD], + int (&/*ranks*/)[ITEMS_PER_THREAD], + OffsetT /*block_offset*/, + OffsetT /*valid_items*/, + Int2Type /*is_keys_only*/) + {} + + + 
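/* --- Editorial note (illustrative sketch, not part of the CUB 1.8.0 file above) ---
 * The pair of GatherScatterValues overloads above is selected at compile time with
 * CUB's Int2Type tag-dispatch idiom: the KEYS_ONLY constant is lifted into a distinct
 * type, so for keys-only sorts the empty overload is chosen and no value traffic is
 * ever generated. A minimal, self-contained C++ sketch of the same idiom follows;
 * the names (IntToType, SorterSketch, MoveValues) are hypothetical and independent
 * of the CUB headers.
 */

#include <cstdio>

template <int A>
struct IntToType { enum { VALUE = A }; };      // stand-in for cub::Int2Type<A>

struct SorterSketch
{
    // Key-value path: values are permuted alongside the keys.
    void MoveValues(IntToType<0> /*is_keys_only*/) { std::printf("gather/scatter values\n"); }

    // Keys-only path: an empty overload, so the value-moving code is never instantiated.
    void MoveValues(IntToType<1> /*is_keys_only*/) {}

    template <bool KEYS_ONLY>
    void ProcessTile()
    {
        // ... rank and scatter the keys ...
        MoveValues(IntToType<KEYS_ONLY>());    // overload resolved at compile time
    }
};

int main()
{
    SorterSketch s;
    s.ProcessTile<false>();   // key-value sort: values are moved
    s.ProcessTile<true>();    // keys-only sort: the value path compiles away
    return 0;
}
/* --- End of editorial note --------------------------------------------------------- */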
/** + * Process tile + */ + template + __device__ __forceinline__ void ProcessTile( + OffsetT block_offset, + const OffsetT &valid_items = TILE_ITEMS) + { + UnsignedBits keys[ITEMS_PER_THREAD]; + int ranks[ITEMS_PER_THREAD]; + OffsetT relative_bin_offsets[ITEMS_PER_THREAD]; + + // Assign default (min/max) value to all keys + UnsignedBits default_key = (IS_DESCENDING) ? LOWEST_KEY : MAX_KEY; + + // Load tile of keys + LoadKeys( + keys, + block_offset, + valid_items, + default_key, + Int2Type(), + Int2Type()); + + // Twiddle key bits if necessary + #pragma unroll + for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) + { + keys[KEY] = Traits::TwiddleIn(keys[KEY]); + } + + // Rank the twiddled keys + int exclusive_digit_prefix[BINS_TRACKED_PER_THREAD]; + BlockRadixRankT(temp_storage.radix_rank).RankKeys( + keys, + ranks, + current_bit, + num_bits, + exclusive_digit_prefix); + + CTA_SYNC(); + + // Share exclusive digit prefix + #pragma unroll + for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) + { + int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; + if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) + { + // Store exclusive prefix + temp_storage.exclusive_digit_prefix[bin_idx] = + exclusive_digit_prefix[track]; + } + } + + CTA_SYNC(); + + // Get inclusive digit prefix + int inclusive_digit_prefix[BINS_TRACKED_PER_THREAD]; + + #pragma unroll + for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) + { + int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; + if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) + { + if (IS_DESCENDING) + { + // Get inclusive digit prefix from exclusive prefix (higher bins come first) + inclusive_digit_prefix[track] = (bin_idx == 0) ? + (BLOCK_THREADS * ITEMS_PER_THREAD) : + temp_storage.exclusive_digit_prefix[bin_idx - 1]; + } + else + { + // Get inclusive digit prefix from exclusive prefix (lower bins come first) + inclusive_digit_prefix[track] = (bin_idx == RADIX_DIGITS - 1) ? 
+ (BLOCK_THREADS * ITEMS_PER_THREAD) : + temp_storage.exclusive_digit_prefix[bin_idx + 1]; + } + } + } + + CTA_SYNC(); + + // Update global scatter base offsets for each digit + #pragma unroll + for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) + { + int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; + if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) + { + bin_offset[track] -= exclusive_digit_prefix[track]; + temp_storage.relative_bin_offsets[bin_idx] = bin_offset[track]; + bin_offset[track] += inclusive_digit_prefix[track]; + } + } + + CTA_SYNC(); + + // Scatter keys + ScatterKeys(keys, relative_bin_offsets, ranks, valid_items); + + // Gather/scatter values + GatherScatterValues(relative_bin_offsets , ranks, block_offset, valid_items, Int2Type()); + } + + //--------------------------------------------------------------------- + // Copy shortcut + //--------------------------------------------------------------------- + + /** + * Copy tiles within the range of input + */ + template < + typename InputIteratorT, + typename T> + __device__ __forceinline__ void Copy( + InputIteratorT d_in, + T *d_out, + OffsetT block_offset, + OffsetT block_end) + { + // Simply copy the input + while (block_offset + TILE_ITEMS <= block_end) + { + T items[ITEMS_PER_THREAD]; + + LoadDirectStriped(threadIdx.x, d_in + block_offset, items); + CTA_SYNC(); + StoreDirectStriped(threadIdx.x, d_out + block_offset, items); + + block_offset += TILE_ITEMS; + } + + // Clean up last partial tile with guarded-I/O + if (block_offset < block_end) + { + OffsetT valid_items = block_end - block_offset; + + T items[ITEMS_PER_THREAD]; + + LoadDirectStriped(threadIdx.x, d_in + block_offset, items, valid_items); + CTA_SYNC(); + StoreDirectStriped(threadIdx.x, d_out + block_offset, items, valid_items); + } + } + + + /** + * Copy tiles within the range of input (specialized for NullType) + */ + template + __device__ __forceinline__ void Copy( + InputIteratorT /*d_in*/, + NullType * /*d_out*/, + OffsetT /*block_offset*/, + OffsetT /*block_end*/) + {} + + + //--------------------------------------------------------------------- + // Interface + //--------------------------------------------------------------------- + + /** + * Constructor + */ + __device__ __forceinline__ AgentRadixSortDownsweep( + TempStorage &temp_storage, + OffsetT (&bin_offset)[BINS_TRACKED_PER_THREAD], + OffsetT num_items, + const KeyT *d_keys_in, + KeyT *d_keys_out, + const ValueT *d_values_in, + ValueT *d_values_out, + int current_bit, + int num_bits) + : + temp_storage(temp_storage.Alias()), + d_keys_in(reinterpret_cast(d_keys_in)), + d_values_in(d_values_in), + d_keys_out(reinterpret_cast(d_keys_out)), + d_values_out(d_values_out), + current_bit(current_bit), + num_bits(num_bits), + short_circuit(1) + { + #pragma unroll + for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) + { + this->bin_offset[track] = bin_offset[track]; + + int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; + if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) + { + // Short circuit if the histogram has only bin counts of only zeros or problem-size + short_circuit = short_circuit && ((bin_offset[track] == 0) || (bin_offset[track] == num_items)); + } + } + + short_circuit = CTA_SYNC_AND(short_circuit); + } + + + /** + * Constructor + */ + __device__ __forceinline__ AgentRadixSortDownsweep( + TempStorage &temp_storage, + OffsetT num_items, + OffsetT *d_spine, + const KeyT *d_keys_in, + KeyT *d_keys_out, + const ValueT 
*d_values_in, + ValueT *d_values_out, + int current_bit, + int num_bits) + : + temp_storage(temp_storage.Alias()), + d_keys_in(reinterpret_cast(d_keys_in)), + d_values_in(d_values_in), + d_keys_out(reinterpret_cast(d_keys_out)), + d_values_out(d_values_out), + current_bit(current_bit), + num_bits(num_bits), + short_circuit(1) + { + #pragma unroll + for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) + { + int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; + + // Load digit bin offsets (each of the first RADIX_DIGITS threads will load an offset for that digit) + if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) + { + if (IS_DESCENDING) + bin_idx = RADIX_DIGITS - bin_idx - 1; + + // Short circuit if the first block's histogram has only bin counts of only zeros or problem-size + OffsetT first_block_bin_offset = d_spine[gridDim.x * bin_idx]; + short_circuit = short_circuit && ((first_block_bin_offset == 0) || (first_block_bin_offset == num_items)); + + // Load my block's bin offset for my bin + bin_offset[track] = d_spine[(gridDim.x * bin_idx) + blockIdx.x]; + } + } + + short_circuit = CTA_SYNC_AND(short_circuit); + } + + + /** + * Distribute keys from a segment of input tiles. + */ + __device__ __forceinline__ void ProcessRegion( + OffsetT block_offset, + OffsetT block_end) + { + if (short_circuit) + { + // Copy keys + Copy(d_keys_in, d_keys_out, block_offset, block_end); + + // Copy values + Copy(d_values_in, d_values_out, block_offset, block_end); + } + else + { + // Process full tiles of tile_items + #pragma unroll 1 + while (block_offset + TILE_ITEMS <= block_end) + { + ProcessTile(block_offset); + block_offset += TILE_ITEMS; + + CTA_SYNC(); + } + + // Clean up last partial tile with guarded-I/O + if (block_offset < block_end) + { + ProcessTile(block_offset, block_end - block_offset); + } + + } + } + +}; + + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_radix_sort_upsweep.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_radix_sort_upsweep.cuh new file mode 100644 index 0000000..2081cef --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_radix_sort_upsweep.cuh @@ -0,0 +1,526 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep . + */ + +#pragma once + +#include "../thread/thread_reduce.cuh" +#include "../thread/thread_load.cuh" +#include "../warp/warp_reduce.cuh" +#include "../block/block_load.cuh" +#include "../util_type.cuh" +#include "../iterator/cache_modified_input_iterator.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/****************************************************************************** + * Tuning policy types + ******************************************************************************/ + +/** + * Parameterizable tuning policy type for AgentRadixSortUpsweep + */ +template < + int _BLOCK_THREADS, ///< Threads per thread block + int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading keys + int _RADIX_BITS> ///< The number of radix bits, i.e., log2(bins) +struct AgentRadixSortUpsweepPolicy +{ + enum + { + BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block + ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + RADIX_BITS = _RADIX_BITS, ///< The number of radix bits, i.e., log2(bins) + }; + + static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading keys +}; + + +/****************************************************************************** + * Thread block abstractions + ******************************************************************************/ + +/** + * \brief AgentRadixSortUpsweep implements a stateful abstraction of CUDA thread blocks for participating in device-wide radix sort upsweep . 
+ */ +template < + typename AgentRadixSortUpsweepPolicy, ///< Parameterized AgentRadixSortUpsweepPolicy tuning policy type + typename KeyT, ///< KeyT type + typename OffsetT> ///< Signed integer type for global offsets +struct AgentRadixSortUpsweep +{ + + //--------------------------------------------------------------------- + // Type definitions and constants + //--------------------------------------------------------------------- + + typedef typename Traits::UnsignedBits UnsignedBits; + + // Integer type for digit counters (to be packed into words of PackedCounters) + typedef unsigned char DigitCounter; + + // Integer type for packing DigitCounters into columns of shared memory banks + typedef unsigned int PackedCounter; + + static const CacheLoadModifier LOAD_MODIFIER = AgentRadixSortUpsweepPolicy::LOAD_MODIFIER; + + enum + { + RADIX_BITS = AgentRadixSortUpsweepPolicy::RADIX_BITS, + BLOCK_THREADS = AgentRadixSortUpsweepPolicy::BLOCK_THREADS, + KEYS_PER_THREAD = AgentRadixSortUpsweepPolicy::ITEMS_PER_THREAD, + + RADIX_DIGITS = 1 << RADIX_BITS, + + LOG_WARP_THREADS = CUB_PTX_LOG_WARP_THREADS, + WARP_THREADS = 1 << LOG_WARP_THREADS, + WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, + + TILE_ITEMS = BLOCK_THREADS * KEYS_PER_THREAD, + + BYTES_PER_COUNTER = sizeof(DigitCounter), + LOG_BYTES_PER_COUNTER = Log2::VALUE, + + PACKING_RATIO = sizeof(PackedCounter) / sizeof(DigitCounter), + LOG_PACKING_RATIO = Log2::VALUE, + + LOG_COUNTER_LANES = CUB_MAX(0, RADIX_BITS - LOG_PACKING_RATIO), + COUNTER_LANES = 1 << LOG_COUNTER_LANES, + + // To prevent counter overflow, we must periodically unpack and aggregate the + // digit counters back into registers. Each counter lane is assigned to a + // warp for aggregation. + + LANES_PER_WARP = CUB_MAX(1, (COUNTER_LANES + WARPS - 1) / WARPS), + + // Unroll tiles in batches without risk of counter overflow + UNROLL_COUNT = CUB_MIN(64, 255 / KEYS_PER_THREAD), + UNROLLED_ELEMENTS = UNROLL_COUNT * TILE_ITEMS, + }; + + + // Input iterator wrapper type (for applying cache modifier)s + typedef CacheModifiedInputIterator KeysItr; + + /** + * Shared memory storage layout + */ + union __align__(16) _TempStorage + { + DigitCounter thread_counters[COUNTER_LANES][BLOCK_THREADS][PACKING_RATIO]; + PackedCounter packed_thread_counters[COUNTER_LANES][BLOCK_THREADS]; + OffsetT block_counters[WARP_THREADS][RADIX_DIGITS]; + }; + + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Thread fields (aggregate state bundle) + //--------------------------------------------------------------------- + + // Shared storage for this CTA + _TempStorage &temp_storage; + + // Thread-local counters for periodically aggregating composite-counter lanes + OffsetT local_counts[LANES_PER_WARP][PACKING_RATIO]; + + // Input and output device pointers + KeysItr d_keys_in; + + // The least-significant bit position of the current digit to extract + int current_bit; + + // Number of bits in current digit + int num_bits; + + + + //--------------------------------------------------------------------- + // Helper structure for templated iteration + //--------------------------------------------------------------------- + + // Iterate + template + struct Iterate + { + // BucketKeys + static __device__ __forceinline__ void BucketKeys( + AgentRadixSortUpsweep &cta, + UnsignedBits keys[KEYS_PER_THREAD]) + { + cta.Bucket(keys[COUNT]); + + // Next + 
Iterate::BucketKeys(cta, keys); + } + }; + + // Terminate + template + struct Iterate + { + // BucketKeys + static __device__ __forceinline__ void BucketKeys(AgentRadixSortUpsweep &/*cta*/, UnsignedBits /*keys*/[KEYS_PER_THREAD]) {} + }; + + + //--------------------------------------------------------------------- + // Utility methods + //--------------------------------------------------------------------- + + /** + * Decode a key and increment corresponding smem digit counter + */ + __device__ __forceinline__ void Bucket(UnsignedBits key) + { + // Perform transform op + UnsignedBits converted_key = Traits::TwiddleIn(key); + + // Extract current digit bits + UnsignedBits digit = BFE(converted_key, current_bit, num_bits); + + // Get sub-counter offset + UnsignedBits sub_counter = digit & (PACKING_RATIO - 1); + + // Get row offset + UnsignedBits row_offset = digit >> LOG_PACKING_RATIO; + + // Increment counter + temp_storage.thread_counters[row_offset][threadIdx.x][sub_counter]++; + } + + + /** + * Reset composite counters + */ + __device__ __forceinline__ void ResetDigitCounters() + { + #pragma unroll + for (int LANE = 0; LANE < COUNTER_LANES; LANE++) + { + temp_storage.packed_thread_counters[LANE][threadIdx.x] = 0; + } + } + + + /** + * Reset the unpacked counters in each thread + */ + __device__ __forceinline__ void ResetUnpackedCounters() + { + #pragma unroll + for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) + { + #pragma unroll + for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) + { + local_counts[LANE][UNPACKED_COUNTER] = 0; + } + } + } + + + /** + * Extracts and aggregates the digit counters for each counter lane + * owned by this warp + */ + __device__ __forceinline__ void UnpackDigitCounts() + { + unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS; + unsigned int warp_tid = LaneId(); + + #pragma unroll + for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) + { + const int counter_lane = (LANE * WARPS) + warp_id; + if (counter_lane < COUNTER_LANES) + { + #pragma unroll + for (int PACKED_COUNTER = 0; PACKED_COUNTER < BLOCK_THREADS; PACKED_COUNTER += WARP_THREADS) + { + #pragma unroll + for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) + { + OffsetT counter = temp_storage.thread_counters[counter_lane][warp_tid + PACKED_COUNTER][UNPACKED_COUNTER]; + local_counts[LANE][UNPACKED_COUNTER] += counter; + } + } + } + } + } + + + /** + * Processes a single, full tile + */ + __device__ __forceinline__ void ProcessFullTile(OffsetT block_offset) + { + // Tile of keys + UnsignedBits keys[KEYS_PER_THREAD]; + + LoadDirectStriped(threadIdx.x, d_keys_in + block_offset, keys); + + // Prevent hoisting + CTA_SYNC(); + + // Bucket tile of keys + Iterate<0, KEYS_PER_THREAD>::BucketKeys(*this, keys); + } + + + /** + * Processes a single load (may have some threads masked off) + */ + __device__ __forceinline__ void ProcessPartialTile( + OffsetT block_offset, + const OffsetT &block_end) + { + // Process partial tile if necessary using single loads + block_offset += threadIdx.x; + while (block_offset < block_end) + { + // Load and bucket key + UnsignedBits key = d_keys_in[block_offset]; + Bucket(key); + block_offset += BLOCK_THREADS; + } + } + + + //--------------------------------------------------------------------- + // Interface + //--------------------------------------------------------------------- + + /** + * Constructor + */ + __device__ __forceinline__ AgentRadixSortUpsweep( + TempStorage &temp_storage, + const KeyT *d_keys_in, 
+ int current_bit, + int num_bits) + : + temp_storage(temp_storage.Alias()), + d_keys_in(reinterpret_cast(d_keys_in)), + current_bit(current_bit), + num_bits(num_bits) + {} + + + /** + * Compute radix digit histograms from a segment of input tiles. + */ + __device__ __forceinline__ void ProcessRegion( + OffsetT block_offset, + const OffsetT &block_end) + { + // Reset digit counters in smem and unpacked counters in registers + ResetDigitCounters(); + ResetUnpackedCounters(); + + // Unroll batches of full tiles + while (block_offset + UNROLLED_ELEMENTS <= block_end) + { + for (int i = 0; i < UNROLL_COUNT; ++i) + { + ProcessFullTile(block_offset); + block_offset += TILE_ITEMS; + } + + CTA_SYNC(); + + // Aggregate back into local_count registers to prevent overflow + UnpackDigitCounts(); + + CTA_SYNC(); + + // Reset composite counters in lanes + ResetDigitCounters(); + } + + // Unroll single full tiles + while (block_offset + TILE_ITEMS <= block_end) + { + ProcessFullTile(block_offset); + block_offset += TILE_ITEMS; + } + + // Process partial tile if necessary + ProcessPartialTile( + block_offset, + block_end); + + CTA_SYNC(); + + // Aggregate back into local_count registers + UnpackDigitCounts(); + } + + + /** + * Extract counts (saving them to the external array) + */ + template + __device__ __forceinline__ void ExtractCounts( + OffsetT *counters, + int bin_stride = 1, + int bin_offset = 0) + { + unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS; + unsigned int warp_tid = LaneId(); + + // Place unpacked digit counters in shared memory + #pragma unroll + for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) + { + int counter_lane = (LANE * WARPS) + warp_id; + if (counter_lane < COUNTER_LANES) + { + int digit_row = counter_lane << LOG_PACKING_RATIO; + + #pragma unroll + for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) + { + int bin_idx = digit_row + UNPACKED_COUNTER; + + temp_storage.block_counters[warp_tid][bin_idx] = + local_counts[LANE][UNPACKED_COUNTER]; + } + } + } + + CTA_SYNC(); + + // Rake-reduce bin_count reductions + + // Whole blocks + #pragma unroll + for (int BIN_BASE = RADIX_DIGITS % BLOCK_THREADS; + (BIN_BASE + BLOCK_THREADS) <= RADIX_DIGITS; + BIN_BASE += BLOCK_THREADS) + { + int bin_idx = BIN_BASE + threadIdx.x; + + OffsetT bin_count = 0; + #pragma unroll + for (int i = 0; i < WARP_THREADS; ++i) + bin_count += temp_storage.block_counters[i][bin_idx]; + + if (IS_DESCENDING) + bin_idx = RADIX_DIGITS - bin_idx - 1; + + counters[(bin_stride * bin_idx) + bin_offset] = bin_count; + } + + // Remainder + if ((RADIX_DIGITS % BLOCK_THREADS != 0) && (threadIdx.x < RADIX_DIGITS)) + { + int bin_idx = threadIdx.x; + + OffsetT bin_count = 0; + #pragma unroll + for (int i = 0; i < WARP_THREADS; ++i) + bin_count += temp_storage.block_counters[i][bin_idx]; + + if (IS_DESCENDING) + bin_idx = RADIX_DIGITS - bin_idx - 1; + + counters[(bin_stride * bin_idx) + bin_offset] = bin_count; + } + } + + + /** + * Extract counts + */ + template + __device__ __forceinline__ void ExtractCounts( + OffsetT (&bin_count)[BINS_TRACKED_PER_THREAD]) ///< [out] The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... 
(threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1] + { + unsigned int warp_id = threadIdx.x >> LOG_WARP_THREADS; + unsigned int warp_tid = LaneId(); + + // Place unpacked digit counters in shared memory + #pragma unroll + for (int LANE = 0; LANE < LANES_PER_WARP; LANE++) + { + int counter_lane = (LANE * WARPS) + warp_id; + if (counter_lane < COUNTER_LANES) + { + int digit_row = counter_lane << LOG_PACKING_RATIO; + + #pragma unroll + for (int UNPACKED_COUNTER = 0; UNPACKED_COUNTER < PACKING_RATIO; UNPACKED_COUNTER++) + { + int bin_idx = digit_row + UNPACKED_COUNTER; + + temp_storage.block_counters[warp_tid][bin_idx] = + local_counts[LANE][UNPACKED_COUNTER]; + } + } + } + + CTA_SYNC(); + + // Rake-reduce bin_count reductions + #pragma unroll + for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) + { + int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; + + if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) + { + bin_count[track] = 0; + + #pragma unroll + for (int i = 0; i < WARP_THREADS; ++i) + bin_count[track] += temp_storage.block_counters[i][bin_idx]; + } + } + } + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_reduce.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_reduce.cuh new file mode 100644 index 0000000..000a905 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_reduce.cuh @@ -0,0 +1,385 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::AgentReduce implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction . 
+ */ + +#pragma once + +#include + +#include "../block/block_load.cuh" +#include "../block/block_reduce.cuh" +#include "../grid/grid_mapping.cuh" +#include "../grid/grid_even_share.cuh" +#include "../util_type.cuh" +#include "../iterator/cache_modified_input_iterator.cuh" +#include "../util_namespace.cuh" + + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Tuning policy types + ******************************************************************************/ + +/** + * Parameterizable tuning policy type for AgentReduce + */ +template < + int _BLOCK_THREADS, ///< Threads per thread block + int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + int _VECTOR_LOAD_LENGTH, ///< Number of items per vectorized load + BlockReduceAlgorithm _BLOCK_ALGORITHM, ///< Cooperative block-wide reduction algorithm to use + CacheLoadModifier _LOAD_MODIFIER> ///< Cache load modifier for reading input elements +struct AgentReducePolicy +{ + enum + { + BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block + ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + VECTOR_LOAD_LENGTH = _VECTOR_LOAD_LENGTH, ///< Number of items per vectorized load + }; + + static const BlockReduceAlgorithm BLOCK_ALGORITHM = _BLOCK_ALGORITHM; ///< Cooperative block-wide reduction algorithm to use + static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements +}; + + + +/****************************************************************************** + * Thread block abstractions + ******************************************************************************/ + +/** + * \brief AgentReduce implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduction . + * + * Each thread reduces only the values it loads. If \p FIRST_TILE, this + * partial reduction is stored into \p thread_aggregate. Otherwise it is + * accumulated into \p thread_aggregate. + */ +template < + typename AgentReducePolicy, ///< Parameterized AgentReducePolicy tuning policy type + typename InputIteratorT, ///< Random-access iterator type for input + typename OutputIteratorT, ///< Random-access iterator type for output + typename OffsetT, ///< Signed integer type for global offsets + typename ReductionOp> ///< Binary reduction operator type having member T operator()(const T &a, const T &b) +struct AgentReduce +{ + + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + /// The input value type + typedef typename std::iterator_traits::value_type InputT; + + /// The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... 
else the output iterator's value type + + /// Vector type of InputT for data movement + typedef typename CubVector::Type VectorT; + + /// Input iterator wrapper type (for applying cache modifier) + typedef typename If::VALUE, + CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedInputIterator + InputIteratorT>::Type // Directly use the supplied input iterator type + WrappedInputIteratorT; + + /// Constants + enum + { + BLOCK_THREADS = AgentReducePolicy::BLOCK_THREADS, + ITEMS_PER_THREAD = AgentReducePolicy::ITEMS_PER_THREAD, + VECTOR_LOAD_LENGTH = CUB_MIN(ITEMS_PER_THREAD, AgentReducePolicy::VECTOR_LOAD_LENGTH), + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + + // Can vectorize according to the policy if the input iterator is a native pointer to a primitive type + ATTEMPT_VECTORIZATION = (VECTOR_LOAD_LENGTH > 1) && + (ITEMS_PER_THREAD % VECTOR_LOAD_LENGTH == 0) && + (IsPointer::VALUE) && Traits::PRIMITIVE, + + }; + + static const CacheLoadModifier LOAD_MODIFIER = AgentReducePolicy::LOAD_MODIFIER; + static const BlockReduceAlgorithm BLOCK_ALGORITHM = AgentReducePolicy::BLOCK_ALGORITHM; + + /// Parameterized BlockReduce primitive + typedef BlockReduce BlockReduceT; + + /// Shared memory type required by this thread block + struct _TempStorage + { + typename BlockReduceT::TempStorage reduce; + }; + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Per-thread fields + //--------------------------------------------------------------------- + + _TempStorage& temp_storage; ///< Reference to temp_storage + InputIteratorT d_in; ///< Input data to reduce + WrappedInputIteratorT d_wrapped_in; ///< Wrapped input data to reduce + ReductionOp reduction_op; ///< Binary reduction operator + + + //--------------------------------------------------------------------- + // Utility + //--------------------------------------------------------------------- + + + // Whether or not the input is aligned with the vector type (specialized for types we can vectorize) + template + static __device__ __forceinline__ bool IsAligned( + Iterator d_in, + Int2Type /*can_vectorize*/) + { + return (size_t(d_in) & (sizeof(VectorT) - 1)) == 0; + } + + // Whether or not the input is aligned with the vector type (specialized for types we cannot vectorize) + template + static __device__ __forceinline__ bool IsAligned( + Iterator /*d_in*/, + Int2Type /*can_vectorize*/) + { + return false; + } + + + //--------------------------------------------------------------------- + // Constructor + //--------------------------------------------------------------------- + + /** + * Constructor + */ + __device__ __forceinline__ AgentReduce( + TempStorage& temp_storage, ///< Reference to temp_storage + InputIteratorT d_in, ///< Input data to reduce + ReductionOp reduction_op) ///< Binary reduction operator + : + temp_storage(temp_storage.Alias()), + d_in(d_in), + d_wrapped_in(d_in), + reduction_op(reduction_op) + {} + + + //--------------------------------------------------------------------- + // Tile consumption + //--------------------------------------------------------------------- + + /** + * Consume a full tile of input (non-vectorized) + */ + template + __device__ __forceinline__ void ConsumeTile( + OutputT &thread_aggregate, + OffsetT block_offset, ///< The offset the tile to consume + int /*valid_items*/, ///< The number of valid items in the tile + Int2Type 
/*is_full_tile*/, ///< Whether or not this is a full tile + Int2Type /*can_vectorize*/) ///< Whether or not we can vectorize loads + { + OutputT items[ITEMS_PER_THREAD]; + + // Load items in striped fashion + LoadDirectStriped(threadIdx.x, d_wrapped_in + block_offset, items); + + // Reduce items within each thread stripe + thread_aggregate = (IS_FIRST_TILE) ? + internal::ThreadReduce(items, reduction_op) : + internal::ThreadReduce(items, reduction_op, thread_aggregate); + } + + + /** + * Consume a full tile of input (vectorized) + */ + template + __device__ __forceinline__ void ConsumeTile( + OutputT &thread_aggregate, + OffsetT block_offset, ///< The offset the tile to consume + int /*valid_items*/, ///< The number of valid items in the tile + Int2Type /*is_full_tile*/, ///< Whether or not this is a full tile + Int2Type /*can_vectorize*/) ///< Whether or not we can vectorize loads + { + // Alias items as an array of VectorT and load it in striped fashion + enum { WORDS = ITEMS_PER_THREAD / VECTOR_LOAD_LENGTH }; + + // Fabricate a vectorized input iterator + InputT *d_in_unqualified = const_cast(d_in) + block_offset + (threadIdx.x * VECTOR_LOAD_LENGTH); + CacheModifiedInputIterator d_vec_in( + reinterpret_cast(d_in_unqualified)); + + // Load items as vector items + InputT input_items[ITEMS_PER_THREAD]; + VectorT *vec_items = reinterpret_cast(input_items); + #pragma unroll + for (int i = 0; i < WORDS; ++i) + vec_items[i] = d_vec_in[BLOCK_THREADS * i]; + + // Convert from input type to output type + OutputT items[ITEMS_PER_THREAD]; + #pragma unroll + for (int i = 0; i < ITEMS_PER_THREAD; ++i) + items[i] = input_items[i]; + + // Reduce items within each thread stripe + thread_aggregate = (IS_FIRST_TILE) ? + internal::ThreadReduce(items, reduction_op) : + internal::ThreadReduce(items, reduction_op, thread_aggregate); + } + + + /** + * Consume a partial tile of input + */ + template + __device__ __forceinline__ void ConsumeTile( + OutputT &thread_aggregate, + OffsetT block_offset, ///< The offset the tile to consume + int valid_items, ///< The number of valid items in the tile + Int2Type /*is_full_tile*/, ///< Whether or not this is a full tile + Int2Type /*can_vectorize*/) ///< Whether or not we can vectorize loads + { + // Partial tile + int thread_offset = threadIdx.x; + + // Read first item + if ((IS_FIRST_TILE) && (thread_offset < valid_items)) + { + thread_aggregate = d_wrapped_in[block_offset + thread_offset]; + thread_offset += BLOCK_THREADS; + } + + // Continue reading items (block-striped) + while (thread_offset < valid_items) + { + OutputT item = d_wrapped_in[block_offset + thread_offset]; + thread_aggregate = reduction_op(thread_aggregate, item); + thread_offset += BLOCK_THREADS; + } + } + + + //--------------------------------------------------------------- + // Consume a contiguous segment of tiles + //--------------------------------------------------------------------- + + /** + * \brief Reduce a contiguous segment of input tiles + */ + template + __device__ __forceinline__ OutputT ConsumeRange( + GridEvenShare &even_share, ///< GridEvenShare descriptor + Int2Type can_vectorize) ///< Whether or not we can vectorize loads + { + OutputT thread_aggregate; + + if (even_share.block_offset + TILE_ITEMS > even_share.block_end) + { + // First tile isn't full (not all threads have valid items) + int valid_items = even_share.block_end - even_share.block_offset; + ConsumeTile(thread_aggregate, even_share.block_offset, valid_items, Int2Type(), can_vectorize); + return 
BlockReduceT(temp_storage.reduce).Reduce(thread_aggregate, reduction_op, valid_items); + } + + // At least one full block + ConsumeTile(thread_aggregate, even_share.block_offset, TILE_ITEMS, Int2Type(), can_vectorize); + even_share.block_offset += even_share.block_stride; + + // Consume subsequent full tiles of input + while (even_share.block_offset + TILE_ITEMS <= even_share.block_end) + { + ConsumeTile(thread_aggregate, even_share.block_offset, TILE_ITEMS, Int2Type(), can_vectorize); + even_share.block_offset += even_share.block_stride; + } + + // Consume a partially-full tile + if (even_share.block_offset < even_share.block_end) + { + int valid_items = even_share.block_end - even_share.block_offset; + ConsumeTile(thread_aggregate, even_share.block_offset, valid_items, Int2Type(), can_vectorize); + } + + // Compute block-wide reduction (all threads have valid items) + return BlockReduceT(temp_storage.reduce).Reduce(thread_aggregate, reduction_op); + } + + + /** + * \brief Reduce a contiguous segment of input tiles + */ + __device__ __forceinline__ OutputT ConsumeRange( + OffsetT block_offset, ///< [in] Threadblock begin offset (inclusive) + OffsetT block_end) ///< [in] Threadblock end offset (exclusive) + { + GridEvenShare even_share; + even_share.template BlockInit(block_offset, block_end); + + return (IsAligned(d_in + block_offset, Int2Type())) ? + ConsumeRange(even_share, Int2Type()) : + ConsumeRange(even_share, Int2Type()); + } + + + /** + * Reduce a contiguous segment of input tiles + */ + __device__ __forceinline__ OutputT ConsumeTiles( + GridEvenShare &even_share) ///< [in] GridEvenShare descriptor + { + // Initialize GRID_MAPPING_STRIP_MINE even-share descriptor for this thread block + even_share.template BlockInit(); + + return (IsAligned(d_in, Int2Type())) ? + ConsumeRange(even_share, Int2Type()) : + ConsumeRange(even_share, Int2Type()); + + } + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_reduce_by_key.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_reduce_by_key.cuh new file mode 100644 index 0000000..51964d3 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_reduce_by_key.cuh @@ -0,0 +1,547 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::AgentReduceByKey implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduce-value-by-key. + */ + +#pragma once + +#include + +#include "single_pass_scan_operators.cuh" +#include "../block/block_load.cuh" +#include "../block/block_store.cuh" +#include "../block/block_scan.cuh" +#include "../block/block_discontinuity.cuh" +#include "../iterator/cache_modified_input_iterator.cuh" +#include "../iterator/constant_input_iterator.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Tuning policy types + ******************************************************************************/ + +/** + * Parameterizable tuning policy type for AgentReduceByKey + */ +template < + int _BLOCK_THREADS, ///< Threads per thread block + int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use + CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements + BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use +struct AgentReduceByKeyPolicy +{ + enum + { + BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block + ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + }; + + static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use + static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements + static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use +}; + + +/****************************************************************************** + * Thread block abstractions + ******************************************************************************/ + +/** + * \brief AgentReduceByKey implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduce-value-by-key + */ +template < + typename AgentReduceByKeyPolicyT, ///< Parameterized AgentReduceByKeyPolicy tuning policy type + typename KeysInputIteratorT, ///< Random-access input iterator type for keys + typename UniqueOutputIteratorT, ///< Random-access output iterator type for keys + typename ValuesInputIteratorT, ///< Random-access input iterator type for values + typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values + typename NumRunsOutputIteratorT, ///< Output iterator type for recording number of items selected + typename EqualityOpT, ///< KeyT equality operator type + typename ReductionOpT, ///< ValueT reduction operator type + typename OffsetT> ///< Signed integer type for global offsets +struct AgentReduceByKey +{ + //--------------------------------------------------------------------- 
+ // Types and constants + //--------------------------------------------------------------------- + + // The input keys type + typedef typename std::iterator_traits::value_type KeyInputT; + + // The output keys type + typedef typename If<(Equals::value_type, void>::VALUE), // KeyOutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type KeyOutputT; // ... else the output iterator's value type + + // The input values type + typedef typename std::iterator_traits::value_type ValueInputT; + + // The output values type + typedef typename If<(Equals::value_type, void>::VALUE), // ValueOutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type ValueOutputT; // ... else the output iterator's value type + + // Tuple type for scanning (pairs accumulated segment-value with segment-index) + typedef KeyValuePair OffsetValuePairT; + + // Tuple type for pairing keys and values + typedef KeyValuePair KeyValuePairT; + + // Tile status descriptor interface type + typedef ReduceByKeyScanTileState ScanTileStateT; + + // Guarded inequality functor + template + struct GuardedInequalityWrapper + { + _EqualityOpT op; ///< Wrapped equality operator + int num_remaining; ///< Items remaining + + /// Constructor + __host__ __device__ __forceinline__ + GuardedInequalityWrapper(_EqualityOpT op, int num_remaining) : op(op), num_remaining(num_remaining) {} + + /// Boolean inequality operator, returns (a != b) + template + __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b, int idx) const + { + if (idx < num_remaining) + return !op(a, b); // In bounds + + // Return true if first out-of-bounds item, false otherwise + return (idx == num_remaining); + } + }; + + + // Constants + enum + { + BLOCK_THREADS = AgentReduceByKeyPolicyT::BLOCK_THREADS, + ITEMS_PER_THREAD = AgentReduceByKeyPolicyT::ITEMS_PER_THREAD, + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + TWO_PHASE_SCATTER = (ITEMS_PER_THREAD > 1), + + // Whether or not the scan operation has a zero-valued identity value (true if we're performing addition on a primitive type) + HAS_IDENTITY_ZERO = (Equals::VALUE) && (Traits::PRIMITIVE), + }; + + // Cache-modified Input iterator wrapper type (for applying cache modifier) for keys + typedef typename If::VALUE, + CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator + KeysInputIteratorT>::Type // Directly use the supplied input iterator type + WrappedKeysInputIteratorT; + + // Cache-modified Input iterator wrapper type (for applying cache modifier) for values + typedef typename If::VALUE, + CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator + ValuesInputIteratorT>::Type // Directly use the supplied input iterator type + WrappedValuesInputIteratorT; + + // Cache-modified Input iterator wrapper type (for applying cache modifier) for fixup values + typedef typename If::VALUE, + CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator + AggregatesOutputIteratorT>::Type // Directly use the supplied input iterator type + WrappedFixupInputIteratorT; + + // Reduce-value-by-segment scan operator + typedef ReduceBySegmentOp ReduceBySegmentOpT; + + // Parameterized BlockLoad type for keys + typedef BlockLoad< + 
KeyOutputT, + BLOCK_THREADS, + ITEMS_PER_THREAD, + AgentReduceByKeyPolicyT::LOAD_ALGORITHM> + BlockLoadKeysT; + + // Parameterized BlockLoad type for values + typedef BlockLoad< + ValueOutputT, + BLOCK_THREADS, + ITEMS_PER_THREAD, + AgentReduceByKeyPolicyT::LOAD_ALGORITHM> + BlockLoadValuesT; + + // Parameterized BlockDiscontinuity type for keys + typedef BlockDiscontinuity< + KeyOutputT, + BLOCK_THREADS> + BlockDiscontinuityKeys; + + // Parameterized BlockScan type + typedef BlockScan< + OffsetValuePairT, + BLOCK_THREADS, + AgentReduceByKeyPolicyT::SCAN_ALGORITHM> + BlockScanT; + + // Callback type for obtaining tile prefix during block scan + typedef TilePrefixCallbackOp< + OffsetValuePairT, + ReduceBySegmentOpT, + ScanTileStateT> + TilePrefixCallbackOpT; + + // Key and value exchange types + typedef KeyOutputT KeyExchangeT[TILE_ITEMS + 1]; + typedef ValueOutputT ValueExchangeT[TILE_ITEMS + 1]; + + // Shared memory type for this thread block + union _TempStorage + { + struct + { + typename BlockScanT::TempStorage scan; // Smem needed for tile scanning + typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback + typename BlockDiscontinuityKeys::TempStorage discontinuity; // Smem needed for discontinuity detection + }; + + // Smem needed for loading keys + typename BlockLoadKeysT::TempStorage load_keys; + + // Smem needed for loading values + typename BlockLoadValuesT::TempStorage load_values; + + // Smem needed for compacting key value pairs(allows non POD items in this union) + Uninitialized raw_exchange; + }; + + // Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Per-thread fields + //--------------------------------------------------------------------- + + _TempStorage& temp_storage; ///< Reference to temp_storage + WrappedKeysInputIteratorT d_keys_in; ///< Input keys + UniqueOutputIteratorT d_unique_out; ///< Unique output keys + WrappedValuesInputIteratorT d_values_in; ///< Input values + AggregatesOutputIteratorT d_aggregates_out; ///< Output value aggregates + NumRunsOutputIteratorT d_num_runs_out; ///< Output pointer for total number of segments identified + EqualityOpT equality_op; ///< KeyT equality operator + ReductionOpT reduction_op; ///< Reduction operator + ReduceBySegmentOpT scan_op; ///< Reduce-by-segment scan operator + + + //--------------------------------------------------------------------- + // Constructor + //--------------------------------------------------------------------- + + // Constructor + __device__ __forceinline__ + AgentReduceByKey( + TempStorage& temp_storage, ///< Reference to temp_storage + KeysInputIteratorT d_keys_in, ///< Input keys + UniqueOutputIteratorT d_unique_out, ///< Unique output keys + ValuesInputIteratorT d_values_in, ///< Input values + AggregatesOutputIteratorT d_aggregates_out, ///< Output value aggregates + NumRunsOutputIteratorT d_num_runs_out, ///< Output pointer for total number of segments identified + EqualityOpT equality_op, ///< KeyT equality operator + ReductionOpT reduction_op) ///< ValueT reduction operator + : + temp_storage(temp_storage.Alias()), + d_keys_in(d_keys_in), + d_unique_out(d_unique_out), + d_values_in(d_values_in), + d_aggregates_out(d_aggregates_out), + d_num_runs_out(d_num_runs_out), + equality_op(equality_op), + reduction_op(reduction_op), + scan_op(reduction_op) + {} + + + 
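/* --- Editorial note (illustrative sketch, not part of the CUB 1.8.0 file above) ---
 * The agent defined above computes, tile by tile, the same result as a sequential
 * reduce-by-key: consecutive equal keys are collapsed into one output key together
 * with the reduction of their values, and the number of such runs is reported.
 * A host-side reference under that assumption is sketched below; ReferenceReduceByKey
 * is a hypothetical helper written for this note, not a CUB API.
 */

#include <cassert>
#include <cstddef>
#include <functional>
#include <vector>

template <typename Key, typename Value, typename EqualityOp, typename ReductionOp>
std::size_t ReferenceReduceByKey(const std::vector<Key>   &keys_in,
                                 const std::vector<Value> &values_in,
                                 std::vector<Key>         &unique_out,
                                 std::vector<Value>       &aggregates_out,
                                 EqualityOp                equality_op,
                                 ReductionOp               reduction_op)
{
    unique_out.clear();
    aggregates_out.clear();

    for (std::size_t i = 0; i < keys_in.size(); ++i)
    {
        if (i == 0 || !equality_op(keys_in[i - 1], keys_in[i]))
        {
            // Head of a new segment: start a new (key, aggregate) run.
            unique_out.push_back(keys_in[i]);
            aggregates_out.push_back(values_in[i]);
        }
        else
        {
            // Same segment: fold this value into the running aggregate.
            aggregates_out.back() = reduction_op(aggregates_out.back(), values_in[i]);
        }
    }
    return unique_out.size();    // corresponds to what d_num_runs_out receives
}

int main()
{
    std::vector<int> keys   = {0, 0, 1, 1, 1, 2};
    std::vector<int> values = {1, 2, 3, 4, 5, 6};
    std::vector<int> unique_keys, aggregates;

    std::size_t num_runs = ReferenceReduceByKey(keys, values, unique_keys, aggregates,
                                                std::equal_to<int>(), std::plus<int>());

    assert(num_runs == 3);
    assert(aggregates[0] == 3 && aggregates[1] == 12 && aggregates[2] == 6);
    return 0;
}
/* --- End of editorial note --------------------------------------------------------- */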
//--------------------------------------------------------------------- + // Scatter utility methods + //--------------------------------------------------------------------- + + /** + * Directly scatter flagged items to output offsets + */ + __device__ __forceinline__ void ScatterDirect( + KeyValuePairT (&scatter_items)[ITEMS_PER_THREAD], + OffsetT (&segment_flags)[ITEMS_PER_THREAD], + OffsetT (&segment_indices)[ITEMS_PER_THREAD]) + { + // Scatter flagged keys and values + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if (segment_flags[ITEM]) + { + d_unique_out[segment_indices[ITEM]] = scatter_items[ITEM].key; + d_aggregates_out[segment_indices[ITEM]] = scatter_items[ITEM].value; + } + } + } + + + /** + * 2-phase scatter flagged items to output offsets + * + * The exclusive scan causes each head flag to be paired with the previous + * value aggregate: the scatter offsets must be decremented for value aggregates + */ + __device__ __forceinline__ void ScatterTwoPhase( + KeyValuePairT (&scatter_items)[ITEMS_PER_THREAD], + OffsetT (&segment_flags)[ITEMS_PER_THREAD], + OffsetT (&segment_indices)[ITEMS_PER_THREAD], + OffsetT num_tile_segments, + OffsetT num_tile_segments_prefix) + { + CTA_SYNC(); + + // Compact and scatter pairs + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if (segment_flags[ITEM]) + { + temp_storage.raw_exchange.Alias()[segment_indices[ITEM] - num_tile_segments_prefix] = scatter_items[ITEM]; + } + } + + CTA_SYNC(); + + for (int item = threadIdx.x; item < num_tile_segments; item += BLOCK_THREADS) + { + KeyValuePairT pair = temp_storage.raw_exchange.Alias()[item]; + d_unique_out[num_tile_segments_prefix + item] = pair.key; + d_aggregates_out[num_tile_segments_prefix + item] = pair.value; + } + } + + + /** + * Scatter flagged items + */ + __device__ __forceinline__ void Scatter( + KeyValuePairT (&scatter_items)[ITEMS_PER_THREAD], + OffsetT (&segment_flags)[ITEMS_PER_THREAD], + OffsetT (&segment_indices)[ITEMS_PER_THREAD], + OffsetT num_tile_segments, + OffsetT num_tile_segments_prefix) + { + // Do a one-phase scatter if (a) two-phase is disabled or (b) the average number of selected items per thread is less than one + if (TWO_PHASE_SCATTER && (num_tile_segments > BLOCK_THREADS)) + { + ScatterTwoPhase( + scatter_items, + segment_flags, + segment_indices, + num_tile_segments, + num_tile_segments_prefix); + } + else + { + ScatterDirect( + scatter_items, + segment_flags, + segment_indices); + } + } + + + //--------------------------------------------------------------------- + // Cooperatively scan a device-wide sequence of tiles with other CTAs + //--------------------------------------------------------------------- + + /** + * Process a tile of input (dynamic chained scan) + */ + template ///< Whether the current tile is the last tile + __device__ __forceinline__ void ConsumeTile( + OffsetT num_remaining, ///< Number of global input items remaining (including this tile) + int tile_idx, ///< Tile index + OffsetT tile_offset, ///< Tile offset + ScanTileStateT& tile_state) ///< Global tile state descriptor + { + KeyOutputT keys[ITEMS_PER_THREAD]; // Tile keys + KeyOutputT prev_keys[ITEMS_PER_THREAD]; // Tile keys shuffled up + ValueOutputT values[ITEMS_PER_THREAD]; // Tile values + OffsetT head_flags[ITEMS_PER_THREAD]; // Segment head flags + OffsetT segment_indices[ITEMS_PER_THREAD]; // Segment indices + OffsetValuePairT scan_items[ITEMS_PER_THREAD]; // Zipped values and segment flags|indices + KeyValuePairT 
scatter_items[ITEMS_PER_THREAD]; // Zipped key value pairs for scattering + + // Load keys + if (IS_LAST_TILE) + BlockLoadKeysT(temp_storage.load_keys).Load(d_keys_in + tile_offset, keys, num_remaining); + else + BlockLoadKeysT(temp_storage.load_keys).Load(d_keys_in + tile_offset, keys); + + // Load tile predecessor key in first thread + KeyOutputT tile_predecessor; + if (threadIdx.x == 0) + { + tile_predecessor = (tile_idx == 0) ? + keys[0] : // First tile gets repeat of first item (thus first item will not be flagged as a head) + d_keys_in[tile_offset - 1]; // Subsequent tiles get last key from previous tile + } + + CTA_SYNC(); + + // Load values + if (IS_LAST_TILE) + BlockLoadValuesT(temp_storage.load_values).Load(d_values_in + tile_offset, values, num_remaining); + else + BlockLoadValuesT(temp_storage.load_values).Load(d_values_in + tile_offset, values); + + CTA_SYNC(); + + // Initialize head-flags and shuffle up the previous keys + if (IS_LAST_TILE) + { + // Use custom flag operator to additionally flag the first out-of-bounds item + GuardedInequalityWrapper flag_op(equality_op, num_remaining); + BlockDiscontinuityKeys(temp_storage.discontinuity).FlagHeads( + head_flags, keys, prev_keys, flag_op, tile_predecessor); + } + else + { + InequalityWrapper flag_op(equality_op); + BlockDiscontinuityKeys(temp_storage.discontinuity).FlagHeads( + head_flags, keys, prev_keys, flag_op, tile_predecessor); + } + + // Zip values and head flags + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + scan_items[ITEM].value = values[ITEM]; + scan_items[ITEM].key = head_flags[ITEM]; + } + + // Perform exclusive tile scan + OffsetValuePairT block_aggregate; // Inclusive block-wide scan aggregate + OffsetT num_segments_prefix; // Number of segments prior to this tile + OffsetValuePairT total_aggregate; // The tile prefix folded with block_aggregate + if (tile_idx == 0) + { + // Scan first tile + BlockScanT(temp_storage.scan).ExclusiveScan(scan_items, scan_items, scan_op, block_aggregate); + num_segments_prefix = 0; + total_aggregate = block_aggregate; + + // Update tile status if there are successor tiles + if ((!IS_LAST_TILE) && (threadIdx.x == 0)) + tile_state.SetInclusive(0, block_aggregate); + } + else + { + // Scan non-first tile + TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.prefix, scan_op, tile_idx); + BlockScanT(temp_storage.scan).ExclusiveScan(scan_items, scan_items, scan_op, prefix_op); + + block_aggregate = prefix_op.GetBlockAggregate(); + num_segments_prefix = prefix_op.GetExclusivePrefix().key; + total_aggregate = prefix_op.GetInclusivePrefix(); + } + + // Rezip scatter items and segment indices + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + scatter_items[ITEM].key = prev_keys[ITEM]; + scatter_items[ITEM].value = scan_items[ITEM].value; + segment_indices[ITEM] = scan_items[ITEM].key; + } + + // At this point, each flagged segment head has: + // - The key for the previous segment + // - The reduced value from the previous segment + // - The segment index for the reduced value + + // Scatter flagged keys and values + OffsetT num_tile_segments = block_aggregate.key; + Scatter(scatter_items, head_flags, segment_indices, num_tile_segments, num_segments_prefix); + + // Last thread in last tile will output final count (and last pair, if necessary) + if ((IS_LAST_TILE) && (threadIdx.x == BLOCK_THREADS - 1)) + { + OffsetT num_segments = num_segments_prefix + num_tile_segments; + + // If the last tile is a whole tile, output the final_value 
+ if (num_remaining == TILE_ITEMS) + { + d_unique_out[num_segments] = keys[ITEMS_PER_THREAD - 1]; + d_aggregates_out[num_segments] = total_aggregate.value; + num_segments++; + } + + // Output the total number of items selected + *d_num_runs_out = num_segments; + } + } + + + /** + * Scan tiles of items as part of a dynamic chained scan + */ + __device__ __forceinline__ void ConsumeRange( + int num_items, ///< Total number of input items + ScanTileStateT& tile_state, ///< Global tile state descriptor + int start_tile) ///< The starting tile for the current grid + { + // Blocks are launched in increasing order, so just assign one tile per block + int tile_idx = start_tile + blockIdx.x; // Current tile index + OffsetT tile_offset = OffsetT(TILE_ITEMS) * tile_idx; // Global offset for the current tile + OffsetT num_remaining = num_items - tile_offset; // Remaining items (including this tile) + + if (num_remaining > TILE_ITEMS) + { + // Not last tile + ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); + } + else if (num_remaining > 0) + { + // Last tile + ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); + } + } + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_rle.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_rle.cuh new file mode 100644 index 0000000..cb7a4a6 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_rle.cuh @@ -0,0 +1,837 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::AgentRle implements a stateful abstraction of CUDA thread blocks for participating in device-wide run-length-encode. 
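Editor's note (not part of this diff): the file added above describes cub::AgentRle, the per-block worker behind CUB's run-length-encoding dispatch. As a hedged illustration, assuming the public CUB 1.8.0 API, the sketch below shows the host entry point, cub::DeviceRunLengthEncode::NonTrivialRuns, which emits the offset and length of every run longer than one element; identifiers are illustrative.

// Hedged host-side sketch (editor-added): exercises the run-length-encode
// path that AgentRle implements per thread block.
#include <cub/cub.cuh>

void example_non_trivial_runs(const int *d_in,            // [num_items] device input
                              int       *d_offsets_out,   // [num_items] run offsets
                              int       *d_lengths_out,   // [num_items] run lengths
                              int       *d_num_runs_out,  // [1] number of runs found
                              int        num_items)
{
    void  *d_temp_storage     = NULL;
    size_t temp_storage_bytes = 0;

    // Query temporary storage, then allocate and run
    cub::DeviceRunLengthEncode::NonTrivialRuns(d_temp_storage, temp_storage_bytes,
        d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceRunLengthEncode::NonTrivialRuns(d_temp_storage, temp_storage_bytes,
        d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items);
    cudaFree(d_temp_storage);
}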
+ */ + +#pragma once + +#include + +#include "single_pass_scan_operators.cuh" +#include "../block/block_load.cuh" +#include "../block/block_store.cuh" +#include "../block/block_scan.cuh" +#include "../block/block_exchange.cuh" +#include "../block/block_discontinuity.cuh" +#include "../grid/grid_queue.cuh" +#include "../iterator/cache_modified_input_iterator.cuh" +#include "../iterator/constant_input_iterator.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Tuning policy types + ******************************************************************************/ + +/** + * Parameterizable tuning policy type for AgentRle + */ +template < + int _BLOCK_THREADS, ///< Threads per thread block + int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use + CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements + bool _STORE_WARP_TIME_SLICING, ///< Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any store-related data transpositions (versus each warp having its own storage) + BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use +struct AgentRlePolicy +{ + enum + { + BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block + ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + STORE_WARP_TIME_SLICING = _STORE_WARP_TIME_SLICING, ///< Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any store-related data transpositions (versus each warp having its own storage) + }; + + static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use + static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements + static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use +}; + + + + + +/****************************************************************************** + * Thread block abstractions + ******************************************************************************/ + +/** + * \brief AgentRle implements a stateful abstraction of CUDA thread blocks for participating in device-wide run-length-encode + */ +template < + typename AgentRlePolicyT, ///< Parameterized AgentRlePolicyT tuning policy type + typename InputIteratorT, ///< Random-access input iterator type for data + typename OffsetsOutputIteratorT, ///< Random-access output iterator type for offset values + typename LengthsOutputIteratorT, ///< Random-access output iterator type for length values + typename EqualityOpT, ///< T equality operator type + typename OffsetT> ///< Signed integer type for global offsets +struct AgentRle +{ + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + /// The input value type + typedef typename std::iterator_traits::value_type T; + + /// The lengths output value type + typedef typename If<(Equals::value_type, void>::VALUE), // LengthT = (if output iterator's value type is void) ? + OffsetT, // ... then the OffsetT type, + typename std::iterator_traits::value_type>::Type LengthT; // ... 
else the output iterator's value type + + /// Tuple type for scanning (pairs run-length and run-index) + typedef KeyValuePair LengthOffsetPair; + + /// Tile status descriptor interface type + typedef ReduceByKeyScanTileState ScanTileStateT; + + // Constants + enum + { + WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), + BLOCK_THREADS = AgentRlePolicyT::BLOCK_THREADS, + ITEMS_PER_THREAD = AgentRlePolicyT::ITEMS_PER_THREAD, + WARP_ITEMS = WARP_THREADS * ITEMS_PER_THREAD, + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, + + /// Whether or not to sync after loading data + SYNC_AFTER_LOAD = (AgentRlePolicyT::LOAD_ALGORITHM != BLOCK_LOAD_DIRECT), + + /// Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any store-related data transpositions (versus each warp having its own storage) + STORE_WARP_TIME_SLICING = AgentRlePolicyT::STORE_WARP_TIME_SLICING, + ACTIVE_EXCHANGE_WARPS = (STORE_WARP_TIME_SLICING) ? 1 : WARPS, + }; + + + /** + * Special operator that signals all out-of-bounds items are not equal to everything else, + * forcing both (1) the last item to be tail-flagged and (2) all oob items to be marked + * trivial. + */ + template + struct OobInequalityOp + { + OffsetT num_remaining; + EqualityOpT equality_op; + + __device__ __forceinline__ OobInequalityOp( + OffsetT num_remaining, + EqualityOpT equality_op) + : + num_remaining(num_remaining), + equality_op(equality_op) + {} + + template + __host__ __device__ __forceinline__ bool operator()(T first, T second, Index idx) + { + if (!LAST_TILE || (idx < num_remaining)) + return !equality_op(first, second); + else + return true; + } + }; + + + // Cache-modified Input iterator wrapper type (for applying cache modifier) for data + typedef typename If::VALUE, + CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedVLengthnputIterator + InputIteratorT>::Type // Directly use the supplied input iterator type + WrappedInputIteratorT; + + // Parameterized BlockLoad type for data + typedef BlockLoad< + T, + AgentRlePolicyT::BLOCK_THREADS, + AgentRlePolicyT::ITEMS_PER_THREAD, + AgentRlePolicyT::LOAD_ALGORITHM> + BlockLoadT; + + // Parameterized BlockDiscontinuity type for data + typedef BlockDiscontinuity BlockDiscontinuityT; + + // Parameterized WarpScan type + typedef WarpScan WarpScanPairs; + + // Reduce-length-by-run scan operator + typedef ReduceBySegmentOp ReduceBySegmentOpT; + + // Callback type for obtaining tile prefix during block scan + typedef TilePrefixCallbackOp< + LengthOffsetPair, + ReduceBySegmentOpT, + ScanTileStateT> + TilePrefixCallbackOpT; + + // Warp exchange types + typedef WarpExchange WarpExchangePairs; + + typedef typename If::Type WarpExchangePairsStorage; + + typedef WarpExchange WarpExchangeOffsets; + typedef WarpExchange WarpExchangeLengths; + + typedef LengthOffsetPair WarpAggregates[WARPS]; + + // Shared memory type for this thread block + struct _TempStorage + { + // Aliasable storage layout + union Aliasable + { + struct + { + typename BlockDiscontinuityT::TempStorage discontinuity; // Smem needed for discontinuity detection + typename WarpScanPairs::TempStorage warp_scan[WARPS]; // Smem needed for warp-synchronous scans + Uninitialized warp_aggregates; // Smem needed for sharing warp-wide aggregates + typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback + }; + + // Smem needed for input loading + typename BlockLoadT::TempStorage 
load; + + // Aliasable layout needed for two-phase scatter + union ScatterAliasable + { + unsigned long long align; + WarpExchangePairsStorage exchange_pairs[ACTIVE_EXCHANGE_WARPS]; + typename WarpExchangeOffsets::TempStorage exchange_offsets[ACTIVE_EXCHANGE_WARPS]; + typename WarpExchangeLengths::TempStorage exchange_lengths[ACTIVE_EXCHANGE_WARPS]; + + } scatter_aliasable; + + } aliasable; + + OffsetT tile_idx; // Shared tile index + LengthOffsetPair tile_inclusive; // Inclusive tile prefix + LengthOffsetPair tile_exclusive; // Exclusive tile prefix + }; + + // Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Per-thread fields + //--------------------------------------------------------------------- + + _TempStorage& temp_storage; ///< Reference to temp_storage + + WrappedInputIteratorT d_in; ///< Pointer to input sequence of data items + OffsetsOutputIteratorT d_offsets_out; ///< Input run offsets + LengthsOutputIteratorT d_lengths_out; ///< Output run lengths + + EqualityOpT equality_op; ///< T equality operator + ReduceBySegmentOpT scan_op; ///< Reduce-length-by-flag scan operator + OffsetT num_items; ///< Total number of input items + + + //--------------------------------------------------------------------- + // Constructor + //--------------------------------------------------------------------- + + // Constructor + __device__ __forceinline__ + AgentRle( + TempStorage &temp_storage, ///< [in] Reference to temp_storage + InputIteratorT d_in, ///< [in] Pointer to input sequence of data items + OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to output sequence of run offsets + LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to output sequence of run lengths + EqualityOpT equality_op, ///< [in] T equality operator + OffsetT num_items) ///< [in] Total number of input items + : + temp_storage(temp_storage.Alias()), + d_in(d_in), + d_offsets_out(d_offsets_out), + d_lengths_out(d_lengths_out), + equality_op(equality_op), + scan_op(cub::Sum()), + num_items(num_items) + {} + + + //--------------------------------------------------------------------- + // Utility methods for initializing the selections + //--------------------------------------------------------------------- + + template + __device__ __forceinline__ void InitializeSelections( + OffsetT tile_offset, + OffsetT num_remaining, + T (&items)[ITEMS_PER_THREAD], + LengthOffsetPair (&lengths_and_num_runs)[ITEMS_PER_THREAD]) + { + bool head_flags[ITEMS_PER_THREAD]; + bool tail_flags[ITEMS_PER_THREAD]; + + OobInequalityOp inequality_op(num_remaining, equality_op); + + if (FIRST_TILE && LAST_TILE) + { + // First-and-last-tile always head-flags the first item and tail-flags the last item + + BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails( + head_flags, tail_flags, items, inequality_op); + } + else if (FIRST_TILE) + { + // First-tile always head-flags the first item + + // Get the first item from the next tile + T tile_successor_item; + if (threadIdx.x == BLOCK_THREADS - 1) + tile_successor_item = d_in[tile_offset + TILE_ITEMS]; + + BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails( + head_flags, tail_flags, tile_successor_item, items, inequality_op); + } + else if (LAST_TILE) + { + // Last-tile always flags the last item + + // Get the last item from the previous tile + T tile_predecessor_item; + if (threadIdx.x == 0) + 
tile_predecessor_item = d_in[tile_offset - 1]; + + BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails( + head_flags, tile_predecessor_item, tail_flags, items, inequality_op); + } + else + { + // Get the first item from the next tile + T tile_successor_item; + if (threadIdx.x == BLOCK_THREADS - 1) + tile_successor_item = d_in[tile_offset + TILE_ITEMS]; + + // Get the last item from the previous tile + T tile_predecessor_item; + if (threadIdx.x == 0) + tile_predecessor_item = d_in[tile_offset - 1]; + + BlockDiscontinuityT(temp_storage.aliasable.discontinuity).FlagHeadsAndTails( + head_flags, tile_predecessor_item, tail_flags, tile_successor_item, items, inequality_op); + } + + // Zip counts and runs + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + lengths_and_num_runs[ITEM].key = head_flags[ITEM] && (!tail_flags[ITEM]); + lengths_and_num_runs[ITEM].value = ((!head_flags[ITEM]) || (!tail_flags[ITEM])); + } + } + + //--------------------------------------------------------------------- + // Scan utility methods + //--------------------------------------------------------------------- + + /** + * Scan of allocations + */ + __device__ __forceinline__ void WarpScanAllocations( + LengthOffsetPair &tile_aggregate, + LengthOffsetPair &warp_aggregate, + LengthOffsetPair &warp_exclusive_in_tile, + LengthOffsetPair &thread_exclusive_in_warp, + LengthOffsetPair (&lengths_and_num_runs)[ITEMS_PER_THREAD]) + { + // Perform warpscans + unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS); + int lane_id = LaneId(); + + LengthOffsetPair identity; + identity.key = 0; + identity.value = 0; + + LengthOffsetPair thread_inclusive; + LengthOffsetPair thread_aggregate = internal::ThreadReduce(lengths_and_num_runs, scan_op); + WarpScanPairs(temp_storage.aliasable.warp_scan[warp_id]).Scan( + thread_aggregate, + thread_inclusive, + thread_exclusive_in_warp, + identity, + scan_op); + + // Last lane in each warp shares its warp-aggregate + if (lane_id == WARP_THREADS - 1) + temp_storage.aliasable.warp_aggregates.Alias()[warp_id] = thread_inclusive; + + CTA_SYNC(); + + // Accumulate total selected and the warp-wide prefix + warp_exclusive_in_tile = identity; + warp_aggregate = temp_storage.aliasable.warp_aggregates.Alias()[warp_id]; + tile_aggregate = temp_storage.aliasable.warp_aggregates.Alias()[0]; + + #pragma unroll + for (int WARP = 1; WARP < WARPS; ++WARP) + { + if (warp_id == WARP) + warp_exclusive_in_tile = tile_aggregate; + + tile_aggregate = scan_op(tile_aggregate, temp_storage.aliasable.warp_aggregates.Alias()[WARP]); + } + } + + + //--------------------------------------------------------------------- + // Utility methods for scattering selections + //--------------------------------------------------------------------- + + /** + * Two-phase scatter, specialized for warp time-slicing + */ + template + __device__ __forceinline__ void ScatterTwoPhase( + OffsetT tile_num_runs_exclusive_in_global, + OffsetT warp_num_runs_aggregate, + OffsetT warp_num_runs_exclusive_in_tile, + OffsetT (&thread_num_runs_exclusive_in_warp)[ITEMS_PER_THREAD], + LengthOffsetPair (&lengths_and_offsets)[ITEMS_PER_THREAD], + Int2Type is_warp_time_slice) + { + unsigned int warp_id = ((WARPS == 1) ? 
0 : threadIdx.x / WARP_THREADS); + int lane_id = LaneId(); + + // Locally compact items within the warp (first warp) + if (warp_id == 0) + { + WarpExchangePairs(temp_storage.aliasable.scatter_aliasable.exchange_pairs[0]).ScatterToStriped( + lengths_and_offsets, thread_num_runs_exclusive_in_warp); + } + + // Locally compact items within the warp (remaining warps) + #pragma unroll + for (int SLICE = 1; SLICE < WARPS; ++SLICE) + { + CTA_SYNC(); + + if (warp_id == SLICE) + { + WarpExchangePairs(temp_storage.aliasable.scatter_aliasable.exchange_pairs[0]).ScatterToStriped( + lengths_and_offsets, thread_num_runs_exclusive_in_warp); + } + } + + // Global scatter + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + if ((ITEM * WARP_THREADS) < warp_num_runs_aggregate - lane_id) + { + OffsetT item_offset = + tile_num_runs_exclusive_in_global + + warp_num_runs_exclusive_in_tile + + (ITEM * WARP_THREADS) + lane_id; + + // Scatter offset + d_offsets_out[item_offset] = lengths_and_offsets[ITEM].key; + + // Scatter length if not the first (global) length + if ((!FIRST_TILE) || (ITEM != 0) || (threadIdx.x > 0)) + { + d_lengths_out[item_offset - 1] = lengths_and_offsets[ITEM].value; + } + } + } + } + + + /** + * Two-phase scatter + */ + template + __device__ __forceinline__ void ScatterTwoPhase( + OffsetT tile_num_runs_exclusive_in_global, + OffsetT warp_num_runs_aggregate, + OffsetT warp_num_runs_exclusive_in_tile, + OffsetT (&thread_num_runs_exclusive_in_warp)[ITEMS_PER_THREAD], + LengthOffsetPair (&lengths_and_offsets)[ITEMS_PER_THREAD], + Int2Type is_warp_time_slice) + { + unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS); + int lane_id = LaneId(); + + // Unzip + OffsetT run_offsets[ITEMS_PER_THREAD]; + LengthT run_lengths[ITEMS_PER_THREAD]; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + run_offsets[ITEM] = lengths_and_offsets[ITEM].key; + run_lengths[ITEM] = lengths_and_offsets[ITEM].value; + } + + WarpExchangeOffsets(temp_storage.aliasable.scatter_aliasable.exchange_offsets[warp_id]).ScatterToStriped( + run_offsets, thread_num_runs_exclusive_in_warp); + + WARP_SYNC(0xffffffff); + + WarpExchangeLengths(temp_storage.aliasable.scatter_aliasable.exchange_lengths[warp_id]).ScatterToStriped( + run_lengths, thread_num_runs_exclusive_in_warp); + + // Global scatter + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + if ((ITEM * WARP_THREADS) + lane_id < warp_num_runs_aggregate) + { + OffsetT item_offset = + tile_num_runs_exclusive_in_global + + warp_num_runs_exclusive_in_tile + + (ITEM * WARP_THREADS) + lane_id; + + // Scatter offset + d_offsets_out[item_offset] = run_offsets[ITEM]; + + // Scatter length if not the first (global) length + if ((!FIRST_TILE) || (ITEM != 0) || (threadIdx.x > 0)) + { + d_lengths_out[item_offset - 1] = run_lengths[ITEM]; + } + } + } + } + + + /** + * Direct scatter + */ + template + __device__ __forceinline__ void ScatterDirect( + OffsetT tile_num_runs_exclusive_in_global, + OffsetT warp_num_runs_aggregate, + OffsetT warp_num_runs_exclusive_in_tile, + OffsetT (&thread_num_runs_exclusive_in_warp)[ITEMS_PER_THREAD], + LengthOffsetPair (&lengths_and_offsets)[ITEMS_PER_THREAD]) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if (thread_num_runs_exclusive_in_warp[ITEM] < warp_num_runs_aggregate) + { + OffsetT item_offset = + tile_num_runs_exclusive_in_global + + warp_num_runs_exclusive_in_tile + + thread_num_runs_exclusive_in_warp[ITEM]; + + // Scatter 
offset + d_offsets_out[item_offset] = lengths_and_offsets[ITEM].key; + + // Scatter length if not the first (global) length + if (item_offset >= 1) + { + d_lengths_out[item_offset - 1] = lengths_and_offsets[ITEM].value; + } + } + } + } + + + /** + * Scatter + */ + template + __device__ __forceinline__ void Scatter( + OffsetT tile_num_runs_aggregate, + OffsetT tile_num_runs_exclusive_in_global, + OffsetT warp_num_runs_aggregate, + OffsetT warp_num_runs_exclusive_in_tile, + OffsetT (&thread_num_runs_exclusive_in_warp)[ITEMS_PER_THREAD], + LengthOffsetPair (&lengths_and_offsets)[ITEMS_PER_THREAD]) + { + if ((ITEMS_PER_THREAD == 1) || (tile_num_runs_aggregate < BLOCK_THREADS)) + { + // Direct scatter if the warp has any items + if (warp_num_runs_aggregate) + { + ScatterDirect( + tile_num_runs_exclusive_in_global, + warp_num_runs_aggregate, + warp_num_runs_exclusive_in_tile, + thread_num_runs_exclusive_in_warp, + lengths_and_offsets); + } + } + else + { + // Scatter two phase + ScatterTwoPhase( + tile_num_runs_exclusive_in_global, + warp_num_runs_aggregate, + warp_num_runs_exclusive_in_tile, + thread_num_runs_exclusive_in_warp, + lengths_and_offsets, + Int2Type()); + } + } + + + + //--------------------------------------------------------------------- + // Cooperatively scan a device-wide sequence of tiles with other CTAs + //--------------------------------------------------------------------- + + /** + * Process a tile of input (dynamic chained scan) + */ + template < + bool LAST_TILE> + __device__ __forceinline__ LengthOffsetPair ConsumeTile( + OffsetT num_items, ///< Total number of global input items + OffsetT num_remaining, ///< Number of global input items remaining (including this tile) + int tile_idx, ///< Tile index + OffsetT tile_offset, ///< Tile offset + ScanTileStateT &tile_status) ///< Global list of tile status + { + if (tile_idx == 0) + { + // First tile + + // Load items + T items[ITEMS_PER_THREAD]; + if (LAST_TILE) + BlockLoadT(temp_storage.aliasable.load).Load(d_in + tile_offset, items, num_remaining, T()); + else + BlockLoadT(temp_storage.aliasable.load).Load(d_in + tile_offset, items); + + if (SYNC_AFTER_LOAD) + CTA_SYNC(); + + // Set flags + LengthOffsetPair lengths_and_num_runs[ITEMS_PER_THREAD]; + + InitializeSelections( + tile_offset, + num_remaining, + items, + lengths_and_num_runs); + + // Exclusive scan of lengths and runs + LengthOffsetPair tile_aggregate; + LengthOffsetPair warp_aggregate; + LengthOffsetPair warp_exclusive_in_tile; + LengthOffsetPair thread_exclusive_in_warp; + + WarpScanAllocations( + tile_aggregate, + warp_aggregate, + warp_exclusive_in_tile, + thread_exclusive_in_warp, + lengths_and_num_runs); + + // Update tile status if this is not the last tile + if (!LAST_TILE && (threadIdx.x == 0)) + tile_status.SetInclusive(0, tile_aggregate); + + // Update thread_exclusive_in_warp to fold in warp run-length + if (thread_exclusive_in_warp.key == 0) + thread_exclusive_in_warp.value += warp_exclusive_in_tile.value; + + LengthOffsetPair lengths_and_offsets[ITEMS_PER_THREAD]; + OffsetT thread_num_runs_exclusive_in_warp[ITEMS_PER_THREAD]; + LengthOffsetPair lengths_and_num_runs2[ITEMS_PER_THREAD]; + + // Downsweep scan through lengths_and_num_runs + internal::ThreadScanExclusive(lengths_and_num_runs, lengths_and_num_runs2, scan_op, thread_exclusive_in_warp); + + // Zip + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + lengths_and_offsets[ITEM].value = lengths_and_num_runs2[ITEM].value; + lengths_and_offsets[ITEM].key = tile_offset 
+ (threadIdx.x * ITEMS_PER_THREAD) + ITEM; + thread_num_runs_exclusive_in_warp[ITEM] = (lengths_and_num_runs[ITEM].key) ? + lengths_and_num_runs2[ITEM].key : // keep + WARP_THREADS * ITEMS_PER_THREAD; // discard + } + + OffsetT tile_num_runs_aggregate = tile_aggregate.key; + OffsetT tile_num_runs_exclusive_in_global = 0; + OffsetT warp_num_runs_aggregate = warp_aggregate.key; + OffsetT warp_num_runs_exclusive_in_tile = warp_exclusive_in_tile.key; + + // Scatter + Scatter( + tile_num_runs_aggregate, + tile_num_runs_exclusive_in_global, + warp_num_runs_aggregate, + warp_num_runs_exclusive_in_tile, + thread_num_runs_exclusive_in_warp, + lengths_and_offsets); + + // Return running total (inclusive of this tile) + return tile_aggregate; + } + else + { + // Not first tile + + // Load items + T items[ITEMS_PER_THREAD]; + if (LAST_TILE) + BlockLoadT(temp_storage.aliasable.load).Load(d_in + tile_offset, items, num_remaining, T()); + else + BlockLoadT(temp_storage.aliasable.load).Load(d_in + tile_offset, items); + + if (SYNC_AFTER_LOAD) + CTA_SYNC(); + + // Set flags + LengthOffsetPair lengths_and_num_runs[ITEMS_PER_THREAD]; + + InitializeSelections( + tile_offset, + num_remaining, + items, + lengths_and_num_runs); + + // Exclusive scan of lengths and runs + LengthOffsetPair tile_aggregate; + LengthOffsetPair warp_aggregate; + LengthOffsetPair warp_exclusive_in_tile; + LengthOffsetPair thread_exclusive_in_warp; + + WarpScanAllocations( + tile_aggregate, + warp_aggregate, + warp_exclusive_in_tile, + thread_exclusive_in_warp, + lengths_and_num_runs); + + // First warp computes tile prefix in lane 0 + TilePrefixCallbackOpT prefix_op(tile_status, temp_storage.aliasable.prefix, Sum(), tile_idx); + unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS); + if (warp_id == 0) + { + prefix_op(tile_aggregate); + if (threadIdx.x == 0) + temp_storage.tile_exclusive = prefix_op.exclusive_prefix; + } + + CTA_SYNC(); + + LengthOffsetPair tile_exclusive_in_global = temp_storage.tile_exclusive; + + // Update thread_exclusive_in_warp to fold in warp and tile run-lengths + LengthOffsetPair thread_exclusive = scan_op(tile_exclusive_in_global, warp_exclusive_in_tile); + if (thread_exclusive_in_warp.key == 0) + thread_exclusive_in_warp.value += thread_exclusive.value; + + // Downsweep scan through lengths_and_num_runs + LengthOffsetPair lengths_and_num_runs2[ITEMS_PER_THREAD]; + LengthOffsetPair lengths_and_offsets[ITEMS_PER_THREAD]; + OffsetT thread_num_runs_exclusive_in_warp[ITEMS_PER_THREAD]; + + internal::ThreadScanExclusive(lengths_and_num_runs, lengths_and_num_runs2, scan_op, thread_exclusive_in_warp); + + // Zip + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + lengths_and_offsets[ITEM].value = lengths_and_num_runs2[ITEM].value; + lengths_and_offsets[ITEM].key = tile_offset + (threadIdx.x * ITEMS_PER_THREAD) + ITEM; + thread_num_runs_exclusive_in_warp[ITEM] = (lengths_and_num_runs[ITEM].key) ? 
+ lengths_and_num_runs2[ITEM].key : // keep + WARP_THREADS * ITEMS_PER_THREAD; // discard + } + + OffsetT tile_num_runs_aggregate = tile_aggregate.key; + OffsetT tile_num_runs_exclusive_in_global = tile_exclusive_in_global.key; + OffsetT warp_num_runs_aggregate = warp_aggregate.key; + OffsetT warp_num_runs_exclusive_in_tile = warp_exclusive_in_tile.key; + + // Scatter + Scatter( + tile_num_runs_aggregate, + tile_num_runs_exclusive_in_global, + warp_num_runs_aggregate, + warp_num_runs_exclusive_in_tile, + thread_num_runs_exclusive_in_warp, + lengths_and_offsets); + + // Return running total (inclusive of this tile) + return prefix_op.inclusive_prefix; + } + } + + + /** + * Scan tiles of items as part of a dynamic chained scan + */ + template ///< Output iterator type for recording number of items selected + __device__ __forceinline__ void ConsumeRange( + int num_tiles, ///< Total number of input tiles + ScanTileStateT& tile_status, ///< Global list of tile status + NumRunsIteratorT d_num_runs_out) ///< Output pointer for total number of runs identified + { + // Blocks are launched in increasing order, so just assign one tile per block + int tile_idx = (blockIdx.x * gridDim.y) + blockIdx.y; // Current tile index + OffsetT tile_offset = tile_idx * TILE_ITEMS; // Global offset for the current tile + OffsetT num_remaining = num_items - tile_offset; // Remaining items (including this tile) + + if (tile_idx < num_tiles - 1) + { + // Not the last tile (full) + ConsumeTile(num_items, num_remaining, tile_idx, tile_offset, tile_status); + } + else if (num_remaining > 0) + { + // The last tile (possibly partially-full) + LengthOffsetPair running_total = ConsumeTile(num_items, num_remaining, tile_idx, tile_offset, tile_status); + + if (threadIdx.x == 0) + { + // Output the total number of items selected + *d_num_runs_out = running_total.key; + + // The inclusive prefix contains accumulated length reduction for the last run + if (running_total.key > 0) + d_lengths_out[running_total.key - 1] = running_total.value; + } + } + } +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_scan.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_scan.cuh new file mode 100644 index 0000000..9368615 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_scan.cuh @@ -0,0 +1,471 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::AgentScan implements a stateful abstraction of CUDA thread blocks for participating in device-wide prefix scan . + */ + +#pragma once + +#include + +#include "single_pass_scan_operators.cuh" +#include "../block/block_load.cuh" +#include "../block/block_store.cuh" +#include "../block/block_scan.cuh" +#include "../grid/grid_queue.cuh" +#include "../iterator/cache_modified_input_iterator.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Tuning policy types + ******************************************************************************/ + +/** + * Parameterizable tuning policy type for AgentScan + */ +template < + int _BLOCK_THREADS, ///< Threads per thread block + int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use + CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements + BlockStoreAlgorithm _STORE_ALGORITHM, ///< The BlockStore algorithm to use + BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use +struct AgentScanPolicy +{ + enum + { + BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block + ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + }; + + static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use + static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements + static const BlockStoreAlgorithm STORE_ALGORITHM = _STORE_ALGORITHM; ///< The BlockStore algorithm to use + static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use +}; + + + + +/****************************************************************************** + * Thread block abstractions + ******************************************************************************/ + +/** + * \brief AgentScan implements a stateful abstraction of CUDA thread blocks for participating in device-wide prefix scan . 
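Editor's note (not part of this diff): AgentScan, declared above, is the tile-processing worker behind CUB's single-pass device-wide scans; tile prefixes are chained between blocks through the ScanTileStateT descriptor ("decoupled look-back"). As a hedged sketch, assuming the public CUB 1.8.0 API, the host code below shows the matching entry point, cub::DeviceScan::ExclusiveSum; buffer names are illustrative.

// Hedged host-side sketch (editor-added): drives the device-wide scan that
// AgentScan implements one tile per thread block.
#include <cub/cub.cuh>

void example_exclusive_sum(const int *d_in,   // [num_items] device input
                           int       *d_out,  // [num_items] device output (exclusive prefix sums)
                           int        num_items)
{
    void  *d_temp_storage     = NULL;
    size_t temp_storage_bytes = 0;

    // The temporary storage holds, among other things, the per-tile status
    // descriptors used for the look-back between thread blocks.
    cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
    cudaFree(d_temp_storage);
}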
+ */ +template < + typename AgentScanPolicyT, ///< Parameterized AgentScanPolicyT tuning policy type + typename InputIteratorT, ///< Random-access input iterator type + typename OutputIteratorT, ///< Random-access output iterator type + typename ScanOpT, ///< Scan functor type + typename InitValueT, ///< The init_value element for ScanOpT type (cub::NullType for inclusive scan) + typename OffsetT> ///< Signed integer type for global offsets +struct AgentScan +{ + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type + + // Tile status descriptor interface type + typedef ScanTileState ScanTileStateT; + + // Input iterator wrapper type (for applying cache modifier) + typedef typename If::VALUE, + CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedInputIterator + InputIteratorT>::Type // Directly use the supplied input iterator type + WrappedInputIteratorT; + + // Constants + enum + { + IS_INCLUSIVE = Equals::VALUE, // Inclusive scan if no init_value type is provided + BLOCK_THREADS = AgentScanPolicyT::BLOCK_THREADS, + ITEMS_PER_THREAD = AgentScanPolicyT::ITEMS_PER_THREAD, + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + }; + + // Parameterized BlockLoad type + typedef BlockLoad< + OutputT, + AgentScanPolicyT::BLOCK_THREADS, + AgentScanPolicyT::ITEMS_PER_THREAD, + AgentScanPolicyT::LOAD_ALGORITHM> + BlockLoadT; + + // Parameterized BlockStore type + typedef BlockStore< + OutputT, + AgentScanPolicyT::BLOCK_THREADS, + AgentScanPolicyT::ITEMS_PER_THREAD, + AgentScanPolicyT::STORE_ALGORITHM> + BlockStoreT; + + // Parameterized BlockScan type + typedef BlockScan< + OutputT, + AgentScanPolicyT::BLOCK_THREADS, + AgentScanPolicyT::SCAN_ALGORITHM> + BlockScanT; + + // Callback type for obtaining tile prefix during block scan + typedef TilePrefixCallbackOp< + OutputT, + ScanOpT, + ScanTileStateT> + TilePrefixCallbackOpT; + + // Stateful BlockScan prefix callback type for managing a running total while scanning consecutive tiles + typedef BlockScanRunningPrefixOp< + OutputT, + ScanOpT> + RunningPrefixCallbackOp; + + // Shared memory type for this thread block + union _TempStorage + { + typename BlockLoadT::TempStorage load; // Smem needed for tile loading + typename BlockStoreT::TempStorage store; // Smem needed for tile storing + + struct + { + typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback + typename BlockScanT::TempStorage scan; // Smem needed for tile scanning + }; + }; + + // Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Per-thread fields + //--------------------------------------------------------------------- + + _TempStorage& temp_storage; ///< Reference to temp_storage + WrappedInputIteratorT d_in; ///< Input data + OutputIteratorT d_out; ///< Output data + ScanOpT scan_op; ///< Binary scan operator + InitValueT init_value; ///< The init_value 
element for ScanOpT + + + //--------------------------------------------------------------------- + // Block scan utility methods + //--------------------------------------------------------------------- + + /** + * Exclusive scan specialization (first tile) + */ + __device__ __forceinline__ + void ScanTile( + OutputT (&items)[ITEMS_PER_THREAD], + OutputT init_value, + ScanOpT scan_op, + OutputT &block_aggregate, + Int2Type /*is_inclusive*/) + { + BlockScanT(temp_storage.scan).ExclusiveScan(items, items, init_value, scan_op, block_aggregate); + block_aggregate = scan_op(init_value, block_aggregate); + } + + + /** + * Inclusive scan specialization (first tile) + */ + __device__ __forceinline__ + void ScanTile( + OutputT (&items)[ITEMS_PER_THREAD], + InitValueT /*init_value*/, + ScanOpT scan_op, + OutputT &block_aggregate, + Int2Type /*is_inclusive*/) + { + BlockScanT(temp_storage.scan).InclusiveScan(items, items, scan_op, block_aggregate); + } + + + /** + * Exclusive scan specialization (subsequent tiles) + */ + template + __device__ __forceinline__ + void ScanTile( + OutputT (&items)[ITEMS_PER_THREAD], + ScanOpT scan_op, + PrefixCallback &prefix_op, + Int2Type /*is_inclusive*/) + { + BlockScanT(temp_storage.scan).ExclusiveScan(items, items, scan_op, prefix_op); + } + + + /** + * Inclusive scan specialization (subsequent tiles) + */ + template + __device__ __forceinline__ + void ScanTile( + OutputT (&items)[ITEMS_PER_THREAD], + ScanOpT scan_op, + PrefixCallback &prefix_op, + Int2Type /*is_inclusive*/) + { + BlockScanT(temp_storage.scan).InclusiveScan(items, items, scan_op, prefix_op); + } + + + //--------------------------------------------------------------------- + // Constructor + //--------------------------------------------------------------------- + + // Constructor + __device__ __forceinline__ + AgentScan( + TempStorage& temp_storage, ///< Reference to temp_storage + InputIteratorT d_in, ///< Input data + OutputIteratorT d_out, ///< Output data + ScanOpT scan_op, ///< Binary scan operator + InitValueT init_value) ///< Initial value to seed the exclusive scan + : + temp_storage(temp_storage.Alias()), + d_in(d_in), + d_out(d_out), + scan_op(scan_op), + init_value(init_value) + {} + + + //--------------------------------------------------------------------- + // Cooperatively scan a device-wide sequence of tiles with other CTAs + //--------------------------------------------------------------------- + + /** + * Process a tile of input (dynamic chained scan) + */ + template ///< Whether the current tile is the last tile + __device__ __forceinline__ void ConsumeTile( + OffsetT num_remaining, ///< Number of global input items remaining (including this tile) + int tile_idx, ///< Tile index + OffsetT tile_offset, ///< Tile offset + ScanTileStateT& tile_state) ///< Global tile state descriptor + { + // Load items + OutputT items[ITEMS_PER_THREAD]; + + if (IS_LAST_TILE) + BlockLoadT(temp_storage.load).Load(d_in + tile_offset, items, num_remaining); + else + BlockLoadT(temp_storage.load).Load(d_in + tile_offset, items); + + CTA_SYNC(); + + // Perform tile scan + if (tile_idx == 0) + { + // Scan first tile + OutputT block_aggregate; + ScanTile(items, init_value, scan_op, block_aggregate, Int2Type()); + if ((!IS_LAST_TILE) && (threadIdx.x == 0)) + tile_state.SetInclusive(0, block_aggregate); + } + else + { + // Scan non-first tile + TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.prefix, scan_op, tile_idx); + ScanTile(items, scan_op, prefix_op, Int2Type()); + } + + CTA_SYNC(); + + // 
Store items + if (IS_LAST_TILE) + BlockStoreT(temp_storage.store).Store(d_out + tile_offset, items, num_remaining); + else + BlockStoreT(temp_storage.store).Store(d_out + tile_offset, items); + } + + + /** + * Scan tiles of items as part of a dynamic chained scan + */ + __device__ __forceinline__ void ConsumeRange( + int num_items, ///< Total number of input items + ScanTileStateT& tile_state, ///< Global tile state descriptor + int start_tile) ///< The starting tile for the current grid + { + // Blocks are launched in increasing order, so just assign one tile per block + int tile_idx = start_tile + blockIdx.x; // Current tile index + OffsetT tile_offset = OffsetT(TILE_ITEMS) * tile_idx; // Global offset for the current tile + OffsetT num_remaining = num_items - tile_offset; // Remaining items (including this tile) + + if (num_remaining > TILE_ITEMS) + { + // Not last tile + ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); + } + else if (num_remaining > 0) + { + // Last tile + ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); + } + } + + + //--------------------------------------------------------------------- + // Scan an sequence of consecutive tiles (independent of other thread blocks) + //--------------------------------------------------------------------- + + /** + * Process a tile of input + */ + template < + bool IS_FIRST_TILE, + bool IS_LAST_TILE> + __device__ __forceinline__ void ConsumeTile( + OffsetT tile_offset, ///< Tile offset + RunningPrefixCallbackOp& prefix_op, ///< Running prefix operator + int valid_items = TILE_ITEMS) ///< Number of valid items in the tile + { + // Load items + OutputT items[ITEMS_PER_THREAD]; + + if (IS_LAST_TILE) + BlockLoadT(temp_storage.load).Load(d_in + tile_offset, items, valid_items); + else + BlockLoadT(temp_storage.load).Load(d_in + tile_offset, items); + + CTA_SYNC(); + + // Block scan + if (IS_FIRST_TILE) + { + OutputT block_aggregate; + ScanTile(items, init_value, scan_op, block_aggregate, Int2Type()); + prefix_op.running_total = block_aggregate; + } + else + { + ScanTile(items, scan_op, prefix_op, Int2Type()); + } + + CTA_SYNC(); + + // Store items + if (IS_LAST_TILE) + BlockStoreT(temp_storage.store).Store(d_out + tile_offset, items, valid_items); + else + BlockStoreT(temp_storage.store).Store(d_out + tile_offset, items); + } + + + /** + * Scan a consecutive share of input tiles + */ + __device__ __forceinline__ void ConsumeRange( + OffsetT range_offset, ///< [in] Threadblock begin offset (inclusive) + OffsetT range_end) ///< [in] Threadblock end offset (exclusive) + { + BlockScanRunningPrefixOp prefix_op(scan_op); + + if (range_offset + TILE_ITEMS <= range_end) + { + // Consume first tile of input (full) + ConsumeTile(range_offset, prefix_op); + range_offset += TILE_ITEMS; + + // Consume subsequent full tiles of input + while (range_offset + TILE_ITEMS <= range_end) + { + ConsumeTile(range_offset, prefix_op); + range_offset += TILE_ITEMS; + } + + // Consume a partially-full tile + if (range_offset < range_end) + { + int valid_items = range_end - range_offset; + ConsumeTile(range_offset, prefix_op, valid_items); + } + } + else + { + // Consume the first tile of input (partially-full) + int valid_items = range_end - range_offset; + ConsumeTile(range_offset, prefix_op, valid_items); + } + } + + + /** + * Scan a consecutive share of input tiles, seeded with the specified prefix value + */ + __device__ __forceinline__ void ConsumeRange( + OffsetT range_offset, ///< [in] Threadblock begin offset (inclusive) + OffsetT 
range_end, ///< [in] Threadblock end offset (exclusive) + OutputT prefix) ///< [in] The prefix to apply to the scan segment + { + BlockScanRunningPrefixOp prefix_op(prefix, scan_op); + + // Consume full tiles of input + while (range_offset + TILE_ITEMS <= range_end) + { + ConsumeTile(range_offset, prefix_op); + range_offset += TILE_ITEMS; + } + + // Consume a partially-full tile + if (range_offset < range_end) + { + int valid_items = range_end - range_offset; + ConsumeTile(range_offset, prefix_op, valid_items); + } + } + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_segment_fixup.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_segment_fixup.cuh new file mode 100644 index 0000000..e2de58e --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_segment_fixup.cuh @@ -0,0 +1,375 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::AgentSegmentFixup implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduce-value-by-key. 
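Editor's note (not part of this diff): the file description above introduces cub::AgentSegmentFixup, which performs the cross-tile "fixup" pass that folds partial per-segment reductions back into an output array (CUB uses it, for example, in its merge-based SpMV dispatch). As a hedged, editor-added reference for the reduce-value-by-key semantics it implements, the sequential C++ below shows the intended result; it is plain host code, not CUB code, and all names are illustrative.

// Editor-added sequential reference: each (key, value) pair is a partial
// reduction that must be folded into the aggregate already stored at
// aggregates_out[key]; the agent above does this in parallel, combining
// consecutive pairs with equal keys first (reduction_op = Sum here).
#include <utility>
#include <vector>

void segment_fixup_reference(const std::vector<std::pair<int, float>> &pairs,
                             std::vector<float>                       &aggregates_out)
{
    for (std::size_t i = 0; i < pairs.size(); ++i)
        aggregates_out[pairs[i].first] += pairs[i].second;
}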
+ */ + +#pragma once + +#include + +#include "single_pass_scan_operators.cuh" +#include "../block/block_load.cuh" +#include "../block/block_store.cuh" +#include "../block/block_scan.cuh" +#include "../block/block_discontinuity.cuh" +#include "../iterator/cache_modified_input_iterator.cuh" +#include "../iterator/constant_input_iterator.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Tuning policy types + ******************************************************************************/ + +/** + * Parameterizable tuning policy type for AgentSegmentFixup + */ +template < + int _BLOCK_THREADS, ///< Threads per thread block + int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use + CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements + BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use +struct AgentSegmentFixupPolicy +{ + enum + { + BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block + ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + }; + + static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use + static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements + static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use +}; + + +/****************************************************************************** + * Thread block abstractions + ******************************************************************************/ + +/** + * \brief AgentSegmentFixup implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduce-value-by-key + */ +template < + typename AgentSegmentFixupPolicyT, ///< Parameterized AgentSegmentFixupPolicy tuning policy type + typename PairsInputIteratorT, ///< Random-access input iterator type for keys + typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values + typename EqualityOpT, ///< KeyT equality operator type + typename ReductionOpT, ///< ValueT reduction operator type + typename OffsetT> ///< Signed integer type for global offsets +struct AgentSegmentFixup +{ + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + // Data type of key-value input iterator + typedef typename std::iterator_traits::value_type KeyValuePairT; + + // Value type + typedef typename KeyValuePairT::Value ValueT; + + // Tile status descriptor interface type + typedef ReduceByKeyScanTileState ScanTileStateT; + + // Constants + enum + { + BLOCK_THREADS = AgentSegmentFixupPolicyT::BLOCK_THREADS, + ITEMS_PER_THREAD = AgentSegmentFixupPolicyT::ITEMS_PER_THREAD, + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + + // Whether or not do fixup using RLE + global atomics + USE_ATOMIC_FIXUP = (CUB_PTX_ARCH >= 350) && + (Equals::VALUE || + Equals::VALUE || + Equals::VALUE || + Equals::VALUE), + + // Whether or not the scan operation has a zero-valued identity value (true if we're performing addition on a primitive type) + HAS_IDENTITY_ZERO = (Equals::VALUE) && (Traits::PRIMITIVE), + }; + + // Cache-modified Input iterator wrapper type (for applying 
cache modifier) for keys + typedef typename If::VALUE, + CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator + PairsInputIteratorT>::Type // Directly use the supplied input iterator type + WrappedPairsInputIteratorT; + + // Cache-modified Input iterator wrapper type (for applying cache modifier) for fixup values + typedef typename If::VALUE, + CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator + AggregatesOutputIteratorT>::Type // Directly use the supplied input iterator type + WrappedFixupInputIteratorT; + + // Reduce-value-by-segment scan operator + typedef ReduceByKeyOp ReduceBySegmentOpT; + + // Parameterized BlockLoad type for pairs + typedef BlockLoad< + KeyValuePairT, + BLOCK_THREADS, + ITEMS_PER_THREAD, + AgentSegmentFixupPolicyT::LOAD_ALGORITHM> + BlockLoadPairs; + + // Parameterized BlockScan type + typedef BlockScan< + KeyValuePairT, + BLOCK_THREADS, + AgentSegmentFixupPolicyT::SCAN_ALGORITHM> + BlockScanT; + + // Callback type for obtaining tile prefix during block scan + typedef TilePrefixCallbackOp< + KeyValuePairT, + ReduceBySegmentOpT, + ScanTileStateT> + TilePrefixCallbackOpT; + + // Shared memory type for this thread block + union _TempStorage + { + struct + { + typename BlockScanT::TempStorage scan; // Smem needed for tile scanning + typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback + }; + + // Smem needed for loading keys + typename BlockLoadPairs::TempStorage load_pairs; + }; + + // Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Per-thread fields + //--------------------------------------------------------------------- + + _TempStorage& temp_storage; ///< Reference to temp_storage + WrappedPairsInputIteratorT d_pairs_in; ///< Input keys + AggregatesOutputIteratorT d_aggregates_out; ///< Output value aggregates + WrappedFixupInputIteratorT d_fixup_in; ///< Fixup input values + InequalityWrapper inequality_op; ///< KeyT inequality operator + ReductionOpT reduction_op; ///< Reduction operator + ReduceBySegmentOpT scan_op; ///< Reduce-by-segment scan operator + + + //--------------------------------------------------------------------- + // Constructor + //--------------------------------------------------------------------- + + // Constructor + __device__ __forceinline__ + AgentSegmentFixup( + TempStorage& temp_storage, ///< Reference to temp_storage + PairsInputIteratorT d_pairs_in, ///< Input keys + AggregatesOutputIteratorT d_aggregates_out, ///< Output value aggregates + EqualityOpT equality_op, ///< KeyT equality operator + ReductionOpT reduction_op) ///< ValueT reduction operator + : + temp_storage(temp_storage.Alias()), + d_pairs_in(d_pairs_in), + d_aggregates_out(d_aggregates_out), + d_fixup_in(d_aggregates_out), + inequality_op(equality_op), + reduction_op(reduction_op), + scan_op(reduction_op) + {} + + + //--------------------------------------------------------------------- + // Cooperatively scan a device-wide sequence of tiles with other CTAs + //--------------------------------------------------------------------- + + + /** + * Process input tile. 
Specialized for atomic-fixup + */ + template + __device__ __forceinline__ void ConsumeTile( + OffsetT num_remaining, ///< Number of global input items remaining (including this tile) + int tile_idx, ///< Tile index + OffsetT tile_offset, ///< Tile offset + ScanTileStateT& tile_state, ///< Global tile state descriptor + Int2Type use_atomic_fixup) ///< Marker whether to use atomicAdd (instead of reduce-by-key) + { + KeyValuePairT pairs[ITEMS_PER_THREAD]; + + // Load pairs + KeyValuePairT oob_pair; + oob_pair.key = -1; + + if (IS_LAST_TILE) + BlockLoadPairs(temp_storage.load_pairs).Load(d_pairs_in + tile_offset, pairs, num_remaining, oob_pair); + else + BlockLoadPairs(temp_storage.load_pairs).Load(d_pairs_in + tile_offset, pairs); + + // RLE + #pragma unroll + for (int ITEM = 1; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + ValueT* d_scatter = d_aggregates_out + pairs[ITEM - 1].key; + if (pairs[ITEM].key != pairs[ITEM - 1].key) + atomicAdd(d_scatter, pairs[ITEM - 1].value); + else + pairs[ITEM].value = reduction_op(pairs[ITEM - 1].value, pairs[ITEM].value); + } + + // Flush last item if valid + ValueT* d_scatter = d_aggregates_out + pairs[ITEMS_PER_THREAD - 1].key; + if ((!IS_LAST_TILE) || (pairs[ITEMS_PER_THREAD - 1].key >= 0)) + atomicAdd(d_scatter, pairs[ITEMS_PER_THREAD - 1].value); + } + + + /** + * Process input tile. Specialized for reduce-by-key fixup + */ + template + __device__ __forceinline__ void ConsumeTile( + OffsetT num_remaining, ///< Number of global input items remaining (including this tile) + int tile_idx, ///< Tile index + OffsetT tile_offset, ///< Tile offset + ScanTileStateT& tile_state, ///< Global tile state descriptor + Int2Type use_atomic_fixup) ///< Marker whether to use atomicAdd (instead of reduce-by-key) + { + KeyValuePairT pairs[ITEMS_PER_THREAD]; + KeyValuePairT scatter_pairs[ITEMS_PER_THREAD]; + + // Load pairs + KeyValuePairT oob_pair; + oob_pair.key = -1; + + if (IS_LAST_TILE) + BlockLoadPairs(temp_storage.load_pairs).Load(d_pairs_in + tile_offset, pairs, num_remaining, oob_pair); + else + BlockLoadPairs(temp_storage.load_pairs).Load(d_pairs_in + tile_offset, pairs); + + CTA_SYNC(); + + KeyValuePairT tile_aggregate; + if (tile_idx == 0) + { + // Exclusive scan of values and segment_flags + BlockScanT(temp_storage.scan).ExclusiveScan(pairs, scatter_pairs, scan_op, tile_aggregate); + + // Update tile status if this is not the last tile + if (threadIdx.x == 0) + { + // Set first segment id to not trigger a flush (invalid from exclusive scan) + scatter_pairs[0].key = pairs[0].key; + + if (!IS_LAST_TILE) + tile_state.SetInclusive(0, tile_aggregate); + + } + } + else + { + // Exclusive scan of values and segment_flags + TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.prefix, scan_op, tile_idx); + BlockScanT(temp_storage.scan).ExclusiveScan(pairs, scatter_pairs, scan_op, prefix_op); + tile_aggregate = prefix_op.GetBlockAggregate(); + } + + // Scatter updated values + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if (scatter_pairs[ITEM].key != pairs[ITEM].key) + { + // Update the value at the key location + ValueT value = d_fixup_in[scatter_pairs[ITEM].key]; + value = reduction_op(value, scatter_pairs[ITEM].value); + + d_aggregates_out[scatter_pairs[ITEM].key] = value; + } + } + + // Finalize the last item + if (IS_LAST_TILE) + { + // Last thread will output final count and last item, if necessary + if (threadIdx.x == BLOCK_THREADS - 1) + { + // If the last tile is a whole tile, the inclusive prefix contains accumulated value 
reduction for the last segment + if (num_remaining == TILE_ITEMS) + { + // Update the value at the key location + OffsetT last_key = pairs[ITEMS_PER_THREAD - 1].key; + d_aggregates_out[last_key] = reduction_op(tile_aggregate.value, d_fixup_in[last_key]); + } + } + } + } + + + /** + * Scan tiles of items as part of a dynamic chained scan + */ + __device__ __forceinline__ void ConsumeRange( + int num_items, ///< Total number of input items + int num_tiles, ///< Total number of input tiles + ScanTileStateT& tile_state) ///< Global tile state descriptor + { + // Blocks are launched in increasing order, so just assign one tile per block + int tile_idx = (blockIdx.x * gridDim.y) + blockIdx.y; // Current tile index + OffsetT tile_offset = tile_idx * TILE_ITEMS; // Global offset for the current tile + OffsetT num_remaining = num_items - tile_offset; // Remaining items (including this tile) + + if (num_remaining > TILE_ITEMS) + { + // Not the last tile (full) + ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state, Int2Type()); + } + else if (num_remaining > 0) + { + // The last tile (possibly partially-full) + ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state, Int2Type()); + } + } + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_select_if.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_select_if.cuh new file mode 100644 index 0000000..52ca9fc --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_select_if.cuh @@ -0,0 +1,703 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::AgentSelectIf implements a stateful abstraction of CUDA thread blocks for participating in device-wide select. 
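+ *
+ * \par
+ * A minimal sketch of how a wrapper kernel typically drives this agent. The
+ * kernel name, iterator types, selection functor, and tuning-policy parameters
+ * below are illustrative assumptions, not definitions made by this header:
+ *
+ * \code
+ * struct LessThan100
+ * {
+ *     __device__ bool operator()(int x) const { return x < 100; }
+ * };
+ *
+ * template <typename ScanTileStateT>
+ * __global__ void ExampleSelectKernel(
+ *     int             *d_in,
+ *     int             *d_out,
+ *     int             *d_num_selected_out,
+ *     ScanTileStateT  tile_state,
+ *     int             num_items,
+ *     int             num_tiles)
+ * {
+ *     // Tuning policy and agent type (illustrative parameter choices)
+ *     typedef AgentSelectIfPolicy<128, 4, BLOCK_LOAD_DIRECT, LOAD_DEFAULT, BLOCK_SCAN_WARP_SCANS> PolicyT;
+ *     typedef AgentSelectIf<PolicyT, int*, NullType*, int*, LessThan100, NullType, int, false> AgentT;
+ *
+ *     // Shared memory for the agent
+ *     __shared__ typename AgentT::TempStorage temp_storage;
+ *
+ *     // Construct the agent and consume the tiles assigned to this block
+ *     AgentT agent(temp_storage, d_in, NULL, d_out, LessThan100(), NullType(), num_items);
+ *     agent.ConsumeRange(num_tiles, tile_state, d_num_selected_out);
+ * }
+ * \endcode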
+ */ + +#pragma once + +#include + +#include "single_pass_scan_operators.cuh" +#include "../block/block_load.cuh" +#include "../block/block_store.cuh" +#include "../block/block_scan.cuh" +#include "../block/block_exchange.cuh" +#include "../block/block_discontinuity.cuh" +#include "../grid/grid_queue.cuh" +#include "../iterator/cache_modified_input_iterator.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Tuning policy types + ******************************************************************************/ + +/** + * Parameterizable tuning policy type for AgentSelectIf + */ +template < + int _BLOCK_THREADS, ///< Threads per thread block + int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use + CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements + BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use +struct AgentSelectIfPolicy +{ + enum + { + BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block + ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + }; + + static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use + static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements + static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use +}; + + + + +/****************************************************************************** + * Thread block abstractions + ******************************************************************************/ + + +/** + * \brief AgentSelectIf implements a stateful abstraction of CUDA thread blocks for participating in device-wide selection + * + * Performs functor-based selection if SelectOpT functor type != NullType + * Otherwise performs flag-based selection if FlagsInputIterator's value type != NullType + * Otherwise performs discontinuity selection (keep unique) + */ +template < + typename AgentSelectIfPolicyT, ///< Parameterized AgentSelectIfPolicy tuning policy type + typename InputIteratorT, ///< Random-access input iterator type for selection items + typename FlagsInputIteratorT, ///< Random-access input iterator type for selections (NullType* if a selection functor or discontinuity flagging is to be used for selection) + typename SelectedOutputIteratorT, ///< Random-access input iterator type for selection_flags items + typename SelectOpT, ///< Selection operator type (NullType if selections or discontinuity flagging is to be used for selection) + typename EqualityOpT, ///< Equality operator type (NullType if selection functor or selections is to be used for selection) + typename OffsetT, ///< Signed integer type for global offsets + bool KEEP_REJECTS> ///< Whether or not we push rejected items to the back of the output +struct AgentSelectIf +{ + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? 
+ typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type + + // The flag value type + typedef typename std::iterator_traits::value_type FlagT; + + // Tile status descriptor interface type + typedef ScanTileState ScanTileStateT; + + // Constants + enum + { + USE_SELECT_OP, + USE_SELECT_FLAGS, + USE_DISCONTINUITY, + + BLOCK_THREADS = AgentSelectIfPolicyT::BLOCK_THREADS, + ITEMS_PER_THREAD = AgentSelectIfPolicyT::ITEMS_PER_THREAD, + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + TWO_PHASE_SCATTER = (ITEMS_PER_THREAD > 1), + + SELECT_METHOD = (!Equals::VALUE) ? + USE_SELECT_OP : + (!Equals::VALUE) ? + USE_SELECT_FLAGS : + USE_DISCONTINUITY + }; + + // Cache-modified Input iterator wrapper type (for applying cache modifier) for items + typedef typename If::VALUE, + CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator + InputIteratorT>::Type // Directly use the supplied input iterator type + WrappedInputIteratorT; + + // Cache-modified Input iterator wrapper type (for applying cache modifier) for values + typedef typename If::VALUE, + CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedValuesInputIterator + FlagsInputIteratorT>::Type // Directly use the supplied input iterator type + WrappedFlagsInputIteratorT; + + // Parameterized BlockLoad type for input data + typedef BlockLoad< + OutputT, + BLOCK_THREADS, + ITEMS_PER_THREAD, + AgentSelectIfPolicyT::LOAD_ALGORITHM> + BlockLoadT; + + // Parameterized BlockLoad type for flags + typedef BlockLoad< + FlagT, + BLOCK_THREADS, + ITEMS_PER_THREAD, + AgentSelectIfPolicyT::LOAD_ALGORITHM> + BlockLoadFlags; + + // Parameterized BlockDiscontinuity type for items + typedef BlockDiscontinuity< + OutputT, + BLOCK_THREADS> + BlockDiscontinuityT; + + // Parameterized BlockScan type + typedef BlockScan< + OffsetT, + BLOCK_THREADS, + AgentSelectIfPolicyT::SCAN_ALGORITHM> + BlockScanT; + + // Callback type for obtaining tile prefix during block scan + typedef TilePrefixCallbackOp< + OffsetT, + cub::Sum, + ScanTileStateT> + TilePrefixCallbackOpT; + + // Item exchange type + typedef OutputT ItemExchangeT[TILE_ITEMS]; + + // Shared memory type for this thread block + union _TempStorage + { + struct + { + typename BlockScanT::TempStorage scan; // Smem needed for tile scanning + typename TilePrefixCallbackOpT::TempStorage prefix; // Smem needed for cooperative prefix callback + typename BlockDiscontinuityT::TempStorage discontinuity; // Smem needed for discontinuity detection + }; + + // Smem needed for loading items + typename BlockLoadT::TempStorage load_items; + + // Smem needed for loading values + typename BlockLoadFlags::TempStorage load_flags; + + // Smem needed for compacting items (allows non POD items in this union) + Uninitialized raw_exchange; + }; + + // Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Per-thread fields + //--------------------------------------------------------------------- + + _TempStorage& temp_storage; ///< Reference to temp_storage + WrappedInputIteratorT d_in; ///< Input items + SelectedOutputIteratorT d_selected_out; ///< Unique output items + WrappedFlagsInputIteratorT d_flags_in; ///< Input selection flags (if applicable) + InequalityWrapper inequality_op; ///< T inequality operator + 
SelectOpT select_op; ///< Selection operator + OffsetT num_items; ///< Total number of input items + + + //--------------------------------------------------------------------- + // Constructor + //--------------------------------------------------------------------- + + // Constructor + __device__ __forceinline__ + AgentSelectIf( + TempStorage &temp_storage, ///< Reference to temp_storage + InputIteratorT d_in, ///< Input data + FlagsInputIteratorT d_flags_in, ///< Input selection flags (if applicable) + SelectedOutputIteratorT d_selected_out, ///< Output data + SelectOpT select_op, ///< Selection operator + EqualityOpT equality_op, ///< Equality operator + OffsetT num_items) ///< Total number of input items + : + temp_storage(temp_storage.Alias()), + d_in(d_in), + d_flags_in(d_flags_in), + d_selected_out(d_selected_out), + select_op(select_op), + inequality_op(equality_op), + num_items(num_items) + {} + + + //--------------------------------------------------------------------- + // Utility methods for initializing the selections + //--------------------------------------------------------------------- + + /** + * Initialize selections (specialized for selection operator) + */ + template + __device__ __forceinline__ void InitializeSelections( + OffsetT /*tile_offset*/, + OffsetT num_tile_items, + OutputT (&items)[ITEMS_PER_THREAD], + OffsetT (&selection_flags)[ITEMS_PER_THREAD], + Int2Type /*select_method*/) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + // Out-of-bounds items are selection_flags + selection_flags[ITEM] = 1; + + if (!IS_LAST_TILE || (OffsetT(threadIdx.x * ITEMS_PER_THREAD) + ITEM < num_tile_items)) + selection_flags[ITEM] = select_op(items[ITEM]); + } + } + + + /** + * Initialize selections (specialized for valid flags) + */ + template + __device__ __forceinline__ void InitializeSelections( + OffsetT tile_offset, + OffsetT num_tile_items, + OutputT (&/*items*/)[ITEMS_PER_THREAD], + OffsetT (&selection_flags)[ITEMS_PER_THREAD], + Int2Type /*select_method*/) + { + CTA_SYNC(); + + FlagT flags[ITEMS_PER_THREAD]; + + if (IS_LAST_TILE) + { + // Out-of-bounds items are selection_flags + BlockLoadFlags(temp_storage.load_flags).Load(d_flags_in + tile_offset, flags, num_tile_items, 1); + } + else + { + BlockLoadFlags(temp_storage.load_flags).Load(d_flags_in + tile_offset, flags); + } + + // Convert flag type to selection_flags type + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + selection_flags[ITEM] = flags[ITEM]; + } + } + + + /** + * Initialize selections (specialized for discontinuity detection) + */ + template + __device__ __forceinline__ void InitializeSelections( + OffsetT tile_offset, + OffsetT num_tile_items, + OutputT (&items)[ITEMS_PER_THREAD], + OffsetT (&selection_flags)[ITEMS_PER_THREAD], + Int2Type /*select_method*/) + { + if (IS_FIRST_TILE) + { + CTA_SYNC(); + + // Set head selection_flags. 
First tile sets the first flag for the first item + BlockDiscontinuityT(temp_storage.discontinuity).FlagHeads(selection_flags, items, inequality_op); + } + else + { + OutputT tile_predecessor; + if (threadIdx.x == 0) + tile_predecessor = d_in[tile_offset - 1]; + + CTA_SYNC(); + + BlockDiscontinuityT(temp_storage.discontinuity).FlagHeads(selection_flags, items, inequality_op, tile_predecessor); + } + + // Set selection flags for out-of-bounds items + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + // Set selection_flags for out-of-bounds items + if ((IS_LAST_TILE) && (OffsetT(threadIdx.x * ITEMS_PER_THREAD) + ITEM >= num_tile_items)) + selection_flags[ITEM] = 1; + } + } + + + //--------------------------------------------------------------------- + // Scatter utility methods + //--------------------------------------------------------------------- + + /** + * Scatter flagged items to output offsets (specialized for direct scattering) + */ + template + __device__ __forceinline__ void ScatterDirect( + OutputT (&items)[ITEMS_PER_THREAD], + OffsetT (&selection_flags)[ITEMS_PER_THREAD], + OffsetT (&selection_indices)[ITEMS_PER_THREAD], + OffsetT num_selections) + { + // Scatter flagged items + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if (selection_flags[ITEM]) + { + if ((!IS_LAST_TILE) || selection_indices[ITEM] < num_selections) + { + d_selected_out[selection_indices[ITEM]] = items[ITEM]; + } + } + } + } + + + /** + * Scatter flagged items to output offsets (specialized for two-phase scattering) + */ + template + __device__ __forceinline__ void ScatterTwoPhase( + OutputT (&items)[ITEMS_PER_THREAD], + OffsetT (&selection_flags)[ITEMS_PER_THREAD], + OffsetT (&selection_indices)[ITEMS_PER_THREAD], + int /*num_tile_items*/, ///< Number of valid items in this tile + int num_tile_selections, ///< Number of selections in this tile + OffsetT num_selections_prefix, ///< Total number of selections prior to this tile + OffsetT /*num_rejected_prefix*/, ///< Total number of rejections prior to this tile + Int2Type /*is_keep_rejects*/) ///< Marker type indicating whether to keep rejected items in the second partition + { + CTA_SYNC(); + + // Compact and scatter items + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + int local_scatter_offset = selection_indices[ITEM] - num_selections_prefix; + if (selection_flags[ITEM]) + { + temp_storage.raw_exchange.Alias()[local_scatter_offset] = items[ITEM]; + } + } + + CTA_SYNC(); + + for (int item = threadIdx.x; item < num_tile_selections; item += BLOCK_THREADS) + { + d_selected_out[num_selections_prefix + item] = temp_storage.raw_exchange.Alias()[item]; + } + } + + + /** + * Scatter flagged items to output offsets (specialized for two-phase scattering) + */ + template + __device__ __forceinline__ void ScatterTwoPhase( + OutputT (&items)[ITEMS_PER_THREAD], + OffsetT (&selection_flags)[ITEMS_PER_THREAD], + OffsetT (&selection_indices)[ITEMS_PER_THREAD], + int num_tile_items, ///< Number of valid items in this tile + int num_tile_selections, ///< Number of selections in this tile + OffsetT num_selections_prefix, ///< Total number of selections prior to this tile + OffsetT num_rejected_prefix, ///< Total number of rejections prior to this tile + Int2Type /*is_keep_rejects*/) ///< Marker type indicating whether to keep rejected items in the second partition + { + CTA_SYNC(); + + int tile_num_rejections = num_tile_items - num_tile_selections; + + // Scatter items to shared memory 
(rejections first) + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + int item_idx = (threadIdx.x * ITEMS_PER_THREAD) + ITEM; + int local_selection_idx = selection_indices[ITEM] - num_selections_prefix; + int local_rejection_idx = item_idx - local_selection_idx; + int local_scatter_offset = (selection_flags[ITEM]) ? + tile_num_rejections + local_selection_idx : + local_rejection_idx; + + temp_storage.raw_exchange.Alias()[local_scatter_offset] = items[ITEM]; + } + + CTA_SYNC(); + + // Gather items from shared memory and scatter to global + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + int item_idx = (ITEM * BLOCK_THREADS) + threadIdx.x; + int rejection_idx = item_idx; + int selection_idx = item_idx - tile_num_rejections; + OffsetT scatter_offset = (item_idx < tile_num_rejections) ? + num_items - num_rejected_prefix - rejection_idx - 1 : + num_selections_prefix + selection_idx; + + OutputT item = temp_storage.raw_exchange.Alias()[item_idx]; + + if (!IS_LAST_TILE || (item_idx < num_tile_items)) + { + d_selected_out[scatter_offset] = item; + } + } + } + + + /** + * Scatter flagged items + */ + template + __device__ __forceinline__ void Scatter( + OutputT (&items)[ITEMS_PER_THREAD], + OffsetT (&selection_flags)[ITEMS_PER_THREAD], + OffsetT (&selection_indices)[ITEMS_PER_THREAD], + int num_tile_items, ///< Number of valid items in this tile + int num_tile_selections, ///< Number of selections in this tile + OffsetT num_selections_prefix, ///< Total number of selections prior to this tile + OffsetT num_rejected_prefix, ///< Total number of rejections prior to this tile + OffsetT num_selections) ///< Total number of selections including this tile + { + // Do a two-phase scatter if (a) keeping both partitions or (b) two-phase is enabled and the average number of selection_flags items per thread is greater than one + if (KEEP_REJECTS || (TWO_PHASE_SCATTER && (num_tile_selections > BLOCK_THREADS))) + { + ScatterTwoPhase( + items, + selection_flags, + selection_indices, + num_tile_items, + num_tile_selections, + num_selections_prefix, + num_rejected_prefix, + Int2Type()); + } + else + { + ScatterDirect( + items, + selection_flags, + selection_indices, + num_selections); + } + } + + //--------------------------------------------------------------------- + // Cooperatively scan a device-wide sequence of tiles with other CTAs + //--------------------------------------------------------------------- + + + /** + * Process first tile of input (dynamic chained scan). 
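+ * In the chained-scan scheme, tile 0 has no predecessor: it scans its own
+ * selection flags locally and, when more tiles follow, publishes its selection
+ * count through tile_state.SetInclusive(0, ...) so that later tiles can obtain
+ * their running prefix without a global barrier.
+ *
+ * \par
+ * Worked example (counts are assumed for illustration): with TILE_ITEMS = 512
+ * and 37 items selected in tile 0, tile 0 scatters its selections to offsets
+ * [0, 37) of d_selected_out and publishes 37; tile 1 then begins scattering at
+ * offset 37 without re-scanning tile 0's input.
+ *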
Returns the running count of selections (including this tile) + */ + template + __device__ __forceinline__ OffsetT ConsumeFirstTile( + int num_tile_items, ///< Number of input items comprising this tile + OffsetT tile_offset, ///< Tile offset + ScanTileStateT& tile_state) ///< Global tile state descriptor + { + OutputT items[ITEMS_PER_THREAD]; + OffsetT selection_flags[ITEMS_PER_THREAD]; + OffsetT selection_indices[ITEMS_PER_THREAD]; + + // Load items + if (IS_LAST_TILE) + BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items, num_tile_items); + else + BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items); + + // Initialize selection_flags + InitializeSelections( + tile_offset, + num_tile_items, + items, + selection_flags, + Int2Type()); + + CTA_SYNC(); + + // Exclusive scan of selection_flags + OffsetT num_tile_selections; + BlockScanT(temp_storage.scan).ExclusiveSum(selection_flags, selection_indices, num_tile_selections); + + if (threadIdx.x == 0) + { + // Update tile status if this is not the last tile + if (!IS_LAST_TILE) + tile_state.SetInclusive(0, num_tile_selections); + } + + // Discount any out-of-bounds selections + if (IS_LAST_TILE) + num_tile_selections -= (TILE_ITEMS - num_tile_items); + + // Scatter flagged items + Scatter( + items, + selection_flags, + selection_indices, + num_tile_items, + num_tile_selections, + 0, + 0, + num_tile_selections); + + return num_tile_selections; + } + + + /** + * Process subsequent tile of input (dynamic chained scan). Returns the running count of selections (including this tile) + */ + template + __device__ __forceinline__ OffsetT ConsumeSubsequentTile( + int num_tile_items, ///< Number of input items comprising this tile + int tile_idx, ///< Tile index + OffsetT tile_offset, ///< Tile offset + ScanTileStateT& tile_state) ///< Global tile state descriptor + { + OutputT items[ITEMS_PER_THREAD]; + OffsetT selection_flags[ITEMS_PER_THREAD]; + OffsetT selection_indices[ITEMS_PER_THREAD]; + + // Load items + if (IS_LAST_TILE) + BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items, num_tile_items); + else + BlockLoadT(temp_storage.load_items).Load(d_in + tile_offset, items); + + // Initialize selection_flags + InitializeSelections( + tile_offset, + num_tile_items, + items, + selection_flags, + Int2Type()); + + CTA_SYNC(); + + // Exclusive scan of values and selection_flags + TilePrefixCallbackOpT prefix_op(tile_state, temp_storage.prefix, cub::Sum(), tile_idx); + BlockScanT(temp_storage.scan).ExclusiveSum(selection_flags, selection_indices, prefix_op); + + OffsetT num_tile_selections = prefix_op.GetBlockAggregate(); + OffsetT num_selections = prefix_op.GetInclusivePrefix(); + OffsetT num_selections_prefix = prefix_op.GetExclusivePrefix(); + OffsetT num_rejected_prefix = (tile_idx * TILE_ITEMS) - num_selections_prefix; + + // Discount any out-of-bounds selections + if (IS_LAST_TILE) + { + int num_discount = TILE_ITEMS - num_tile_items; + num_selections -= num_discount; + num_tile_selections -= num_discount; + } + + // Scatter flagged items + Scatter( + items, + selection_flags, + selection_indices, + num_tile_items, + num_tile_selections, + num_selections_prefix, + num_rejected_prefix, + num_selections); + + return num_selections; + } + + + /** + * Process a tile of input + */ + template + __device__ __forceinline__ OffsetT ConsumeTile( + int num_tile_items, ///< Number of input items comprising this tile + int tile_idx, ///< Tile index + OffsetT tile_offset, ///< Tile offset + ScanTileStateT& tile_state) 
///< Global tile state descriptor + { + OffsetT num_selections; + if (tile_idx == 0) + { + num_selections = ConsumeFirstTile(num_tile_items, tile_offset, tile_state); + } + else + { + num_selections = ConsumeSubsequentTile(num_tile_items, tile_idx, tile_offset, tile_state); + } + + return num_selections; + } + + + /** + * Scan tiles of items as part of a dynamic chained scan + */ + template ///< Output iterator type for recording number of items selection_flags + __device__ __forceinline__ void ConsumeRange( + int num_tiles, ///< Total number of input tiles + ScanTileStateT& tile_state, ///< Global tile state descriptor + NumSelectedIteratorT d_num_selected_out) ///< Output total number selection_flags + { + // Blocks are launched in increasing order, so just assign one tile per block + int tile_idx = (blockIdx.x * gridDim.y) + blockIdx.y; // Current tile index + OffsetT tile_offset = tile_idx * TILE_ITEMS; // Global offset for the current tile + + if (tile_idx < num_tiles - 1) + { + // Not the last tile (full) + ConsumeTile(TILE_ITEMS, tile_idx, tile_offset, tile_state); + } + else + { + // The last tile (possibly partially-full) + OffsetT num_remaining = num_items - tile_offset; + OffsetT num_selections = ConsumeTile(num_remaining, tile_idx, tile_offset, tile_state); + + if (threadIdx.x == 0) + { + // Output the total number of items selection_flags + *d_num_selected_out = num_selections; + } + } + } + +}; + + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_spmv_orig.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_spmv_orig.cuh new file mode 100644 index 0000000..54e2a13 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/agent_spmv_orig.cuh @@ -0,0 +1,670 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/** + * \file + * cub::AgentSpmv implements a stateful abstraction of CUDA thread blocks for participating in device-wide SpMV. + */ + +#pragma once + +#include + +#include "../util_type.cuh" +#include "../block/block_reduce.cuh" +#include "../block/block_scan.cuh" +#include "../block/block_exchange.cuh" +#include "../thread/thread_search.cuh" +#include "../thread/thread_operators.cuh" +#include "../iterator/cache_modified_input_iterator.cuh" +#include "../iterator/counting_input_iterator.cuh" +#include "../iterator/tex_ref_input_iterator.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Tuning policy + ******************************************************************************/ + +/** + * Parameterizable tuning policy type for AgentSpmv + */ +template < + int _BLOCK_THREADS, ///< Threads per thread block + int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + CacheLoadModifier _ROW_OFFSETS_SEARCH_LOAD_MODIFIER, ///< Cache load modifier for reading CSR row-offsets during search + CacheLoadModifier _ROW_OFFSETS_LOAD_MODIFIER, ///< Cache load modifier for reading CSR row-offsets + CacheLoadModifier _COLUMN_INDICES_LOAD_MODIFIER, ///< Cache load modifier for reading CSR column-indices + CacheLoadModifier _VALUES_LOAD_MODIFIER, ///< Cache load modifier for reading CSR values + CacheLoadModifier _VECTOR_VALUES_LOAD_MODIFIER, ///< Cache load modifier for reading vector values + bool _DIRECT_LOAD_NONZEROS, ///< Whether to load nonzeros directly from global during sequential merging (vs. pre-staged through shared memory) + BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use +struct AgentSpmvPolicy +{ + enum + { + BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block + ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + DIRECT_LOAD_NONZEROS = _DIRECT_LOAD_NONZEROS, ///< Whether to load nonzeros directly from global during sequential merging (pre-staged through shared memory) + }; + + static const CacheLoadModifier ROW_OFFSETS_SEARCH_LOAD_MODIFIER = _ROW_OFFSETS_SEARCH_LOAD_MODIFIER; ///< Cache load modifier for reading CSR row-offsets + static const CacheLoadModifier ROW_OFFSETS_LOAD_MODIFIER = _ROW_OFFSETS_LOAD_MODIFIER; ///< Cache load modifier for reading CSR row-offsets + static const CacheLoadModifier COLUMN_INDICES_LOAD_MODIFIER = _COLUMN_INDICES_LOAD_MODIFIER; ///< Cache load modifier for reading CSR column-indices + static const CacheLoadModifier VALUES_LOAD_MODIFIER = _VALUES_LOAD_MODIFIER; ///< Cache load modifier for reading CSR values + static const CacheLoadModifier VECTOR_VALUES_LOAD_MODIFIER = _VECTOR_VALUES_LOAD_MODIFIER; ///< Cache load modifier for reading vector values + static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use + +}; + + +/****************************************************************************** + * Thread block abstractions + ******************************************************************************/ + +template < + typename ValueT, ///< Matrix and vector value type + typename OffsetT> ///< Signed integer type for sequence offsets +struct SpmvParams +{ + ValueT* d_values; ///< Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix A. 
+ OffsetT* d_row_end_offsets; ///< Pointer to the array of \p m offsets demarcating the end of every row in \p d_column_indices and \p d_values + OffsetT* d_column_indices; ///< Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) + ValueT* d_vector_x; ///< Pointer to the array of \p num_cols values corresponding to the dense input vector x + ValueT* d_vector_y; ///< Pointer to the array of \p num_rows values corresponding to the dense output vector y + int num_rows; ///< Number of rows of matrix A. + int num_cols; ///< Number of columns of matrix A. + int num_nonzeros; ///< Number of nonzero elements of matrix A. + ValueT alpha; ///< Alpha multiplicand + ValueT beta; ///< Beta addend-multiplicand + + TexRefInputIterator t_vector_x; +}; + + +/** + * \brief AgentSpmv implements a stateful abstraction of CUDA thread blocks for participating in device-wide SpMV. + */ +template < + typename AgentSpmvPolicyT, ///< Parameterized AgentSpmvPolicy tuning policy type + typename ValueT, ///< Matrix and vector value type + typename OffsetT, ///< Signed integer type for sequence offsets + bool HAS_ALPHA, ///< Whether the input parameter \p alpha is 1 + bool HAS_BETA, ///< Whether the input parameter \p beta is 0 + int PTX_ARCH = CUB_PTX_ARCH> ///< PTX compute capability +struct AgentSpmv +{ + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + /// Constants + enum + { + BLOCK_THREADS = AgentSpmvPolicyT::BLOCK_THREADS, + ITEMS_PER_THREAD = AgentSpmvPolicyT::ITEMS_PER_THREAD, + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + }; + + /// 2D merge path coordinate type + typedef typename CubVector::Type CoordinateT; + + /// Input iterator wrapper types (for applying cache modifiers) + + typedef CacheModifiedInputIterator< + AgentSpmvPolicyT::ROW_OFFSETS_SEARCH_LOAD_MODIFIER, + OffsetT, + OffsetT> + RowOffsetsSearchIteratorT; + + typedef CacheModifiedInputIterator< + AgentSpmvPolicyT::ROW_OFFSETS_LOAD_MODIFIER, + OffsetT, + OffsetT> + RowOffsetsIteratorT; + + typedef CacheModifiedInputIterator< + AgentSpmvPolicyT::COLUMN_INDICES_LOAD_MODIFIER, + OffsetT, + OffsetT> + ColumnIndicesIteratorT; + + typedef CacheModifiedInputIterator< + AgentSpmvPolicyT::VALUES_LOAD_MODIFIER, + ValueT, + OffsetT> + ValueIteratorT; + + typedef CacheModifiedInputIterator< + AgentSpmvPolicyT::VECTOR_VALUES_LOAD_MODIFIER, + ValueT, + OffsetT> + VectorValueIteratorT; + + // Tuple type for scanning (pairs accumulated segment-value with segment-index) + typedef KeyValuePair KeyValuePairT; + + // Reduce-value-by-segment scan operator + typedef ReduceByKeyOp ReduceBySegmentOpT; + + // BlockReduce specialization + typedef BlockReduce< + ValueT, + BLOCK_THREADS, + BLOCK_REDUCE_WARP_REDUCTIONS> + BlockReduceT; + + // BlockScan specialization + typedef BlockScan< + KeyValuePairT, + BLOCK_THREADS, + AgentSpmvPolicyT::SCAN_ALGORITHM> + BlockScanT; + + // BlockScan specialization + typedef BlockScan< + ValueT, + BLOCK_THREADS, + AgentSpmvPolicyT::SCAN_ALGORITHM> + BlockPrefixSumT; + + // BlockExchange specialization + typedef BlockExchange< + ValueT, + BLOCK_THREADS, + ITEMS_PER_THREAD> + BlockExchangeT; + + /// Merge item type (either a non-zero value or a row-end offset) + union MergeItem + { + // Value type to pair with index type OffsetT (NullType if loading values directly during merge) + typedef typename If::Type MergeValueT; + + OffsetT 
row_end_offset; + MergeValueT nonzero; + }; + + /// Shared memory type required by this thread block + struct _TempStorage + { + CoordinateT tile_coords[2]; + + union Aliasable + { + // Smem needed for tile of merge items + MergeItem merge_items[ITEMS_PER_THREAD + TILE_ITEMS + 1]; + + // Smem needed for block exchange + typename BlockExchangeT::TempStorage exchange; + + // Smem needed for block-wide reduction + typename BlockReduceT::TempStorage reduce; + + // Smem needed for tile scanning + typename BlockScanT::TempStorage scan; + + // Smem needed for tile prefix sum + typename BlockPrefixSumT::TempStorage prefix_sum; + + } aliasable; + }; + + /// Temporary storage type (unionable) + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Per-thread fields + //--------------------------------------------------------------------- + + + _TempStorage& temp_storage; /// Reference to temp_storage + + SpmvParams& spmv_params; + + ValueIteratorT wd_values; ///< Wrapped pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix A. + RowOffsetsIteratorT wd_row_end_offsets; ///< Wrapped Pointer to the array of \p m offsets demarcating the end of every row in \p d_column_indices and \p d_values + ColumnIndicesIteratorT wd_column_indices; ///< Wrapped Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) + VectorValueIteratorT wd_vector_x; ///< Wrapped Pointer to the array of \p num_cols values corresponding to the dense input vector x + VectorValueIteratorT wd_vector_y; ///< Wrapped Pointer to the array of \p num_cols values corresponding to the dense input vector x + + + //--------------------------------------------------------------------- + // Interface + //--------------------------------------------------------------------- + + /** + * Constructor + */ + __device__ __forceinline__ AgentSpmv( + TempStorage& temp_storage, ///< Reference to temp_storage + SpmvParams& spmv_params) ///< SpMV input parameter bundle + : + temp_storage(temp_storage.Alias()), + spmv_params(spmv_params), + wd_values(spmv_params.d_values), + wd_row_end_offsets(spmv_params.d_row_end_offsets), + wd_column_indices(spmv_params.d_column_indices), + wd_vector_x(spmv_params.d_vector_x), + wd_vector_y(spmv_params.d_vector_y) + {} + + + + + /** + * Consume a merge tile, specialized for direct-load of nonzeros + */ + __device__ __forceinline__ KeyValuePairT ConsumeTile( + int tile_idx, + CoordinateT tile_start_coord, + CoordinateT tile_end_coord, + Int2Type is_direct_load) ///< Marker type indicating whether to load nonzeros directly during path-discovery or beforehand in batch + { + int tile_num_rows = tile_end_coord.x - tile_start_coord.x; + int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y; + OffsetT* s_tile_row_end_offsets = &temp_storage.aliasable.merge_items[0].row_end_offset; + + // Gather the row end-offsets for the merge tile into shared memory + for (int item = threadIdx.x; item <= tile_num_rows; item += BLOCK_THREADS) + { + s_tile_row_end_offsets[item] = wd_row_end_offsets[tile_start_coord.x + item]; + } + + CTA_SYNC(); + + // Search for the thread's starting coordinate within the merge tile + CountingInputIterator tile_nonzero_indices(tile_start_coord.y); + CoordinateT thread_start_coord; + + MergePathSearch( + OffsetT(threadIdx.x * ITEMS_PER_THREAD), // Diagonal + s_tile_row_end_offsets, // List A + 
tile_nonzero_indices, // List B + tile_num_rows, + tile_num_nonzeros, + thread_start_coord); + + CTA_SYNC(); // Perf-sync + + // Compute the thread's merge path segment + CoordinateT thread_current_coord = thread_start_coord; + KeyValuePairT scan_segment[ITEMS_PER_THREAD]; + + ValueT running_total = 0.0; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + OffsetT nonzero_idx = CUB_MIN(tile_nonzero_indices[thread_current_coord.y], spmv_params.num_nonzeros - 1); + OffsetT column_idx = wd_column_indices[nonzero_idx]; + ValueT value = wd_values[nonzero_idx]; + + ValueT vector_value = spmv_params.t_vector_x[column_idx]; +#if (CUB_PTX_ARCH >= 350) + vector_value = wd_vector_x[column_idx]; +#endif + ValueT nonzero = value * vector_value; + + OffsetT row_end_offset = s_tile_row_end_offsets[thread_current_coord.x]; + + if (tile_nonzero_indices[thread_current_coord.y] < row_end_offset) + { + // Move down (accumulate) + running_total += nonzero; + scan_segment[ITEM].value = running_total; + scan_segment[ITEM].key = tile_num_rows; + ++thread_current_coord.y; + } + else + { + // Move right (reset) + scan_segment[ITEM].value = running_total; + scan_segment[ITEM].key = thread_current_coord.x; + running_total = 0.0; + ++thread_current_coord.x; + } + } + + CTA_SYNC(); + + // Block-wide reduce-value-by-segment + KeyValuePairT tile_carry; + ReduceBySegmentOpT scan_op; + KeyValuePairT scan_item; + + scan_item.value = running_total; + scan_item.key = thread_current_coord.x; + + BlockScanT(temp_storage.aliasable.scan).ExclusiveScan(scan_item, scan_item, scan_op, tile_carry); + + if (tile_num_rows > 0) + { + if (threadIdx.x == 0) + scan_item.key = -1; + + // Direct scatter + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if (scan_segment[ITEM].key < tile_num_rows) + { + if (scan_item.key == scan_segment[ITEM].key) + scan_segment[ITEM].value = scan_item.value + scan_segment[ITEM].value; + + if (HAS_ALPHA) + { + scan_segment[ITEM].value *= spmv_params.alpha; + } + + if (HAS_BETA) + { + // Update the output vector element + ValueT addend = spmv_params.beta * wd_vector_y[tile_start_coord.x + scan_segment[ITEM].key]; + scan_segment[ITEM].value += addend; + } + + // Set the output vector element + spmv_params.d_vector_y[tile_start_coord.x + scan_segment[ITEM].key] = scan_segment[ITEM].value; + } + } + } + + // Return the tile's running carry-out + return tile_carry; + } + + + + /** + * Consume a merge tile, specialized for indirect load of nonzeros + */ + __device__ __forceinline__ KeyValuePairT ConsumeTile( + int tile_idx, + CoordinateT tile_start_coord, + CoordinateT tile_end_coord, + Int2Type is_direct_load) ///< Marker type indicating whether to load nonzeros directly during path-discovery or beforehand in batch + { + int tile_num_rows = tile_end_coord.x - tile_start_coord.x; + int tile_num_nonzeros = tile_end_coord.y - tile_start_coord.y; + +#if (CUB_PTX_ARCH >= 520) + + OffsetT* s_tile_row_end_offsets = &temp_storage.aliasable.merge_items[0].row_end_offset; + ValueT* s_tile_nonzeros = &temp_storage.aliasable.merge_items[tile_num_rows + ITEMS_PER_THREAD].nonzero; + + // Gather the nonzeros for the merge tile into shared memory + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + int nonzero_idx = threadIdx.x + (ITEM * BLOCK_THREADS); + + ValueIteratorT a = wd_values + tile_start_coord.y + nonzero_idx; + ColumnIndicesIteratorT ci = wd_column_indices + tile_start_coord.y + nonzero_idx; + ValueT* s = s_tile_nonzeros + nonzero_idx; + + if 
(nonzero_idx < tile_num_nonzeros) + { + + OffsetT column_idx = *ci; + ValueT value = *a; + + ValueT vector_value = spmv_params.t_vector_x[column_idx]; + vector_value = wd_vector_x[column_idx]; + + ValueT nonzero = value * vector_value; + + *s = nonzero; + } + } + + +#else + + OffsetT* s_tile_row_end_offsets = &temp_storage.aliasable.merge_items[0].row_end_offset; + ValueT* s_tile_nonzeros = &temp_storage.aliasable.merge_items[tile_num_rows + ITEMS_PER_THREAD].nonzero; + + // Gather the nonzeros for the merge tile into shared memory + if (tile_num_nonzeros > 0) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + int nonzero_idx = threadIdx.x + (ITEM * BLOCK_THREADS); + nonzero_idx = CUB_MIN(nonzero_idx, tile_num_nonzeros - 1); + + OffsetT column_idx = wd_column_indices[tile_start_coord.y + nonzero_idx]; + ValueT value = wd_values[tile_start_coord.y + nonzero_idx]; + + ValueT vector_value = spmv_params.t_vector_x[column_idx]; +#if (CUB_PTX_ARCH >= 350) + vector_value = wd_vector_x[column_idx]; +#endif + ValueT nonzero = value * vector_value; + + s_tile_nonzeros[nonzero_idx] = nonzero; + } + } + +#endif + + // Gather the row end-offsets for the merge tile into shared memory + #pragma unroll 1 + for (int item = threadIdx.x; item <= tile_num_rows; item += BLOCK_THREADS) + { + s_tile_row_end_offsets[item] = wd_row_end_offsets[tile_start_coord.x + item]; + } + + CTA_SYNC(); + + // Search for the thread's starting coordinate within the merge tile + CountingInputIterator tile_nonzero_indices(tile_start_coord.y); + CoordinateT thread_start_coord; + + MergePathSearch( + OffsetT(threadIdx.x * ITEMS_PER_THREAD), // Diagonal + s_tile_row_end_offsets, // List A + tile_nonzero_indices, // List B + tile_num_rows, + tile_num_nonzeros, + thread_start_coord); + + CTA_SYNC(); // Perf-sync + + // Compute the thread's merge path segment + CoordinateT thread_current_coord = thread_start_coord; + KeyValuePairT scan_segment[ITEMS_PER_THREAD]; + ValueT running_total = 0.0; + + OffsetT row_end_offset = s_tile_row_end_offsets[thread_current_coord.x]; + ValueT nonzero = s_tile_nonzeros[thread_current_coord.y]; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if (tile_nonzero_indices[thread_current_coord.y] < row_end_offset) + { + // Move down (accumulate) + scan_segment[ITEM].value = nonzero; + running_total += nonzero; + ++thread_current_coord.y; + nonzero = s_tile_nonzeros[thread_current_coord.y]; + } + else + { + // Move right (reset) + scan_segment[ITEM].value = 0.0; + running_total = 0.0; + ++thread_current_coord.x; + row_end_offset = s_tile_row_end_offsets[thread_current_coord.x]; + } + + scan_segment[ITEM].key = thread_current_coord.x; + } + + CTA_SYNC(); + + // Block-wide reduce-value-by-segment + KeyValuePairT tile_carry; + ReduceBySegmentOpT scan_op; + KeyValuePairT scan_item; + + scan_item.value = running_total; + scan_item.key = thread_current_coord.x; + + BlockScanT(temp_storage.aliasable.scan).ExclusiveScan(scan_item, scan_item, scan_op, tile_carry); + + if (threadIdx.x == 0) + { + scan_item.key = thread_start_coord.x; + scan_item.value = 0.0; + } + + if (tile_num_rows > 0) + { + + CTA_SYNC(); + + // Scan downsweep and scatter + ValueT* s_partials = &temp_storage.aliasable.merge_items[0].nonzero; + + if (scan_item.key != scan_segment[0].key) + { + s_partials[scan_item.key] = scan_item.value; + } + else + { + scan_segment[0].value += scan_item.value; + } + + #pragma unroll + for (int ITEM = 1; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if 
(scan_segment[ITEM - 1].key != scan_segment[ITEM].key) + { + s_partials[scan_segment[ITEM - 1].key] = scan_segment[ITEM - 1].value; + } + else + { + scan_segment[ITEM].value += scan_segment[ITEM - 1].value; + } + } + + CTA_SYNC(); + + #pragma unroll 1 + for (int item = threadIdx.x; item < tile_num_rows; item += BLOCK_THREADS) + { + spmv_params.d_vector_y[tile_start_coord.x + item] = s_partials[item]; + } + } + + // Return the tile's running carry-out + return tile_carry; + } + + + /** + * Consume input tile + */ + __device__ __forceinline__ void ConsumeTile( + CoordinateT* d_tile_coordinates, ///< [in] Pointer to the temporary array of tile starting coordinates + KeyValuePairT* d_tile_carry_pairs, ///< [out] Pointer to the temporary array carry-out dot product row-ids, one per block + int num_merge_tiles) ///< [in] Number of merge tiles + { + int tile_idx = (blockIdx.x * gridDim.y) + blockIdx.y; // Current tile index + + if (tile_idx >= num_merge_tiles) + return; + + // Read our starting coordinates + if (threadIdx.x < 2) + { + if (d_tile_coordinates == NULL) + { + // Search our starting coordinates + OffsetT diagonal = (tile_idx + threadIdx.x) * TILE_ITEMS; + CoordinateT tile_coord; + CountingInputIterator nonzero_indices(0); + + // Search the merge path + MergePathSearch( + diagonal, + RowOffsetsSearchIteratorT(spmv_params.d_row_end_offsets), + nonzero_indices, + spmv_params.num_rows, + spmv_params.num_nonzeros, + tile_coord); + + temp_storage.tile_coords[threadIdx.x] = tile_coord; + } + else + { + temp_storage.tile_coords[threadIdx.x] = d_tile_coordinates[tile_idx + threadIdx.x]; + } + } + + CTA_SYNC(); + + CoordinateT tile_start_coord = temp_storage.tile_coords[0]; + CoordinateT tile_end_coord = temp_storage.tile_coords[1]; + + // Consume multi-segment tile + KeyValuePairT tile_carry = ConsumeTile( + tile_idx, + tile_start_coord, + tile_end_coord, + Int2Type()); + + // Output the tile's carry-out + if (threadIdx.x == 0) + { + if (HAS_ALPHA) + tile_carry.value *= spmv_params.alpha; + + tile_carry.key += tile_start_coord.x; + d_tile_carry_pairs[tile_idx] = tile_carry; + } + } + + +}; + + + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/single_pass_scan_operators.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/single_pass_scan_operators.cuh new file mode 100644 index 0000000..53409bd --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/agent/single_pass_scan_operators.cuh @@ -0,0 +1,815 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Callback operator types for supplying BlockScan prefixes + */ + +#pragma once + +#include + +#include "../thread/thread_load.cuh" +#include "../thread/thread_store.cuh" +#include "../warp/warp_reduce.cuh" +#include "../util_arch.cuh" +#include "../util_device.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Prefix functor type for maintaining a running prefix while scanning a + * region independent of other thread blocks + ******************************************************************************/ + +/** + * Stateful callback operator type for supplying BlockScan prefixes. + * Maintains a running prefix that can be applied to consecutive + * BlockScan operations. + */ +template < + typename T, ///< BlockScan value type + typename ScanOpT> ///< Wrapped scan operator type +struct BlockScanRunningPrefixOp +{ + ScanOpT op; ///< Wrapped scan operator + T running_total; ///< Running block-wide prefix + + /// Constructor + __device__ __forceinline__ BlockScanRunningPrefixOp(ScanOpT op) + : + op(op) + {} + + /// Constructor + __device__ __forceinline__ BlockScanRunningPrefixOp( + T starting_prefix, + ScanOpT op) + : + op(op), + running_total(starting_prefix) + {} + + /** + * Prefix callback operator. Returns the block-wide running_total in thread-0. + */ + __device__ __forceinline__ T operator()( + const T &block_aggregate) ///< The aggregate sum of the BlockScan inputs + { + T retval = running_total; + running_total = op(running_total, block_aggregate); + return retval; + } +}; + + +/****************************************************************************** + * Generic tile status interface types for block-cooperative scans + ******************************************************************************/ + +/** + * Enumerations of tile status + */ +enum ScanTileStatus +{ + SCAN_TILE_OOB, // Out-of-bounds (e.g., padding) + SCAN_TILE_INVALID = 99, // Not yet processed + SCAN_TILE_PARTIAL, // Tile aggregate is available + SCAN_TILE_INCLUSIVE, // Inclusive tile prefix is available +}; + + +/** + * Tile status interface. + */ +template < + typename T, + bool SINGLE_WORD = Traits::PRIMITIVE> +struct ScanTileState; + + +/** + * Tile status interface specialized for scan status and value types + * that can be combined into one machine word that can be + * read/written coherently in a single access. 
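+ *
+ * \par
+ * Illustrative packing summary (derived from the StatusWord / TxnWord typedefs
+ * below): for T = int, StatusWord is int and the { status, value } descriptor
+ * aliases an int2, so a tile's status and its aggregate are published and
+ * polled with one 8-byte transaction rather than two separately fenced
+ * accesses:
+ *
+ * \code
+ * //  sizeof(T)    StatusWord    TxnWord     descriptor moved per access
+ * //  8 bytes      long long     longlong2   16 bytes
+ * //  4 bytes      int           int2         8 bytes
+ * //  2 bytes      short         int          4 bytes
+ * //  other        char          uchar2       2 bytes
+ * \endcode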
+ */ +template +struct ScanTileState +{ + // Status word type + typedef typename If<(sizeof(T) == 8), + long long, + typename If<(sizeof(T) == 4), + int, + typename If<(sizeof(T) == 2), + short, + char>::Type>::Type>::Type StatusWord; + + + // Unit word type + typedef typename If<(sizeof(T) == 8), + longlong2, + typename If<(sizeof(T) == 4), + int2, + typename If<(sizeof(T) == 2), + int, + uchar2>::Type>::Type>::Type TxnWord; + + + // Device word type + struct TileDescriptor + { + StatusWord status; + T value; + }; + + + // Constants + enum + { + TILE_STATUS_PADDING = CUB_PTX_WARP_THREADS, + }; + + + // Device storage + TxnWord *d_tile_descriptors; + + /// Constructor + __host__ __device__ __forceinline__ + ScanTileState() + : + d_tile_descriptors(NULL) + {} + + + /// Initializer + __host__ __device__ __forceinline__ + cudaError_t Init( + int /*num_tiles*/, ///< [in] Number of tiles + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t /*temp_storage_bytes*/) ///< [in] Size in bytes of \t d_temp_storage allocation + { + d_tile_descriptors = reinterpret_cast(d_temp_storage); + return cudaSuccess; + } + + + /** + * Compute device memory needed for tile status + */ + __host__ __device__ __forceinline__ + static cudaError_t AllocationSize( + int num_tiles, ///< [in] Number of tiles + size_t &temp_storage_bytes) ///< [out] Size in bytes of \t d_temp_storage allocation + { + temp_storage_bytes = (num_tiles + TILE_STATUS_PADDING) * sizeof(TileDescriptor); // bytes needed for tile status descriptors + return cudaSuccess; + } + + + /** + * Initialize (from device) + */ + __device__ __forceinline__ void InitializeStatus(int num_tiles) + { + int tile_idx = (blockIdx.x * blockDim.x) + threadIdx.x; + + TxnWord val = TxnWord(); + TileDescriptor *descriptor = reinterpret_cast(&val); + + if (tile_idx < num_tiles) + { + // Not-yet-set + descriptor->status = StatusWord(SCAN_TILE_INVALID); + d_tile_descriptors[TILE_STATUS_PADDING + tile_idx] = val; + } + + if ((blockIdx.x == 0) && (threadIdx.x < TILE_STATUS_PADDING)) + { + // Padding + descriptor->status = StatusWord(SCAN_TILE_OOB); + d_tile_descriptors[threadIdx.x] = val; + } + } + + + /** + * Update the specified tile's inclusive value and corresponding status + */ + __device__ __forceinline__ void SetInclusive(int tile_idx, T tile_inclusive) + { + TileDescriptor tile_descriptor; + tile_descriptor.status = SCAN_TILE_INCLUSIVE; + tile_descriptor.value = tile_inclusive; + + TxnWord alias; + *reinterpret_cast(&alias) = tile_descriptor; + ThreadStore(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx, alias); + } + + + /** + * Update the specified tile's partial value and corresponding status + */ + __device__ __forceinline__ void SetPartial(int tile_idx, T tile_partial) + { + TileDescriptor tile_descriptor; + tile_descriptor.status = SCAN_TILE_PARTIAL; + tile_descriptor.value = tile_partial; + + TxnWord alias; + *reinterpret_cast(&alias) = tile_descriptor; + ThreadStore(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx, alias); + } + + /** + * Wait for the corresponding tile to become non-invalid + */ + __device__ __forceinline__ void WaitForValid( + int tile_idx, + StatusWord &status, + T &value) + { + TileDescriptor tile_descriptor; + do + { + __threadfence_block(); // prevent hoisting loads from loop + TxnWord alias = ThreadLoad(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx); + tile_descriptor = 
reinterpret_cast(alias); + + } while (WARP_ANY((tile_descriptor.status == SCAN_TILE_INVALID), 0xffffffff)); + + status = tile_descriptor.status; + value = tile_descriptor.value; + } + +}; + + + +/** + * Tile status interface specialized for scan status and value types that + * cannot be combined into one machine word. + */ +template +struct ScanTileState +{ + // Status word type + typedef char StatusWord; + + // Constants + enum + { + TILE_STATUS_PADDING = CUB_PTX_WARP_THREADS, + }; + + // Device storage + StatusWord *d_tile_status; + T *d_tile_partial; + T *d_tile_inclusive; + + /// Constructor + __host__ __device__ __forceinline__ + ScanTileState() + : + d_tile_status(NULL), + d_tile_partial(NULL), + d_tile_inclusive(NULL) + {} + + + /// Initializer + __host__ __device__ __forceinline__ + cudaError_t Init( + int num_tiles, ///< [in] Number of tiles + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t temp_storage_bytes) ///< [in] Size in bytes of \t d_temp_storage allocation + { + cudaError_t error = cudaSuccess; + do + { + void* allocations[3]; + size_t allocation_sizes[3]; + + allocation_sizes[0] = (num_tiles + TILE_STATUS_PADDING) * sizeof(StatusWord); // bytes needed for tile status descriptors + allocation_sizes[1] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized); // bytes needed for partials + allocation_sizes[2] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized); // bytes needed for inclusives + + // Compute allocation pointers into the single storage blob + if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; + + // Alias the offsets + d_tile_status = reinterpret_cast(allocations[0]); + d_tile_partial = reinterpret_cast(allocations[1]); + d_tile_inclusive = reinterpret_cast(allocations[2]); + } + while (0); + + return error; + } + + + /** + * Compute device memory needed for tile status + */ + __host__ __device__ __forceinline__ + static cudaError_t AllocationSize( + int num_tiles, ///< [in] Number of tiles + size_t &temp_storage_bytes) ///< [out] Size in bytes of \t d_temp_storage allocation + { + // Specify storage allocation requirements + size_t allocation_sizes[3]; + allocation_sizes[0] = (num_tiles + TILE_STATUS_PADDING) * sizeof(StatusWord); // bytes needed for tile status descriptors + allocation_sizes[1] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized); // bytes needed for partials + allocation_sizes[2] = (num_tiles + TILE_STATUS_PADDING) * sizeof(Uninitialized); // bytes needed for inclusives + + // Set the necessary size of the blob + void* allocations[3]; + return CubDebug(AliasTemporaries(NULL, temp_storage_bytes, allocations, allocation_sizes)); + } + + + /** + * Initialize (from device) + */ + __device__ __forceinline__ void InitializeStatus(int num_tiles) + { + int tile_idx = (blockIdx.x * blockDim.x) + threadIdx.x; + if (tile_idx < num_tiles) + { + // Not-yet-set + d_tile_status[TILE_STATUS_PADDING + tile_idx] = StatusWord(SCAN_TILE_INVALID); + } + + if ((blockIdx.x == 0) && (threadIdx.x < TILE_STATUS_PADDING)) + { + // Padding + d_tile_status[threadIdx.x] = StatusWord(SCAN_TILE_OOB); + } + } + + + /** + * Update the specified tile's inclusive value and corresponding status + */ + __device__ __forceinline__ void SetInclusive(int tile_idx, T tile_inclusive) + { + // Update tile inclusive value + ThreadStore(d_tile_inclusive + 
TILE_STATUS_PADDING + tile_idx, tile_inclusive); + + // Fence + __threadfence(); + + // Update tile status + ThreadStore(d_tile_status + TILE_STATUS_PADDING + tile_idx, StatusWord(SCAN_TILE_INCLUSIVE)); + } + + + /** + * Update the specified tile's partial value and corresponding status + */ + __device__ __forceinline__ void SetPartial(int tile_idx, T tile_partial) + { + // Update tile partial value + ThreadStore(d_tile_partial + TILE_STATUS_PADDING + tile_idx, tile_partial); + + // Fence + __threadfence(); + + // Update tile status + ThreadStore(d_tile_status + TILE_STATUS_PADDING + tile_idx, StatusWord(SCAN_TILE_PARTIAL)); + } + + /** + * Wait for the corresponding tile to become non-invalid + */ + __device__ __forceinline__ void WaitForValid( + int tile_idx, + StatusWord &status, + T &value) + { + do { + status = ThreadLoad(d_tile_status + TILE_STATUS_PADDING + tile_idx); + + __threadfence(); // prevent hoisting loads from loop or loads below above this one + + } while (status == SCAN_TILE_INVALID); + + if (status == StatusWord(SCAN_TILE_PARTIAL)) + value = ThreadLoad(d_tile_partial + TILE_STATUS_PADDING + tile_idx); + else + value = ThreadLoad(d_tile_inclusive + TILE_STATUS_PADDING + tile_idx); + } +}; + + +/****************************************************************************** + * ReduceByKey tile status interface types for block-cooperative scans + ******************************************************************************/ + +/** + * Tile status interface for reduction by key. + * + */ +template < + typename ValueT, + typename KeyT, + bool SINGLE_WORD = (Traits::PRIMITIVE) && (sizeof(ValueT) + sizeof(KeyT) < 16)> +struct ReduceByKeyScanTileState; + + +/** + * Tile status interface for reduction by key, specialized for scan status and value types that + * cannot be combined into one machine word. + */ +template < + typename ValueT, + typename KeyT> +struct ReduceByKeyScanTileState : + ScanTileState > +{ + typedef ScanTileState > SuperClass; + + /// Constructor + __host__ __device__ __forceinline__ + ReduceByKeyScanTileState() : SuperClass() {} +}; + + +/** + * Tile status interface for reduction by key, specialized for scan status and value types that + * can be combined into one machine word that can be read/written coherently in a single access. 
+ */
+template <
+    typename    ValueT,
+    typename    KeyT>
+struct ReduceByKeyScanTileState<ValueT, KeyT, true>
+{
+    typedef KeyValuePair<KeyT, ValueT> KeyValuePairT;
+
+    // Constants
+    enum
+    {
+        PAIR_SIZE           = sizeof(ValueT) + sizeof(KeyT),
+        TXN_WORD_SIZE       = 1 << Log2<PAIR_SIZE + 1>::VALUE,
+        STATUS_WORD_SIZE    = TXN_WORD_SIZE - PAIR_SIZE,
+
+        TILE_STATUS_PADDING = CUB_PTX_WARP_THREADS,
+    };
+
+    // Status word type
+    typedef typename If<(STATUS_WORD_SIZE == 8),
+        long long,
+        typename If<(STATUS_WORD_SIZE == 4),
+            int,
+            typename If<(STATUS_WORD_SIZE == 2),
+                short,
+                char>::Type>::Type>::Type StatusWord;
+
+    // Status word type
+    typedef typename If<(TXN_WORD_SIZE == 16),
+        longlong2,
+        typename If<(TXN_WORD_SIZE == 8),
+            long long,
+            int>::Type>::Type TxnWord;
+
+    // Device word type (for when sizeof(ValueT) == sizeof(KeyT))
+    struct TileDescriptorBigStatus
+    {
+        KeyT        key;
+        ValueT      value;
+        StatusWord  status;
+    };
+
+    // Device word type (for when sizeof(ValueT) != sizeof(KeyT))
+    struct TileDescriptorLittleStatus
+    {
+        ValueT      value;
+        StatusWord  status;
+        KeyT        key;
+    };
+
+    // Device word type
+    typedef typename If<
+            (sizeof(ValueT) == sizeof(KeyT)),
+            TileDescriptorBigStatus,
+            TileDescriptorLittleStatus>::Type
+        TileDescriptor;
+
+
+    // Device storage
+    TxnWord *d_tile_descriptors;
+
+
+    /// Constructor
+    __host__ __device__ __forceinline__
+    ReduceByKeyScanTileState()
+    :
+        d_tile_descriptors(NULL)
+    {}
+
+
+    /// Initializer
+    __host__ __device__ __forceinline__
+    cudaError_t Init(
+        int     /*num_tiles*/,          ///< [in] Number of tiles
+        void    *d_temp_storage,        ///< [in] %Device-accessible allocation of temporary storage.  When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done.
+        size_t  /*temp_storage_bytes*/) ///< [in] Size in bytes of \t d_temp_storage allocation
+    {
+        d_tile_descriptors = reinterpret_cast<TxnWord*>(d_temp_storage);
+        return cudaSuccess;
+    }
+
+
+    /**
+     * Compute device memory needed for tile status
+     */
+    __host__ __device__ __forceinline__
+    static cudaError_t AllocationSize(
+        int     num_tiles,              ///< [in] Number of tiles
+        size_t  &temp_storage_bytes)    ///< [out] Size in bytes of \t d_temp_storage allocation
+    {
+        temp_storage_bytes = (num_tiles + TILE_STATUS_PADDING) * sizeof(TileDescriptor);   // bytes needed for tile status descriptors
+        return cudaSuccess;
+    }
+
+
+    /**
+     * Initialize (from device)
+     */
+    __device__ __forceinline__ void InitializeStatus(int num_tiles)
+    {
+        int tile_idx = (blockIdx.x * blockDim.x) + threadIdx.x;
+        TxnWord val = TxnWord();
+        TileDescriptor *descriptor = reinterpret_cast<TileDescriptor*>(&val);
+
+        if (tile_idx < num_tiles)
+        {
+            // Not-yet-set
+            descriptor->status = StatusWord(SCAN_TILE_INVALID);
+            d_tile_descriptors[TILE_STATUS_PADDING + tile_idx] = val;
+        }
+
+        if ((blockIdx.x == 0) && (threadIdx.x < TILE_STATUS_PADDING))
+        {
+            // Padding
+            descriptor->status = StatusWord(SCAN_TILE_OOB);
+            d_tile_descriptors[threadIdx.x] = val;
+        }
+    }
+
+
+    /**
+     * Update the specified tile's inclusive value and corresponding status
+     */
+    __device__ __forceinline__ void SetInclusive(int tile_idx, KeyValuePairT tile_inclusive)
+    {
+        TileDescriptor tile_descriptor;
+        tile_descriptor.status  = SCAN_TILE_INCLUSIVE;
+        tile_descriptor.value   = tile_inclusive.value;
+        tile_descriptor.key     = tile_inclusive.key;
+
+        TxnWord alias;
+        *reinterpret_cast<TileDescriptor*>(&alias) = tile_descriptor;
+        ThreadStore<STORE_CG>(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx, alias);
+    }
+
+
+    /**
+     * Update the specified tile's partial value and corresponding status
+     */
+    __device__ __forceinline__ void SetPartial(int tile_idx, KeyValuePairT
tile_partial) + { + TileDescriptor tile_descriptor; + tile_descriptor.status = SCAN_TILE_PARTIAL; + tile_descriptor.value = tile_partial.value; + tile_descriptor.key = tile_partial.key; + + TxnWord alias; + *reinterpret_cast(&alias) = tile_descriptor; + ThreadStore(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx, alias); + } + + /** + * Wait for the corresponding tile to become non-invalid + */ + __device__ __forceinline__ void WaitForValid( + int tile_idx, + StatusWord &status, + KeyValuePairT &value) + { +// TxnWord alias = ThreadLoad(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx); +// TileDescriptor tile_descriptor = reinterpret_cast(alias); +// +// while (tile_descriptor.status == SCAN_TILE_INVALID) +// { +// __threadfence_block(); // prevent hoisting loads from loop +// +// alias = ThreadLoad(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx); +// tile_descriptor = reinterpret_cast(alias); +// } +// +// status = tile_descriptor.status; +// value.value = tile_descriptor.value; +// value.key = tile_descriptor.key; + + TileDescriptor tile_descriptor; + do + { + __threadfence_block(); // prevent hoisting loads from loop + TxnWord alias = ThreadLoad(d_tile_descriptors + TILE_STATUS_PADDING + tile_idx); + tile_descriptor = reinterpret_cast(alias); + + } while (WARP_ANY((tile_descriptor.status == SCAN_TILE_INVALID), 0xffffffff)); + + status = tile_descriptor.status; + value.value = tile_descriptor.value; + value.key = tile_descriptor.key; + } + +}; + + +/****************************************************************************** + * Prefix call-back operator for coupling local block scan within a + * block-cooperative scan + ******************************************************************************/ + +/** + * Stateful block-scan prefix functor. Provides the the running prefix for + * the current tile by using the call-back warp to wait on on + * aggregates/prefixes from predecessor tiles to become available. 
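+ *
+ * \par
+ * A minimal sketch of the intended call pattern (the kernel below is illustrative,
+ * not part of this header): each thread block scans its own tile with BlockScan and
+ * passes this functor as the prefix callback, which blocks until the predecessor
+ * tiles have published their aggregates/prefixes through the tile status interface.
+ * \code
+ * #include <cub/cub.cuh>
+ *
+ * template <typename ScanTileStateT>
+ * __global__ void ExampleScanKernel(ScanTileStateT tile_state, ...)
+ * {
+ *     typedef cub::BlockScan<int, 128>                                 BlockScanT;
+ *     typedef cub::TilePrefixCallbackOp<int, cub::Sum, ScanTileStateT> PrefixOpT;
+ *
+ *     // The callback's warp-reduction storage is live during the block scan,
+ *     // so the two allocations are placed side by side (not unioned)
+ *     __shared__ struct
+ *     {
+ *         typename PrefixOpT::TempStorage   prefix;
+ *         typename BlockScanT::TempStorage  scan;
+ *     } temp_storage;
+ *
+ *     int tile_idx = blockIdx.x;      // one tile per thread block
+ *     int items[4];
+ *     ...                             // load this tile's items
+ *
+ *     if (tile_idx == 0)
+ *     {
+ *         // The first tile scans locally and publishes its inclusive prefix
+ *         int block_aggregate;
+ *         BlockScanT(temp_storage.scan).ExclusiveSum(items, items, block_aggregate);
+ *         if (threadIdx.x == 0)
+ *             tile_state.SetInclusive(0, block_aggregate);
+ *     }
+ *     else
+ *     {
+ *         // Later tiles couple their local scan to the running prefix of all
+ *         // predecessor tiles via the callback
+ *         PrefixOpT prefix_op(tile_state, temp_storage.prefix, cub::Sum(), tile_idx);
+ *         BlockScanT(temp_storage.scan).ExclusiveSum(items, items, prefix_op);
+ *     }
+ * }
+ * \endcode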
+ */ +template < + typename T, + typename ScanOpT, + typename ScanTileStateT, + int PTX_ARCH = CUB_PTX_ARCH> +struct TilePrefixCallbackOp +{ + // Parameterized warp reduce + typedef WarpReduce WarpReduceT; + + // Temporary storage type + struct _TempStorage + { + typename WarpReduceT::TempStorage warp_reduce; + T exclusive_prefix; + T inclusive_prefix; + T block_aggregate; + }; + + // Alias wrapper allowing temporary storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + // Type of status word + typedef typename ScanTileStateT::StatusWord StatusWord; + + // Fields + _TempStorage& temp_storage; ///< Reference to a warp-reduction instance + ScanTileStateT& tile_status; ///< Interface to tile status + ScanOpT scan_op; ///< Binary scan operator + int tile_idx; ///< The current tile index + T exclusive_prefix; ///< Exclusive prefix for the tile + T inclusive_prefix; ///< Inclusive prefix for the tile + + // Constructor + __device__ __forceinline__ + TilePrefixCallbackOp( + ScanTileStateT &tile_status, + TempStorage &temp_storage, + ScanOpT scan_op, + int tile_idx) + : + temp_storage(temp_storage.Alias()), + tile_status(tile_status), + scan_op(scan_op), + tile_idx(tile_idx) {} + + + // Block until all predecessors within the warp-wide window have non-invalid status + __device__ __forceinline__ + void ProcessWindow( + int predecessor_idx, ///< Preceding tile index to inspect + StatusWord &predecessor_status, ///< [out] Preceding tile status + T &window_aggregate) ///< [out] Relevant partial reduction from this window of preceding tiles + { + T value; + tile_status.WaitForValid(predecessor_idx, predecessor_status, value); + + // Perform a segmented reduction to get the prefix for the current window. + // Use the swizzled scan operator because we are now scanning *down* towards thread0. 
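+        // (SwizzleScanOp<ScanOpT> simply swaps the operand order of scan_op, and the
+        // tail flag computed below marks a predecessor whose inclusive prefix is
+        // already published, terminating the segment covered by this window.)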
+ + int tail_flag = (predecessor_status == StatusWord(SCAN_TILE_INCLUSIVE)); + window_aggregate = WarpReduceT(temp_storage.warp_reduce).TailSegmentedReduce( + value, + tail_flag, + SwizzleScanOp(scan_op)); + } + + + // BlockScan prefix callback functor (called by the first warp) + __device__ __forceinline__ + T operator()(T block_aggregate) + { + + // Update our status with our tile-aggregate + if (threadIdx.x == 0) + { + temp_storage.block_aggregate = block_aggregate; + tile_status.SetPartial(tile_idx, block_aggregate); + } + + int predecessor_idx = tile_idx - threadIdx.x - 1; + StatusWord predecessor_status; + T window_aggregate; + + // Wait for the warp-wide window of predecessor tiles to become valid + ProcessWindow(predecessor_idx, predecessor_status, window_aggregate); + + // The exclusive tile prefix starts out as the current window aggregate + exclusive_prefix = window_aggregate; + + // Keep sliding the window back until we come across a tile whose inclusive prefix is known + while (WARP_ALL((predecessor_status != StatusWord(SCAN_TILE_INCLUSIVE)), 0xffffffff)) + { + predecessor_idx -= CUB_PTX_WARP_THREADS; + + // Update exclusive tile prefix with the window prefix + ProcessWindow(predecessor_idx, predecessor_status, window_aggregate); + exclusive_prefix = scan_op(window_aggregate, exclusive_prefix); + } + + // Compute the inclusive tile prefix and update the status for this tile + if (threadIdx.x == 0) + { + inclusive_prefix = scan_op(exclusive_prefix, block_aggregate); + tile_status.SetInclusive(tile_idx, inclusive_prefix); + + temp_storage.exclusive_prefix = exclusive_prefix; + temp_storage.inclusive_prefix = inclusive_prefix; + } + + // Return exclusive_prefix + return exclusive_prefix; + } + + // Get the exclusive prefix stored in temporary storage + __device__ __forceinline__ + T GetExclusivePrefix() + { + return temp_storage.exclusive_prefix; + } + + // Get the inclusive prefix stored in temporary storage + __device__ __forceinline__ + T GetInclusivePrefix() + { + return temp_storage.inclusive_prefix; + } + + // Get the block aggregate stored in temporary storage + __device__ __forceinline__ + T GetBlockAggregate() + { + return temp_storage.block_aggregate; + } + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_adjacent_difference.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_adjacent_difference.cuh new file mode 100644 index 0000000..acef9f0 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_adjacent_difference.cuh @@ -0,0 +1,596 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * The cub::BlockDiscontinuity class provides [collective](index.html#sec0) methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. + */ + +#pragma once + +#include "../util_type.cuh" +#include "../util_ptx.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +template < + typename T, + int BLOCK_DIM_X, + int BLOCK_DIM_Y = 1, + int BLOCK_DIM_Z = 1, + int PTX_ARCH = CUB_PTX_ARCH> +class BlockAdjacentDifference +{ +private: + + /****************************************************************************** + * Constants and type definitions + ******************************************************************************/ + + /// Constants + enum + { + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + }; + + + /// Shared memory storage layout type (last element from each thread's input) + struct _TempStorage + { + T first_items[BLOCK_THREADS]; + T last_items[BLOCK_THREADS]; + }; + + + /****************************************************************************** + * Utility methods + ******************************************************************************/ + + /// Internal storage allocator + __device__ __forceinline__ _TempStorage& PrivateStorage() + { + __shared__ _TempStorage private_storage; + return private_storage; + } + + + /// Specialization for when FlagOp has third index param + template ::HAS_PARAM> + struct ApplyOp + { + // Apply flag operator + static __device__ __forceinline__ T FlagT(FlagOp flag_op, const T &a, const T &b, int idx) + { + return flag_op(b, a, idx); + } + }; + + /// Specialization for when FlagOp does not have a third index param + template + struct ApplyOp + { + // Apply flag operator + static __device__ __forceinline__ T FlagT(FlagOp flag_op, const T &a, const T &b, int /*idx*/) + { + return flag_op(b, a); + } + }; + + /// Templated unrolling of item comparison (inductive case) + template + struct Iterate + { + // Head flags + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + static __device__ __forceinline__ void FlagHeads( + int linear_tid, + FlagT (&flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items + FlagOp 
flag_op) ///< [in] Binary boolean flag predicate + { + preds[ITERATION] = input[ITERATION - 1]; + + flags[ITERATION] = ApplyOp::FlagT( + flag_op, + preds[ITERATION], + input[ITERATION], + (linear_tid * ITEMS_PER_THREAD) + ITERATION); + + Iterate::FlagHeads(linear_tid, flags, input, preds, flag_op); + } + + // Tail flags + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + static __device__ __forceinline__ void FlagTails( + int linear_tid, + FlagT (&flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + flags[ITERATION] = ApplyOp::FlagT( + flag_op, + input[ITERATION], + input[ITERATION + 1], + (linear_tid * ITEMS_PER_THREAD) + ITERATION + 1); + + Iterate::FlagTails(linear_tid, flags, input, flag_op); + } + + }; + + /// Templated unrolling of item comparison (termination case) + template + struct Iterate + { + // Head flags + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + static __device__ __forceinline__ void FlagHeads( + int /*linear_tid*/, + FlagT (&/*flags*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&/*input*/)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&/*preds*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items + FlagOp /*flag_op*/) ///< [in] Binary boolean flag predicate + {} + + // Tail flags + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + static __device__ __forceinline__ void FlagTails( + int /*linear_tid*/, + FlagT (&/*flags*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&/*input*/)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp /*flag_op*/) ///< [in] Binary boolean flag predicate + {} + }; + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + /// Shared storage reference + _TempStorage &temp_storage; + + /// Linear thread-id + unsigned int linear_tid; + + +public: + + /// \smemstorage{BlockDiscontinuity} + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /******************************************************************//** + * \name Collective constructors + *********************************************************************/ + //@{ + + /** + * \brief Collective constructor using a private static allocation of shared memory as temporary storage. + */ + __device__ __forceinline__ BlockAdjacentDifference() + : + temp_storage(PrivateStorage()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + /** + * \brief Collective constructor using the specified memory allocation as temporary storage. 
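+     *
+     * \par
+     * A minimal sketch of the usual pattern (mirroring the BlockDiscontinuity
+     * snippets in this package; the 128-thread, \p int configuration is illustrative):
+     * \code
+     * // Specialize for a 1D block of 128 threads on type int
+     * typedef cub::BlockAdjacentDifference<int, 128> BlockAdjacentDifferenceT;
+     *
+     * // Allocate shared memory for the collective
+     * __shared__ typename BlockAdjacentDifferenceT::TempStorage temp_storage;
+     *
+     * // Construct the collective over that allocation
+     * BlockAdjacentDifferenceT adj_diff(temp_storage);
+     * \endcode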
+ */ + __device__ __forceinline__ BlockAdjacentDifference( + TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + //@} end member group + /******************************************************************//** + * \name Head flag operations + *********************************************************************/ + //@{ + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeads( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + // Share last item + temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; + + CTA_SYNC(); + + if (linear_tid == 0) + { + // Set flag for first thread-item (preds[0] is undefined) + head_flags[0] = 1; + } + else + { + preds[0] = temp_storage.last_items[linear_tid - 1]; + head_flags[0] = ApplyOp::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); + } + + // Set head_flags for remaining items + Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); + } + + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeads( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items + FlagOp flag_op, ///< [in] Binary boolean flag predicate + T tile_predecessor_item) ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). + { + // Share last item + temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; + + CTA_SYNC(); + + // Set flag for first thread-item + preds[0] = (linear_tid == 0) ? + tile_predecessor_item : // First thread + temp_storage.last_items[linear_tid - 1]; + + head_flags[0] = ApplyOp::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); + + // Set head_flags for remaining items + Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); + } + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeads( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + T preds[ITEMS_PER_THREAD]; + FlagHeads(head_flags, input, preds, flag_op); + } + + + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeads( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op, ///< [in] Binary boolean flag predicate + T tile_predecessor_item) ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). 
+ { + T preds[ITEMS_PER_THREAD]; + FlagHeads(head_flags, input, preds, flag_op, tile_predecessor_item); + } + + + + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagTails( + FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + // Share first item + temp_storage.first_items[linear_tid] = input[0]; + + CTA_SYNC(); + + // Set flag for last thread-item + tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? + 1 : // Last thread + ApplyOp::FlagT( + flag_op, + input[ITEMS_PER_THREAD - 1], + temp_storage.first_items[linear_tid + 1], + (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); + + // Set tail_flags for remaining items + Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); + } + + + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagTails( + FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op, ///< [in] Binary boolean flag predicate + T tile_successor_item) ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). + { + // Share first item + temp_storage.first_items[linear_tid] = input[0]; + + CTA_SYNC(); + + // Set flag for last thread-item + T successor_item = (linear_tid == BLOCK_THREADS - 1) ? + tile_successor_item : // Last thread + temp_storage.first_items[linear_tid + 1]; + + tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( + flag_op, + input[ITEMS_PER_THREAD - 1], + successor_item, + (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); + + // Set tail_flags for remaining items + Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); + } + + + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeadsAndTails( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + // Share first and last items + temp_storage.first_items[linear_tid] = input[0]; + temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; + + CTA_SYNC(); + + T preds[ITEMS_PER_THREAD]; + + // Set flag for first thread-item + preds[0] = temp_storage.last_items[linear_tid - 1]; + if (linear_tid == 0) + { + head_flags[0] = 1; + } + else + { + head_flags[0] = ApplyOp::FlagT( + flag_op, + preds[0], + input[0], + linear_tid * ITEMS_PER_THREAD); + } + + + // Set flag for last thread-item + tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? 
+ 1 : // Last thread + ApplyOp::FlagT( + flag_op, + input[ITEMS_PER_THREAD - 1], + temp_storage.first_items[linear_tid + 1], + (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); + + // Set head_flags for remaining items + Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); + + // Set tail_flags for remaining items + Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); + } + + + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeadsAndTails( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags + T tile_successor_item, ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + // Share first and last items + temp_storage.first_items[linear_tid] = input[0]; + temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; + + CTA_SYNC(); + + T preds[ITEMS_PER_THREAD]; + + // Set flag for first thread-item + if (linear_tid == 0) + { + head_flags[0] = 1; + } + else + { + preds[0] = temp_storage.last_items[linear_tid - 1]; + head_flags[0] = ApplyOp::FlagT( + flag_op, + preds[0], + input[0], + linear_tid * ITEMS_PER_THREAD); + } + + // Set flag for last thread-item + T successor_item = (linear_tid == BLOCK_THREADS - 1) ? + tile_successor_item : // Last thread + temp_storage.first_items[linear_tid + 1]; + + tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( + flag_op, + input[ITEMS_PER_THREAD - 1], + successor_item, + (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); + + // Set head_flags for remaining items + Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); + + // Set tail_flags for remaining items + Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); + } + + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeadsAndTails( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T tile_predecessor_item, ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). + FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + // Share first and last items + temp_storage.first_items[linear_tid] = input[0]; + temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; + + CTA_SYNC(); + + T preds[ITEMS_PER_THREAD]; + + // Set flag for first thread-item + preds[0] = (linear_tid == 0) ? + tile_predecessor_item : // First thread + temp_storage.last_items[linear_tid - 1]; + + head_flags[0] = ApplyOp::FlagT( + flag_op, + preds[0], + input[0], + linear_tid * ITEMS_PER_THREAD); + + // Set flag for last thread-item + tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? 
+ 1 : // Last thread + ApplyOp::FlagT( + flag_op, + input[ITEMS_PER_THREAD - 1], + temp_storage.first_items[linear_tid + 1], + (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); + + // Set head_flags for remaining items + Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); + + // Set tail_flags for remaining items + Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); + } + + + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeadsAndTails( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T tile_predecessor_item, ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). + FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags + T tile_successor_item, ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + // Share first and last items + temp_storage.first_items[linear_tid] = input[0]; + temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; + + CTA_SYNC(); + + T preds[ITEMS_PER_THREAD]; + + // Set flag for first thread-item + preds[0] = (linear_tid == 0) ? + tile_predecessor_item : // First thread + temp_storage.last_items[linear_tid - 1]; + + head_flags[0] = ApplyOp::FlagT( + flag_op, + preds[0], + input[0], + linear_tid * ITEMS_PER_THREAD); + + // Set flag for last thread-item + T successor_item = (linear_tid == BLOCK_THREADS - 1) ? + tile_successor_item : // Last thread + temp_storage.first_items[linear_tid + 1]; + + tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( + flag_op, + input[ITEMS_PER_THREAD - 1], + successor_item, + (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); + + // Set head_flags for remaining items + Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); + + // Set tail_flags for remaining items + Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); + } + + + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_discontinuity.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_discontinuity.cuh new file mode 100644 index 0000000..503e3e0 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_discontinuity.cuh @@ -0,0 +1,1148 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * The cub::BlockDiscontinuity class provides [collective](index.html#sec0) methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. + */ + +#pragma once + +#include "../util_type.cuh" +#include "../util_ptx.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \brief The BlockDiscontinuity class provides [collective](index.html#sec0) methods for flagging discontinuities within an ordered set of items partitioned across a CUDA thread block. ![](discont_logo.png) + * \ingroup BlockModule + * + * \tparam T The data type to be flagged. + * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension + * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) + * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) + * \tparam PTX_ARCH [optional] \ptxversion + * + * \par Overview + * - A set of "head flags" (or "tail flags") is often used to indicate corresponding items + * that differ from their predecessors (or successors). For example, head flags are convenient + * for demarcating disjoint data segments as part of a segmented scan or reduction. + * - \blocked + * + * \par Performance Considerations + * - \granularity + * + * \par A Simple Example + * \blockcollective{BlockDiscontinuity} + * \par + * The code snippet below illustrates the head flagging of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int + * typedef cub::BlockDiscontinuity BlockDiscontinuity; + * + * // Allocate shared memory for BlockDiscontinuity + * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... 
+ * + * // Collectively compute head flags for discontinuities in the segment + * int head_flags[4]; + * BlockDiscontinuity(temp_storage).FlagHeads(head_flags, thread_data, cub::Inequality()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is + * { [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... }. + * The corresponding output \p head_flags in those threads will be + * { [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. + * + * \par Performance Considerations + * - Incurs zero bank conflicts for most types + * + */ +template < + typename T, + int BLOCK_DIM_X, + int BLOCK_DIM_Y = 1, + int BLOCK_DIM_Z = 1, + int PTX_ARCH = CUB_PTX_ARCH> +class BlockDiscontinuity +{ +private: + + /****************************************************************************** + * Constants and type definitions + ******************************************************************************/ + + /// Constants + enum + { + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + }; + + + /// Shared memory storage layout type (last element from each thread's input) + struct _TempStorage + { + T first_items[BLOCK_THREADS]; + T last_items[BLOCK_THREADS]; + }; + + + /****************************************************************************** + * Utility methods + ******************************************************************************/ + + /// Internal storage allocator + __device__ __forceinline__ _TempStorage& PrivateStorage() + { + __shared__ _TempStorage private_storage; + return private_storage; + } + + + /// Specialization for when FlagOp has third index param + template ::HAS_PARAM> + struct ApplyOp + { + // Apply flag operator + static __device__ __forceinline__ bool FlagT(FlagOp flag_op, const T &a, const T &b, int idx) + { + return flag_op(a, b, idx); + } + }; + + /// Specialization for when FlagOp does not have a third index param + template + struct ApplyOp + { + // Apply flag operator + static __device__ __forceinline__ bool FlagT(FlagOp flag_op, const T &a, const T &b, int /*idx*/) + { + return flag_op(a, b); + } + }; + + /// Templated unrolling of item comparison (inductive case) + template + struct Iterate + { + // Head flags + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + static __device__ __forceinline__ void FlagHeads( + int linear_tid, + FlagT (&flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + preds[ITERATION] = input[ITERATION - 1]; + + flags[ITERATION] = ApplyOp::FlagT( + flag_op, + preds[ITERATION], + input[ITERATION], + (linear_tid * ITEMS_PER_THREAD) + ITERATION); + + Iterate::FlagHeads(linear_tid, flags, input, preds, flag_op); + } + + // Tail flags + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + static __device__ __forceinline__ void FlagTails( + int linear_tid, + FlagT (&flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + flags[ITERATION] = ApplyOp::FlagT( + flag_op, + input[ITERATION], + input[ITERATION + 1], + (linear_tid * ITEMS_PER_THREAD) + ITERATION + 1); + + Iterate::FlagTails(linear_tid, flags, input, flag_op); + } + + }; + + 
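+    // (Compile-time recursion: each inductive step above handles one item and then
+    // invokes the next Iterate instantiation, so e.g. Iterate<1, 4>::FlagHeads unrolls
+    // into the flag computations for items 1, 2, and 3; the specialization below is
+    // the empty termination case that stops the recursion.)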
/// Templated unrolling of item comparison (termination case) + template + struct Iterate + { + // Head flags + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + static __device__ __forceinline__ void FlagHeads( + int /*linear_tid*/, + FlagT (&/*flags*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&/*input*/)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&/*preds*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items + FlagOp /*flag_op*/) ///< [in] Binary boolean flag predicate + {} + + // Tail flags + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + static __device__ __forceinline__ void FlagTails( + int /*linear_tid*/, + FlagT (&/*flags*/)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&/*input*/)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp /*flag_op*/) ///< [in] Binary boolean flag predicate + {} + }; + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + /// Shared storage reference + _TempStorage &temp_storage; + + /// Linear thread-id + unsigned int linear_tid; + + +public: + + /// \smemstorage{BlockDiscontinuity} + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /******************************************************************//** + * \name Collective constructors + *********************************************************************/ + //@{ + + /** + * \brief Collective constructor using a private static allocation of shared memory as temporary storage. + */ + __device__ __forceinline__ BlockDiscontinuity() + : + temp_storage(PrivateStorage()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + /** + * \brief Collective constructor using the specified memory allocation as temporary storage. 
+ */ + __device__ __forceinline__ BlockDiscontinuity( + TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + //@} end member group + /******************************************************************//** + * \name Head flag operations + *********************************************************************/ + //@{ + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeads( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + // Share last item + temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; + + CTA_SYNC(); + + if (linear_tid == 0) + { + // Set flag for first thread-item (preds[0] is undefined) + head_flags[0] = 1; + } + else + { + preds[0] = temp_storage.last_items[linear_tid - 1]; + head_flags[0] = ApplyOp::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); + } + + // Set head_flags for remaining items + Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); + } + + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeads( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&preds)[ITEMS_PER_THREAD], ///< [out] Calling thread's predecessor items + FlagOp flag_op, ///< [in] Binary boolean flag predicate + T tile_predecessor_item) ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). + { + // Share last item + temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; + + CTA_SYNC(); + + // Set flag for first thread-item + preds[0] = (linear_tid == 0) ? + tile_predecessor_item : // First thread + temp_storage.last_items[linear_tid - 1]; + + head_flags[0] = ApplyOp::FlagT(flag_op, preds[0], input[0], linear_tid * ITEMS_PER_THREAD); + + // Set head_flags for remaining items + Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); + } + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + + /** + * \brief Sets head flags indicating discontinuities between items partitioned across the thread block, for which the first item has no reference and is always flagged. + * + * \par + * - The flag head_flagsi is set for item + * inputi when + * flag_op(previous-item, inputi) + * returns \p true (where previous-item is either the preceding item + * in the same thread or the last item in the previous thread). + * - For thread0, item input0 is always flagged. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the head-flagging of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) 
+ * { + * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int + * typedef cub::BlockDiscontinuity BlockDiscontinuity; + * + * // Allocate shared memory for BlockDiscontinuity + * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Collectively compute head flags for discontinuities in the segment + * int head_flags[4]; + * BlockDiscontinuity(temp_storage).FlagHeads(head_flags, thread_data, cub::Inequality()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is + * { [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... }. + * The corresponding output \p head_flags in those threads will be + * { [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam FlagT [inferred] The flag type (must be an integer type) + * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. + */ + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeads( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + T preds[ITEMS_PER_THREAD]; + FlagHeads(head_flags, input, preds, flag_op); + } + + + /** + * \brief Sets head flags indicating discontinuities between items partitioned across the thread block. + * + * \par + * - The flag head_flagsi is set for item + * inputi when + * flag_op(previous-item, inputi) + * returns \p true (where previous-item is either the preceding item + * in the same thread or the last item in the previous thread). + * - For thread0, item input0 is compared + * against \p tile_predecessor_item. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the head-flagging of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int + * typedef cub::BlockDiscontinuity BlockDiscontinuity; + * + * // Allocate shared memory for BlockDiscontinuity + * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Have thread0 obtain the predecessor item for the entire tile + * int tile_predecessor_item; + * if (threadIdx.x == 0) tile_predecessor_item == ... + * + * // Collectively compute head flags for discontinuities in the segment + * int head_flags[4]; + * BlockDiscontinuity(temp_storage).FlagHeads( + * head_flags, thread_data, cub::Inequality(), tile_predecessor_item); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is + * { [0,0,1,1], [1,1,1,1], [2,3,3,3], [3,4,4,4], ... 
}, + * and that \p tile_predecessor_item is \p 0. The corresponding output \p head_flags in those threads will be + * { [0,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam FlagT [inferred] The flag type (must be an integer type) + * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. + */ + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeads( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op, ///< [in] Binary boolean flag predicate + T tile_predecessor_item) ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). + { + T preds[ITEMS_PER_THREAD]; + FlagHeads(head_flags, input, preds, flag_op, tile_predecessor_item); + } + + + + //@} end member group + /******************************************************************//** + * \name Tail flag operations + *********************************************************************/ + //@{ + + + /** + * \brief Sets tail flags indicating discontinuities between items partitioned across the thread block, for which the last item has no reference and is always flagged. + * + * \par + * - The flag tail_flagsi is set for item + * inputi when + * flag_op(inputi, next-item) + * returns \p true (where next-item is either the next item + * in the same thread or the first item in the next thread). + * - For threadBLOCK_THREADS-1, item + * inputITEMS_PER_THREAD-1 is always flagged. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the tail-flagging of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int + * typedef cub::BlockDiscontinuity BlockDiscontinuity; + * + * // Allocate shared memory for BlockDiscontinuity + * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Collectively compute tail flags for discontinuities in the segment + * int tail_flags[4]; + * BlockDiscontinuity(temp_storage).FlagTails(tail_flags, thread_data, cub::Inequality()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is + * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }. + * The corresponding output \p tail_flags in those threads will be + * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,1] }. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
+ * \tparam FlagT [inferred] The flag type (must be an integer type) + * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. + */ + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagTails( + FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + // Share first item + temp_storage.first_items[linear_tid] = input[0]; + + CTA_SYNC(); + + // Set flag for last thread-item + tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? + 1 : // Last thread + ApplyOp::FlagT( + flag_op, + input[ITEMS_PER_THREAD - 1], + temp_storage.first_items[linear_tid + 1], + (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); + + // Set tail_flags for remaining items + Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); + } + + + /** + * \brief Sets tail flags indicating discontinuities between items partitioned across the thread block. + * + * \par + * - The flag tail_flagsi is set for item + * inputi when + * flag_op(inputi, next-item) + * returns \p true (where next-item is either the next item + * in the same thread or the first item in the next thread). + * - For threadBLOCK_THREADS-1, item + * inputITEMS_PER_THREAD-1 is compared + * against \p tile_successor_item. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the tail-flagging of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int + * typedef cub::BlockDiscontinuity BlockDiscontinuity; + * + * // Allocate shared memory for BlockDiscontinuity + * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Have thread127 obtain the successor item for the entire tile + * int tile_successor_item; + * if (threadIdx.x == 127) tile_successor_item == ... + * + * // Collectively compute tail flags for discontinuities in the segment + * int tail_flags[4]; + * BlockDiscontinuity(temp_storage).FlagTails( + * tail_flags, thread_data, cub::Inequality(), tile_successor_item); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is + * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] } + * and that \p tile_successor_item is \p 125. The corresponding output \p tail_flags in those threads will be + * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,0] }. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
+ * \tparam FlagT [inferred] The flag type (must be an integer type) + * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. + */ + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagTails( + FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op, ///< [in] Binary boolean flag predicate + T tile_successor_item) ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). + { + // Share first item + temp_storage.first_items[linear_tid] = input[0]; + + CTA_SYNC(); + + // Set flag for last thread-item + T successor_item = (linear_tid == BLOCK_THREADS - 1) ? + tile_successor_item : // Last thread + temp_storage.first_items[linear_tid + 1]; + + tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( + flag_op, + input[ITEMS_PER_THREAD - 1], + successor_item, + (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); + + // Set tail_flags for remaining items + Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); + } + + + //@} end member group + /******************************************************************//** + * \name Head & tail flag operations + *********************************************************************/ + //@{ + + + /** + * \brief Sets both head and tail flags indicating discontinuities between items partitioned across the thread block. + * + * \par + * - The flag head_flagsi is set for item + * inputi when + * flag_op(previous-item, inputi) + * returns \p true (where previous-item is either the preceding item + * in the same thread or the last item in the previous thread). + * - For thread0, item input0 is always flagged. + * - The flag tail_flagsi is set for item + * inputi when + * flag_op(inputi, next-item) + * returns \p true (where next-item is either the next item + * in the same thread or the first item in the next thread). + * - For threadBLOCK_THREADS-1, item + * inputITEMS_PER_THREAD-1 is always flagged. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the head- and tail-flagging of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int + * typedef cub::BlockDiscontinuity BlockDiscontinuity; + * + * // Allocate shared memory for BlockDiscontinuity + * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... 
+ * + * // Collectively compute head and flags for discontinuities in the segment + * int head_flags[4]; + * int tail_flags[4]; + * BlockDiscontinuity(temp_storage).FlagTails( + * head_flags, tail_flags, thread_data, cub::Inequality()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is + * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] } + * and that the tile_successor_item is \p 125. The corresponding output \p head_flags + * in those threads will be { [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. + * and the corresponding output \p tail_flags in those threads will be + * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,1] }. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam FlagT [inferred] The flag type (must be an integer type) + * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. + */ + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeadsAndTails( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + // Share first and last items + temp_storage.first_items[linear_tid] = input[0]; + temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; + + CTA_SYNC(); + + T preds[ITEMS_PER_THREAD]; + + // Set flag for first thread-item + preds[0] = temp_storage.last_items[linear_tid - 1]; + if (linear_tid == 0) + { + head_flags[0] = 1; + } + else + { + head_flags[0] = ApplyOp::FlagT( + flag_op, + preds[0], + input[0], + linear_tid * ITEMS_PER_THREAD); + } + + + // Set flag for last thread-item + tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? + 1 : // Last thread + ApplyOp::FlagT( + flag_op, + input[ITEMS_PER_THREAD - 1], + temp_storage.first_items[linear_tid + 1], + (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); + + // Set head_flags for remaining items + Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); + + // Set tail_flags for remaining items + Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); + } + + + /** + * \brief Sets both head and tail flags indicating discontinuities between items partitioned across the thread block. + * + * \par + * - The flag head_flagsi is set for item + * inputi when + * flag_op(previous-item, inputi) + * returns \p true (where previous-item is either the preceding item + * in the same thread or the last item in the previous thread). + * - For thread0, item input0 is always flagged. + * - The flag tail_flagsi is set for item + * inputi when + * flag_op(inputi, next-item) + * returns \p true (where next-item is either the next item + * in the same thread or the first item in the next thread). + * - For threadBLOCK_THREADS-1, item + * inputITEMS_PER_THREAD-1 is compared + * against \p tile_predecessor_item. 
+ * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the head- and tail-flagging of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int + * typedef cub::BlockDiscontinuity BlockDiscontinuity; + * + * // Allocate shared memory for BlockDiscontinuity + * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Have thread127 obtain the successor item for the entire tile + * int tile_successor_item; + * if (threadIdx.x == 127) tile_successor_item == ... + * + * // Collectively compute head and flags for discontinuities in the segment + * int head_flags[4]; + * int tail_flags[4]; + * BlockDiscontinuity(temp_storage).FlagTails( + * head_flags, tail_flags, tile_successor_item, thread_data, cub::Inequality()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is + * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] } + * and that the tile_successor_item is \p 125. The corresponding output \p head_flags + * in those threads will be { [1,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. + * and the corresponding output \p tail_flags in those threads will be + * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,0] }. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam FlagT [inferred] The flag type (must be an integer type) + * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. + */ + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeadsAndTails( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags + T tile_successor_item, ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + // Share first and last items + temp_storage.first_items[linear_tid] = input[0]; + temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; + + CTA_SYNC(); + + T preds[ITEMS_PER_THREAD]; + + // Set flag for first thread-item + if (linear_tid == 0) + { + head_flags[0] = 1; + } + else + { + preds[0] = temp_storage.last_items[linear_tid - 1]; + head_flags[0] = ApplyOp::FlagT( + flag_op, + preds[0], + input[0], + linear_tid * ITEMS_PER_THREAD); + } + + // Set flag for last thread-item + T successor_item = (linear_tid == BLOCK_THREADS - 1) ? 
+ tile_successor_item : // Last thread + temp_storage.first_items[linear_tid + 1]; + + tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( + flag_op, + input[ITEMS_PER_THREAD - 1], + successor_item, + (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); + + // Set head_flags for remaining items + Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); + + // Set tail_flags for remaining items + Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); + } + + + /** + * \brief Sets both head and tail flags indicating discontinuities between items partitioned across the thread block. + * + * \par + * - The flag head_flagsi is set for item + * inputi when + * flag_op(previous-item, inputi) + * returns \p true (where previous-item is either the preceding item + * in the same thread or the last item in the previous thread). + * - For thread0, item input0 is compared + * against \p tile_predecessor_item. + * - The flag tail_flagsi is set for item + * inputi when + * flag_op(inputi, next-item) + * returns \p true (where next-item is either the next item + * in the same thread or the first item in the next thread). + * - For threadBLOCK_THREADS-1, item + * inputITEMS_PER_THREAD-1 is always flagged. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the head- and tail-flagging of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int + * typedef cub::BlockDiscontinuity BlockDiscontinuity; + * + * // Allocate shared memory for BlockDiscontinuity + * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Have thread0 obtain the predecessor item for the entire tile + * int tile_predecessor_item; + * if (threadIdx.x == 0) tile_predecessor_item == ... + * + * // Have thread127 obtain the successor item for the entire tile + * int tile_successor_item; + * if (threadIdx.x == 127) tile_successor_item == ... + * + * // Collectively compute head and flags for discontinuities in the segment + * int head_flags[4]; + * int tail_flags[4]; + * BlockDiscontinuity(temp_storage).FlagTails( + * head_flags, tile_predecessor_item, tail_flags, tile_successor_item, + * thread_data, cub::Inequality()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is + * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }, + * that the \p tile_predecessor_item is \p 0, and that the + * \p tile_successor_item is \p 125. The corresponding output \p head_flags + * in those threads will be { [0,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. + * and the corresponding output \p tail_flags in those threads will be + * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,1] }. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
+ * \tparam FlagT [inferred] The flag type (must be an integer type) + * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. + */ + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeadsAndTails( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T tile_predecessor_item, ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). + FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + // Share first and last items + temp_storage.first_items[linear_tid] = input[0]; + temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; + + CTA_SYNC(); + + T preds[ITEMS_PER_THREAD]; + + // Set flag for first thread-item + preds[0] = (linear_tid == 0) ? + tile_predecessor_item : // First thread + temp_storage.last_items[linear_tid - 1]; + + head_flags[0] = ApplyOp::FlagT( + flag_op, + preds[0], + input[0], + linear_tid * ITEMS_PER_THREAD); + + // Set flag for last thread-item + tail_flags[ITEMS_PER_THREAD - 1] = (linear_tid == BLOCK_THREADS - 1) ? + 1 : // Last thread + ApplyOp::FlagT( + flag_op, + input[ITEMS_PER_THREAD - 1], + temp_storage.first_items[linear_tid + 1], + (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); + + // Set head_flags for remaining items + Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); + + // Set tail_flags for remaining items + Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); + } + + + /** + * \brief Sets both head and tail flags indicating discontinuities between items partitioned across the thread block. + * + * \par + * - The flag head_flagsi is set for item + * inputi when + * flag_op(previous-item, inputi) + * returns \p true (where previous-item is either the preceding item + * in the same thread or the last item in the previous thread). + * - For thread0, item input0 is compared + * against \p tile_predecessor_item. + * - The flag tail_flagsi is set for item + * inputi when + * flag_op(inputi, next-item) + * returns \p true (where next-item is either the next item + * in the same thread or the first item in the next thread). + * - For threadBLOCK_THREADS-1, item + * inputITEMS_PER_THREAD-1 is compared + * against \p tile_successor_item. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the head- and tail-flagging of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockDiscontinuity for a 1D block of 128 threads on type int + * typedef cub::BlockDiscontinuity BlockDiscontinuity; + * + * // Allocate shared memory for BlockDiscontinuity + * __shared__ typename BlockDiscontinuity::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... 
+ * + * // Have thread0 obtain the predecessor item for the entire tile + * int tile_predecessor_item; + * if (threadIdx.x == 0) tile_predecessor_item == ... + * + * // Have thread127 obtain the successor item for the entire tile + * int tile_successor_item; + * if (threadIdx.x == 127) tile_successor_item == ... + * + * // Collectively compute head and flags for discontinuities in the segment + * int head_flags[4]; + * int tail_flags[4]; + * BlockDiscontinuity(temp_storage).FlagTails( + * head_flags, tile_predecessor_item, tail_flags, tile_successor_item, + * thread_data, cub::Inequality()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is + * { [0,0,1,1], [1,1,1,1], [2,3,3,3], ..., [124,125,125,125] }, + * that the \p tile_predecessor_item is \p 0, and that the + * \p tile_successor_item is \p 125. The corresponding output \p head_flags + * in those threads will be { [0,0,1,0], [0,0,0,0], [1,1,0,0], [0,1,0,0], ... }. + * and the corresponding output \p tail_flags in those threads will be + * { [0,1,0,0], [0,0,0,1], [1,0,0,...], ..., [1,0,0,0] }. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam FlagT [inferred] The flag type (must be an integer type) + * \tparam FlagOp [inferred] Binary predicate functor type having member T operator()(const T &a, const T &b) or member T operator()(const T &a, const T &b, unsigned int b_index), and returning \p true if a discontinuity exists between \p a and \p b, otherwise \p false. \p b_index is the rank of b in the aggregate tile of data. + */ + template < + int ITEMS_PER_THREAD, + typename FlagT, + typename FlagOp> + __device__ __forceinline__ void FlagHeadsAndTails( + FlagT (&head_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity head_flags + T tile_predecessor_item, ///< [in] [thread0 only] Item with which to compare the first tile item (input0 from thread0). + FlagT (&tail_flags)[ITEMS_PER_THREAD], ///< [out] Calling thread's discontinuity tail_flags + T tile_successor_item, ///< [in] [threadBLOCK_THREADS-1 only] Item with which to compare the last tile item (inputITEMS_PER_THREAD-1 from threadBLOCK_THREADS-1). + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + FlagOp flag_op) ///< [in] Binary boolean flag predicate + { + // Share first and last items + temp_storage.first_items[linear_tid] = input[0]; + temp_storage.last_items[linear_tid] = input[ITEMS_PER_THREAD - 1]; + + CTA_SYNC(); + + T preds[ITEMS_PER_THREAD]; + + // Set flag for first thread-item + preds[0] = (linear_tid == 0) ? + tile_predecessor_item : // First thread + temp_storage.last_items[linear_tid - 1]; + + head_flags[0] = ApplyOp::FlagT( + flag_op, + preds[0], + input[0], + linear_tid * ITEMS_PER_THREAD); + + // Set flag for last thread-item + T successor_item = (linear_tid == BLOCK_THREADS - 1) ? 
+ tile_successor_item : // Last thread + temp_storage.first_items[linear_tid + 1]; + + tail_flags[ITEMS_PER_THREAD - 1] = ApplyOp::FlagT( + flag_op, + input[ITEMS_PER_THREAD - 1], + successor_item, + (linear_tid * ITEMS_PER_THREAD) + ITEMS_PER_THREAD); + + // Set head_flags for remaining items + Iterate<1, ITEMS_PER_THREAD>::FlagHeads(linear_tid, head_flags, input, preds, flag_op); + + // Set tail_flags for remaining items + Iterate<0, ITEMS_PER_THREAD - 1>::FlagTails(linear_tid, tail_flags, input, flag_op); + } + + + + + //@} end member group + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_exchange.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_exchange.cuh new file mode 100644 index 0000000..3ae9934 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_exchange.cuh @@ -0,0 +1,1248 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * The cub::BlockExchange class provides [collective](index.html#sec0) methods for rearranging data partitioned across a CUDA thread block. + */ + +#pragma once + +#include "../util_ptx.cuh" +#include "../util_arch.cuh" +#include "../util_macro.cuh" +#include "../util_type.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \brief The BlockExchange class provides [collective](index.html#sec0) methods for rearranging data partitioned across a CUDA thread block. ![](transpose_logo.png) + * \ingroup BlockModule + * + * \tparam T The data type to be exchanged. 
+ * \tparam BLOCK_DIM_X          The thread block length in threads along the X dimension
+ * \tparam ITEMS_PER_THREAD     The number of items partitioned onto each thread.
+ * \tparam WARP_TIME_SLICING    [optional] When \p true, only use enough shared memory for a single warp's worth of tile data, time-slicing the block-wide exchange over multiple synchronized rounds.  Yields a smaller memory footprint at the expense of decreased parallelism. (Default: false)
+ * \tparam BLOCK_DIM_Y          [optional] The thread block length in threads along the Y dimension (default: 1)
+ * \tparam BLOCK_DIM_Z          [optional] The thread block length in threads along the Z dimension (default: 1)
+ * \tparam PTX_ARCH             [optional] \ptxversion
+ *
+ * \par Overview
+ * - It is commonplace for blocks of threads to rearrange data items between
+ *   threads. For example, the device-accessible memory subsystem prefers access patterns
+ *   where data items are "striped" across threads (where consecutive threads access consecutive items),
+ *   yet most block-wide operations prefer a "blocked" partitioning of items across threads
+ *   (where consecutive items belong to a single thread).
+ * - BlockExchange supports the following types of data exchanges:
+ *   - Transposing between [blocked](index.html#sec5sec3) and [striped](index.html#sec5sec3) arrangements
+ *   - Transposing between [blocked](index.html#sec5sec3) and [warp-striped](index.html#sec5sec3) arrangements
+ *   - Scattering ranked items to a [blocked arrangement](index.html#sec5sec3)
+ *   - Scattering ranked items to a [striped arrangement](index.html#sec5sec3)
+ * - \rowmajor
+ *
+ * \par A Simple Example
+ * \blockcollective{BlockExchange}
+ * \par
+ * The code snippet below illustrates the conversion from a "striped" to a "blocked" arrangement
+ * of 512 integer items partitioned across 128 threads where each thread owns 4 items.
+ * \par
+ * \code
+ * #include <cub/cub.cuh>   // or equivalently <cub/block/block_exchange.cuh>
+ *
+ * __global__ void ExampleKernel(int *d_data, ...)
+ * {
+ *     // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each
+ *     typedef cub::BlockExchange<int, 128, 4> BlockExchange;
+ *
+ *     // Allocate shared memory for BlockExchange
+ *     __shared__ typename BlockExchange::TempStorage temp_storage;
+ *
+ *     // Load a tile of data striped across threads
+ *     int thread_data[4];
+ *     cub::LoadDirectStriped<128>(threadIdx.x, d_data, thread_data);
+ *
+ *     // Collectively exchange data into a blocked arrangement across threads
+ *     BlockExchange(temp_storage).StripedToBlocked(thread_data);
+ *
+ * \endcode
+ * \par
+ * Suppose the set of striped input \p thread_data across the block of threads is
+ * { [0,128,256,384], [1,129,257,385], ..., [127,255,383,511] }.
+ * The corresponding output \p thread_data in those threads will be
+ * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }.
+ *
+ * \par Performance Considerations
+ * - Proper device-specific padding ensures zero bank conflicts for most types.
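+ *
+ * \par
+ * The scatter-based exchanges additionally take a per-item destination rank.
+ * The sketch below is illustrative only: the kernel name, the \p d_ranks input,
+ * and the assumption that the ranks form a permutation of [0, 512) are not part
+ * of the library.
+ * \code
+ * #include <cub/cub.cuh>   // or equivalently <cub/block/block_exchange.cuh>
+ *
+ * __global__ void ScatterExampleKernel(int *d_data, int *d_ranks)
+ * {
+ *     // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each
+ *     typedef cub::BlockExchange<int, 128, 4> BlockExchange;
+ *
+ *     // Allocate shared memory for BlockExchange
+ *     __shared__ typename BlockExchange::TempStorage temp_storage;
+ *
+ *     // Obtain a blocked arrangement of items and of their destination ranks
+ *     int thread_data[4];
+ *     int thread_ranks[4];
+ *     cub::LoadDirectBlocked(threadIdx.x, d_data, thread_data);
+ *     cub::LoadDirectBlocked(threadIdx.x, d_ranks, thread_ranks);
+ *
+ *     // Scatter items to their ranks, gathering them back in a striped arrangement
+ *     BlockExchange(temp_storage).ScatterToStriped(thread_data, thread_data, thread_ranks);
+ * }
+ * \endcode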
+ * + */ +template < + typename InputT, + int BLOCK_DIM_X, + int ITEMS_PER_THREAD, + bool WARP_TIME_SLICING = false, + int BLOCK_DIM_Y = 1, + int BLOCK_DIM_Z = 1, + int PTX_ARCH = CUB_PTX_ARCH> +class BlockExchange +{ +private: + + /****************************************************************************** + * Constants + ******************************************************************************/ + + /// Constants + enum + { + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + + LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH), + WARP_THREADS = 1 << LOG_WARP_THREADS, + WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, + + LOG_SMEM_BANKS = CUB_LOG_SMEM_BANKS(PTX_ARCH), + SMEM_BANKS = 1 << LOG_SMEM_BANKS, + + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + + TIME_SLICES = (WARP_TIME_SLICING) ? WARPS : 1, + + TIME_SLICED_THREADS = (WARP_TIME_SLICING) ? CUB_MIN(BLOCK_THREADS, WARP_THREADS) : BLOCK_THREADS, + TIME_SLICED_ITEMS = TIME_SLICED_THREADS * ITEMS_PER_THREAD, + + WARP_TIME_SLICED_THREADS = CUB_MIN(BLOCK_THREADS, WARP_THREADS), + WARP_TIME_SLICED_ITEMS = WARP_TIME_SLICED_THREADS * ITEMS_PER_THREAD, + + // Insert padding to avoid bank conflicts during raking when items per thread is a power of two and > 4 (otherwise we can typically use 128b loads) + INSERT_PADDING = (ITEMS_PER_THREAD > 4) && (PowerOfTwo::VALUE), + PADDING_ITEMS = (INSERT_PADDING) ? (TIME_SLICED_ITEMS >> LOG_SMEM_BANKS) : 0, + }; + + /****************************************************************************** + * Type definitions + ******************************************************************************/ + + /// Shared memory storage layout type + struct __align__(16) _TempStorage + { + InputT buff[TIME_SLICED_ITEMS + PADDING_ITEMS]; + }; + +public: + + /// \smemstorage{BlockExchange} + struct TempStorage : Uninitialized<_TempStorage> {}; + +private: + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + /// Shared storage reference + _TempStorage &temp_storage; + + /// Linear thread-id + unsigned int linear_tid; + unsigned int lane_id; + unsigned int warp_id; + unsigned int warp_offset; + + + /****************************************************************************** + * Utility methods + ******************************************************************************/ + + /// Internal storage allocator + __device__ __forceinline__ _TempStorage& PrivateStorage() + { + __shared__ _TempStorage private_storage; + return private_storage; + } + + + /** + * Transposes data items from blocked arrangement to striped arrangement. Specialized for no timeslicing. + */ + template + __device__ __forceinline__ void BlockedToStriped( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. 
+ Int2Type /*time_slicing*/) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = (linear_tid * ITEMS_PER_THREAD) + ITEM; + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + temp_storage.buff[item_offset] = input_items[ITEM]; + } + + CTA_SYNC(); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + output_items[ITEM] = temp_storage.buff[item_offset]; + } + } + + + /** + * Transposes data items from blocked arrangement to striped arrangement. Specialized for warp-timeslicing. + */ + template + __device__ __forceinline__ void BlockedToStriped( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. + Int2Type /*time_slicing*/) + { + InputT temp_items[ITEMS_PER_THREAD]; + + #pragma unroll + for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) + { + const int SLICE_OFFSET = SLICE * TIME_SLICED_ITEMS; + const int SLICE_OOB = SLICE_OFFSET + TIME_SLICED_ITEMS; + + CTA_SYNC(); + + if (warp_id == SLICE) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = (lane_id * ITEMS_PER_THREAD) + ITEM; + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + temp_storage.buff[item_offset] = input_items[ITEM]; + } + } + + CTA_SYNC(); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + // Read a strip of items + const int STRIP_OFFSET = ITEM * BLOCK_THREADS; + const int STRIP_OOB = STRIP_OFFSET + BLOCK_THREADS; + + if ((SLICE_OFFSET < STRIP_OOB) && (SLICE_OOB > STRIP_OFFSET)) + { + int item_offset = STRIP_OFFSET + linear_tid - SLICE_OFFSET; + if ((item_offset >= 0) && (item_offset < TIME_SLICED_ITEMS)) + { + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + temp_items[ITEM] = temp_storage.buff[item_offset]; + } + } + } + } + + // Copy + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + output_items[ITEM] = temp_items[ITEM]; + } + } + + + /** + * Transposes data items from blocked arrangement to warp-striped arrangement. Specialized for no timeslicing + */ + template + __device__ __forceinline__ void BlockedToWarpStriped( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. + Int2Type /*time_slicing*/) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = warp_offset + ITEM + (lane_id * ITEMS_PER_THREAD); + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + temp_storage.buff[item_offset] = input_items[ITEM]; + } + + WARP_SYNC(0xffffffff); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = warp_offset + (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + output_items[ITEM] = temp_storage.buff[item_offset]; + } + } + + /** + * Transposes data items from blocked arrangement to warp-striped arrangement. 
Specialized for warp-timeslicing + */ + template + __device__ __forceinline__ void BlockedToWarpStriped( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. + Int2Type /*time_slicing*/) + { + if (warp_id == 0) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = ITEM + (lane_id * ITEMS_PER_THREAD); + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + temp_storage.buff[item_offset] = input_items[ITEM]; + } + + WARP_SYNC(0xffffffff); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + output_items[ITEM] = temp_storage.buff[item_offset]; + } + } + + #pragma unroll + for (unsigned int SLICE = 1; SLICE < TIME_SLICES; ++SLICE) + { + CTA_SYNC(); + + if (warp_id == SLICE) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = ITEM + (lane_id * ITEMS_PER_THREAD); + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + temp_storage.buff[item_offset] = input_items[ITEM]; + } + + WARP_SYNC(0xffffffff); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + output_items[ITEM] = temp_storage.buff[item_offset]; + } + } + } + } + + + /** + * Transposes data items from striped arrangement to blocked arrangement. Specialized for no timeslicing. + */ + template + __device__ __forceinline__ void StripedToBlocked( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. + Int2Type /*time_slicing*/) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + temp_storage.buff[item_offset] = input_items[ITEM]; + } + + CTA_SYNC(); + + // No timeslicing + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = (linear_tid * ITEMS_PER_THREAD) + ITEM; + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + output_items[ITEM] = temp_storage.buff[item_offset]; + } + } + + + /** + * Transposes data items from striped arrangement to blocked arrangement. Specialized for warp-timeslicing. + */ + template + __device__ __forceinline__ void StripedToBlocked( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. 
+ Int2Type /*time_slicing*/) + { + // Warp time-slicing + InputT temp_items[ITEMS_PER_THREAD]; + + #pragma unroll + for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) + { + const int SLICE_OFFSET = SLICE * TIME_SLICED_ITEMS; + const int SLICE_OOB = SLICE_OFFSET + TIME_SLICED_ITEMS; + + CTA_SYNC(); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + // Write a strip of items + const int STRIP_OFFSET = ITEM * BLOCK_THREADS; + const int STRIP_OOB = STRIP_OFFSET + BLOCK_THREADS; + + if ((SLICE_OFFSET < STRIP_OOB) && (SLICE_OOB > STRIP_OFFSET)) + { + int item_offset = STRIP_OFFSET + linear_tid - SLICE_OFFSET; + if ((item_offset >= 0) && (item_offset < TIME_SLICED_ITEMS)) + { + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + temp_storage.buff[item_offset] = input_items[ITEM]; + } + } + } + + CTA_SYNC(); + + if (warp_id == SLICE) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = (lane_id * ITEMS_PER_THREAD) + ITEM; + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + temp_items[ITEM] = temp_storage.buff[item_offset]; + } + } + } + + // Copy + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + output_items[ITEM] = temp_items[ITEM]; + } + } + + + /** + * Transposes data items from warp-striped arrangement to blocked arrangement. Specialized for no timeslicing + */ + template + __device__ __forceinline__ void WarpStripedToBlocked( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. + Int2Type /*time_slicing*/) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = warp_offset + (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + temp_storage.buff[item_offset] = input_items[ITEM]; + } + + WARP_SYNC(0xffffffff); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = warp_offset + ITEM + (lane_id * ITEMS_PER_THREAD); + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + output_items[ITEM] = temp_storage.buff[item_offset]; + } + } + + + /** + * Transposes data items from warp-striped arrangement to blocked arrangement. Specialized for warp-timeslicing + */ + template + __device__ __forceinline__ void WarpStripedToBlocked( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. 
+ Int2Type /*time_slicing*/) + { + #pragma unroll + for (unsigned int SLICE = 0; SLICE < TIME_SLICES; ++SLICE) + { + CTA_SYNC(); + + if (warp_id == SLICE) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = (ITEM * WARP_TIME_SLICED_THREADS) + lane_id; + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + temp_storage.buff[item_offset] = input_items[ITEM]; + } + + WARP_SYNC(0xffffffff); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = ITEM + (lane_id * ITEMS_PER_THREAD); + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + output_items[ITEM] = temp_storage.buff[item_offset]; + } + } + } + } + + + /** + * Exchanges data items annotated by rank into blocked arrangement. Specialized for no timeslicing. + */ + template + __device__ __forceinline__ void ScatterToBlocked( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. + OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks + Int2Type /*time_slicing*/) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = ranks[ITEM]; + if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + temp_storage.buff[item_offset] = input_items[ITEM]; + } + + CTA_SYNC(); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = (linear_tid * ITEMS_PER_THREAD) + ITEM; + if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + output_items[ITEM] = temp_storage.buff[item_offset]; + } + } + + /** + * Exchanges data items annotated by rank into blocked arrangement. Specialized for warp-timeslicing. + */ + template + __device__ __forceinline__ void ScatterToBlocked( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. + OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks + Int2Type /*time_slicing*/) + { + InputT temp_items[ITEMS_PER_THREAD]; + + #pragma unroll + for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) + { + CTA_SYNC(); + + const int SLICE_OFFSET = TIME_SLICED_ITEMS * SLICE; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = ranks[ITEM] - SLICE_OFFSET; + if ((item_offset >= 0) && (item_offset < WARP_TIME_SLICED_ITEMS)) + { + if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + temp_storage.buff[item_offset] = input_items[ITEM]; + } + } + + CTA_SYNC(); + + if (warp_id == SLICE) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = (lane_id * ITEMS_PER_THREAD) + ITEM; + if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + temp_items[ITEM] = temp_storage.buff[item_offset]; + } + } + } + + // Copy + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + output_items[ITEM] = temp_items[ITEM]; + } + } + + + /** + * Exchanges data items annotated by rank into striped arrangement. Specialized for no timeslicing. 
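+ *
+ * Each thread first writes its items to shared memory at their ranks; when
+ * INSERT_PADDING is enabled, SHR_ADD(offset, LOG_SMEM_BANKS, offset), i.e.
+ * offset + (offset >> LOG_SMEM_BANKS), skews every offset by one padding slot
+ * per SMEM_BANKS items to avoid shared-memory bank conflicts. The items are
+ * then gathered back in a striped pattern ((ITEM * BLOCK_THREADS) + linear_tid).
+ * For example, with 32 banks (LOG_SMEM_BANKS == 5) a rank of 64 maps to
+ * slot 64 + (64 >> 5) = 66.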
+ */ + template + __device__ __forceinline__ void ScatterToStriped( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. + OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks + Int2Type /*time_slicing*/) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = ranks[ITEM]; + if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + temp_storage.buff[item_offset] = input_items[ITEM]; + } + + CTA_SYNC(); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; + if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + output_items[ITEM] = temp_storage.buff[item_offset]; + } + } + + + /** + * Exchanges data items annotated by rank into striped arrangement. Specialized for warp-timeslicing. + */ + template + __device__ __forceinline__ void ScatterToStriped( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between blocked and striped arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items to exchange, converting between blocked and striped arrangements. + OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks + Int2Type /*time_slicing*/) + { + InputT temp_items[ITEMS_PER_THREAD]; + + #pragma unroll + for (int SLICE = 0; SLICE < TIME_SLICES; SLICE++) + { + const int SLICE_OFFSET = SLICE * TIME_SLICED_ITEMS; + const int SLICE_OOB = SLICE_OFFSET + TIME_SLICED_ITEMS; + + CTA_SYNC(); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = ranks[ITEM] - SLICE_OFFSET; + if ((item_offset >= 0) && (item_offset < WARP_TIME_SLICED_ITEMS)) + { + if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + temp_storage.buff[item_offset] = input_items[ITEM]; + } + } + + CTA_SYNC(); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + // Read a strip of items + const int STRIP_OFFSET = ITEM * BLOCK_THREADS; + const int STRIP_OOB = STRIP_OFFSET + BLOCK_THREADS; + + if ((SLICE_OFFSET < STRIP_OOB) && (SLICE_OOB > STRIP_OFFSET)) + { + int item_offset = STRIP_OFFSET + linear_tid - SLICE_OFFSET; + if ((item_offset >= 0) && (item_offset < TIME_SLICED_ITEMS)) + { + if (INSERT_PADDING) item_offset += item_offset >> LOG_SMEM_BANKS; + temp_items[ITEM] = temp_storage.buff[item_offset]; + } + } + } + } + + // Copy + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + output_items[ITEM] = temp_items[ITEM]; + } + } + + +public: + + /******************************************************************//** + * \name Collective constructors + *********************************************************************/ + //@{ + + /** + * \brief Collective constructor using a private static allocation of shared memory as temporary storage. + */ + __device__ __forceinline__ BlockExchange() + : + temp_storage(PrivateStorage()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), + warp_id((WARPS == 1) ? 0 : linear_tid / WARP_THREADS), + lane_id(LaneId()), + warp_offset(warp_id * WARP_TIME_SLICED_ITEMS) + {} + + + /** + * \brief Collective constructor using the specified memory allocation as temporary storage. 
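+ *
+ * \par
+ * A minimal usage sketch (the specialization shown is illustrative):
+ * \code
+ * typedef cub::BlockExchange<int, 128, 4> BlockExchange;
+ *
+ * __shared__ typename BlockExchange::TempStorage temp_storage;
+ * BlockExchange block_exchange(temp_storage);    // bind the collective to the caller-provided storage
+ * \endcode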
+ */ + __device__ __forceinline__ BlockExchange( + TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), + lane_id(LaneId()), + warp_id((WARPS == 1) ? 0 : linear_tid / WARP_THREADS), + warp_offset(warp_id * WARP_TIME_SLICED_ITEMS) + {} + + + //@} end member group + /******************************************************************//** + * \name Structured exchanges + *********************************************************************/ + //@{ + + /** + * \brief Transposes data items from striped arrangement to blocked arrangement. + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the conversion from a "striped" to a "blocked" arrangement + * of 512 integer items partitioned across 128 threads where each thread owns 4 items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(int *d_data, ...) + * { + * // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each + * typedef cub::BlockExchange BlockExchange; + * + * // Allocate shared memory for BlockExchange + * __shared__ typename BlockExchange::TempStorage temp_storage; + * + * // Load a tile of ordered data into a striped arrangement across block threads + * int thread_data[4]; + * cub::LoadDirectStriped<128>(threadIdx.x, d_data, thread_data); + * + * // Collectively exchange data into a blocked arrangement across threads + * BlockExchange(temp_storage).StripedToBlocked(thread_data, thread_data); + * + * \endcode + * \par + * Suppose the set of striped input \p thread_data across the block of threads is + * { [0,128,256,384], [1,129,257,385], ..., [127,255,383,511] } after loading from device-accessible memory. + * The corresponding output \p thread_data in those threads will be + * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. + * + */ + template + __device__ __forceinline__ void StripedToBlocked( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. + OutputT output_items[ITEMS_PER_THREAD]) ///< [out] Items from exchange, converting between striped and blocked arrangements. + { + StripedToBlocked(input_items, output_items, Int2Type()); + } + + + /** + * \brief Transposes data items from blocked arrangement to striped arrangement. + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the conversion from a "blocked" to a "striped" arrangement + * of 512 integer items partitioned across 128 threads where each thread owns 4 items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(int *d_data, ...) + * { + * // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each + * typedef cub::BlockExchange BlockExchange; + * + * // Allocate shared memory for BlockExchange + * __shared__ typename BlockExchange::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... 
+ * + * // Collectively exchange data into a striped arrangement across threads + * BlockExchange(temp_storage).BlockedToStriped(thread_data, thread_data); + * + * // Store data striped across block threads into an ordered tile + * cub::StoreDirectStriped(threadIdx.x, d_data, thread_data); + * + * \endcode + * \par + * Suppose the set of blocked input \p thread_data across the block of threads is + * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. + * The corresponding output \p thread_data in those threads will be + * { [0,128,256,384], [1,129,257,385], ..., [127,255,383,511] } in + * preparation for storing to device-accessible memory. + * + */ + template + __device__ __forceinline__ void BlockedToStriped( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. + OutputT output_items[ITEMS_PER_THREAD]) ///< [out] Items from exchange, converting between striped and blocked arrangements. + { + BlockedToStriped(input_items, output_items, Int2Type()); + } + + + + /** + * \brief Transposes data items from warp-striped arrangement to blocked arrangement. + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the conversion from a "warp-striped" to a "blocked" arrangement + * of 512 integer items partitioned across 128 threads where each thread owns 4 items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(int *d_data, ...) + * { + * // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each + * typedef cub::BlockExchange BlockExchange; + * + * // Allocate shared memory for BlockExchange + * __shared__ typename BlockExchange::TempStorage temp_storage; + * + * // Load a tile of ordered data into a warp-striped arrangement across warp threads + * int thread_data[4]; + * cub::LoadSWarptriped(threadIdx.x, d_data, thread_data); + * + * // Collectively exchange data into a blocked arrangement across threads + * BlockExchange(temp_storage).WarpStripedToBlocked(thread_data); + * + * \endcode + * \par + * Suppose the set of warp-striped input \p thread_data across the block of threads is + * { [0,32,64,96], [1,33,65,97], [2,34,66,98], ..., [415,447,479,511] } + * after loading from device-accessible memory. (The first 128 items are striped across + * the first warp of 32 threads, the second 128 items are striped across the second warp, etc.) + * The corresponding output \p thread_data in those threads will be + * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. + * + */ + template + __device__ __forceinline__ void WarpStripedToBlocked( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. + OutputT output_items[ITEMS_PER_THREAD]) ///< [out] Items from exchange, converting between striped and blocked arrangements. + { + WarpStripedToBlocked(input_items, output_items, Int2Type()); + } + + + + /** + * \brief Transposes data items from blocked arrangement to warp-striped arrangement. + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the conversion from a "blocked" to a "warp-striped" arrangement + * of 512 integer items partitioned across 128 threads where each thread owns 4 items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(int *d_data, ...) 
+ * { + * // Specialize BlockExchange for a 1D block of 128 threads owning 4 integer items each + * typedef cub::BlockExchange BlockExchange; + * + * // Allocate shared memory for BlockExchange + * __shared__ typename BlockExchange::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Collectively exchange data into a warp-striped arrangement across threads + * BlockExchange(temp_storage).BlockedToWarpStriped(thread_data, thread_data); + * + * // Store data striped across warp threads into an ordered tile + * cub::StoreDirectStriped(threadIdx.x, d_data, thread_data); + * + * \endcode + * \par + * Suppose the set of blocked input \p thread_data across the block of threads is + * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. + * The corresponding output \p thread_data in those threads will be + * { [0,32,64,96], [1,33,65,97], [2,34,66,98], ..., [415,447,479,511] } + * in preparation for storing to device-accessible memory. (The first 128 items are striped across + * the first warp of 32 threads, the second 128 items are striped across the second warp, etc.) + * + */ + template + __device__ __forceinline__ void BlockedToWarpStriped( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. + OutputT output_items[ITEMS_PER_THREAD]) ///< [out] Items from exchange, converting between striped and blocked arrangements. + { + BlockedToWarpStriped(input_items, output_items, Int2Type()); + } + + + + //@} end member group + /******************************************************************//** + * \name Scatter exchanges + *********************************************************************/ + //@{ + + + /** + * \brief Exchanges data items annotated by rank into blocked arrangement. + * + * \par + * - \smemreuse + * + * \tparam OffsetT [inferred] Signed integer type for local offsets + */ + template + __device__ __forceinline__ void ScatterToBlocked( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items from exchange, converting between striped and blocked arrangements. + OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks + { + ScatterToBlocked(input_items, output_items, ranks, Int2Type()); + } + + + + /** + * \brief Exchanges data items annotated by rank into striped arrangement. + * + * \par + * - \smemreuse + * + * \tparam OffsetT [inferred] Signed integer type for local offsets + */ + template + __device__ __forceinline__ void ScatterToStriped( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items from exchange, converting between striped and blocked arrangements. + OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks + { + ScatterToStriped(input_items, output_items, ranks, Int2Type()); + } + + + + /** + * \brief Exchanges data items annotated by rank into striped arrangement. Items with rank -1 are not exchanged. + * + * \par + * - \smemreuse + * + * \tparam OffsetT [inferred] Signed integer type for local offsets + */ + template + __device__ __forceinline__ void ScatterToStripedGuarded( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. 
+ OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items from exchange, converting between striped and blocked arrangements. + OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = ranks[ITEM]; + if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + if (ranks[ITEM] >= 0) + temp_storage.buff[item_offset] = input_items[ITEM]; + } + + CTA_SYNC(); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; + if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + output_items[ITEM] = temp_storage.buff[item_offset]; + } + } + + + + + /** + * \brief Exchanges valid data items annotated by rank into striped arrangement. + * + * \par + * - \smemreuse + * + * \tparam OffsetT [inferred] Signed integer type for local offsets + * \tparam ValidFlag [inferred] FlagT type denoting which items are valid + */ + template + __device__ __forceinline__ void ScatterToStripedFlagged( + InputT input_items[ITEMS_PER_THREAD], ///< [in] Items to exchange, converting between striped and blocked arrangements. + OutputT output_items[ITEMS_PER_THREAD], ///< [out] Items from exchange, converting between striped and blocked arrangements. + OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks + ValidFlag is_valid[ITEMS_PER_THREAD]) ///< [in] Corresponding flag denoting item validity + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = ranks[ITEM]; + if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + if (is_valid[ITEM]) + temp_storage.buff[item_offset] = input_items[ITEM]; + } + + CTA_SYNC(); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = int(ITEM * BLOCK_THREADS) + linear_tid; + if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + output_items[ITEM] = temp_storage.buff[item_offset]; + } + } + + + //@} end member group + + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + + + __device__ __forceinline__ void StripedToBlocked( + InputT items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between striped and blocked arrangements. + { + StripedToBlocked(items, items); + } + + __device__ __forceinline__ void BlockedToStriped( + InputT items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between striped and blocked arrangements. + { + BlockedToStriped(items, items); + } + + __device__ __forceinline__ void WarpStripedToBlocked( + InputT items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between striped and blocked arrangements. + { + WarpStripedToBlocked(items, items); + } + + __device__ __forceinline__ void BlockedToWarpStriped( + InputT items[ITEMS_PER_THREAD]) ///< [in-out] Items to exchange, converting between striped and blocked arrangements. + { + BlockedToWarpStriped(items, items); + } + + template + __device__ __forceinline__ void ScatterToBlocked( + InputT items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between striped and blocked arrangements. 
+ OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks + { + ScatterToBlocked(items, items, ranks); + } + + template + __device__ __forceinline__ void ScatterToStriped( + InputT items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between striped and blocked arrangements. + OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks + { + ScatterToStriped(items, items, ranks); + } + + template + __device__ __forceinline__ void ScatterToStripedGuarded( + InputT items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between striped and blocked arrangements. + OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks + { + ScatterToStripedGuarded(items, items, ranks); + } + + template + __device__ __forceinline__ void ScatterToStripedFlagged( + InputT items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange, converting between striped and blocked arrangements. + OffsetT ranks[ITEMS_PER_THREAD], ///< [in] Corresponding scatter ranks + ValidFlag is_valid[ITEMS_PER_THREAD]) ///< [in] Corresponding flag denoting item validity + { + ScatterToStriped(items, items, ranks, is_valid); + } + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + +}; + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + + +template < + typename T, + int ITEMS_PER_THREAD, + int LOGICAL_WARP_THREADS = CUB_PTX_WARP_THREADS, + int PTX_ARCH = CUB_PTX_ARCH> +class WarpExchange +{ +private: + + /****************************************************************************** + * Constants + ******************************************************************************/ + + /// Constants + enum + { + // Whether the logical warp size and the PTX warp size coincide + IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), + + WARP_ITEMS = (ITEMS_PER_THREAD * LOGICAL_WARP_THREADS) + 1, + + LOG_SMEM_BANKS = CUB_LOG_SMEM_BANKS(PTX_ARCH), + SMEM_BANKS = 1 << LOG_SMEM_BANKS, + + // Insert padding if the number of items per thread is a power of two and > 4 (otherwise we can typically use 128b loads) + INSERT_PADDING = (ITEMS_PER_THREAD > 4) && (PowerOfTwo::VALUE), + PADDING_ITEMS = (INSERT_PADDING) ? (WARP_ITEMS >> LOG_SMEM_BANKS) : 0, + }; + + /****************************************************************************** + * Type definitions + ******************************************************************************/ + + /// Shared memory storage layout type + struct _TempStorage + { + T buff[WARP_ITEMS + PADDING_ITEMS]; + }; + +public: + + /// \smemstorage{WarpExchange} + struct TempStorage : Uninitialized<_TempStorage> {}; + +private: + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + _TempStorage &temp_storage; + int lane_id; + +public: + + /****************************************************************************** + * Construction + ******************************************************************************/ + + /// Constructor + __device__ __forceinline__ WarpExchange( + TempStorage &temp_storage) + : + temp_storage(temp_storage.Alias()), + lane_id(IS_ARCH_WARP ? + LaneId() : + LaneId() % LOGICAL_WARP_THREADS) + {} + + + /****************************************************************************** + * Interface + ******************************************************************************/ + + /** + * \brief Exchanges valid data items annotated by rank into striped arrangement. 
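+ *
+ * \par
+ * A minimal usage sketch (assuming 4 integer items per lane; note that
+ * WarpExchange is an internal utility excluded from the generated
+ * documentation):
+ * \code
+ * // Specialize WarpExchange for one warp exchanging 4 integer items per lane
+ * typedef cub::WarpExchange<int, 4> WarpExchange;
+ *
+ * // Shared memory for the warp-wide exchange
+ * __shared__ typename WarpExchange::TempStorage temp_storage;
+ *
+ * // Per-lane items and their target ranks within the warp's tile
+ * int items[4];
+ * int ranks[4];
+ * ...
+ *
+ * // Scatter each item to the striped slot given by its rank
+ * WarpExchange(temp_storage).ScatterToStriped(items, ranks);
+ * \endcode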
+ * + * \par + * - \smemreuse + * + * \tparam OffsetT [inferred] Signed integer type for local offsets + */ + template + __device__ __forceinline__ void ScatterToStriped( + T items[ITEMS_PER_THREAD], ///< [in-out] Items to exchange + OffsetT ranks[ITEMS_PER_THREAD]) ///< [in] Corresponding scatter ranks + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + if (INSERT_PADDING) ranks[ITEM] = SHR_ADD(ranks[ITEM], LOG_SMEM_BANKS, ranks[ITEM]); + temp_storage.buff[ranks[ITEM]] = items[ITEM]; + } + + WARP_SYNC(0xffffffff); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + int item_offset = (ITEM * LOGICAL_WARP_THREADS) + lane_id; + if (INSERT_PADDING) item_offset = SHR_ADD(item_offset, LOG_SMEM_BANKS, item_offset); + items[ITEM] = temp_storage.buff[item_offset]; + } + } + +}; + + + + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + + + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_histogram.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_histogram.cuh new file mode 100644 index 0000000..b7cb970 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_histogram.cuh @@ -0,0 +1,415 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * The cub::BlockHistogram class provides [collective](index.html#sec0) methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. 
+ */ + +#pragma once + +#include "specializations/block_histogram_sort.cuh" +#include "specializations/block_histogram_atomic.cuh" +#include "../util_ptx.cuh" +#include "../util_arch.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Algorithmic variants + ******************************************************************************/ + +/** + * \brief BlockHistogramAlgorithm enumerates alternative algorithms for the parallel construction of block-wide histograms. + */ +enum BlockHistogramAlgorithm +{ + + /** + * \par Overview + * Sorting followed by differentiation. Execution is comprised of two phases: + * -# Sort the data using efficient radix sort + * -# Look for "runs" of same-valued keys by detecting discontinuities; the run-lengths are histogram bin counts. + * + * \par Performance Considerations + * Delivers consistent throughput regardless of sample bin distribution. + */ + BLOCK_HISTO_SORT, + + + /** + * \par Overview + * Use atomic addition to update byte counts directly + * + * \par Performance Considerations + * Performance is strongly tied to the hardware implementation of atomic + * addition, and may be significantly degraded for non uniformly-random + * input distributions where many concurrent updates are likely to be + * made to the same bin counter. + */ + BLOCK_HISTO_ATOMIC, +}; + + + +/****************************************************************************** + * Block histogram + ******************************************************************************/ + + +/** + * \brief The BlockHistogram class provides [collective](index.html#sec0) methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. ![](histogram_logo.png) + * \ingroup BlockModule + * + * \tparam T The sample type being histogrammed (must be castable to an integer bin identifier) + * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension + * \tparam ITEMS_PER_THREAD The number of items per thread + * \tparam BINS The number bins within the histogram + * \tparam ALGORITHM [optional] cub::BlockHistogramAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_HISTO_SORT) + * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) + * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) + * \tparam PTX_ARCH [optional] \ptxversion + * + * \par Overview + * - A histogram + * counts the number of observations that fall into each of the disjoint categories (known as bins). + * - BlockHistogram can be optionally specialized to use different algorithms: + * -# cub::BLOCK_HISTO_SORT. Sorting followed by differentiation. [More...](\ref cub::BlockHistogramAlgorithm) + * -# cub::BLOCK_HISTO_ATOMIC. Use atomic addition to update byte counts directly. [More...](\ref cub::BlockHistogramAlgorithm) + * + * \par Performance Considerations + * - \granularity + * + * \par A Simple Example + * \blockcollective{BlockHistogram} + * \par + * The code snippet below illustrates a 256-bin histogram of 512 integer samples that + * are partitioned across 128 threads where each thread owns 4 samples. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) 
+ * { + * // Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each + * typedef cub::BlockHistogram BlockHistogram; + * + * // Allocate shared memory for BlockHistogram + * __shared__ typename BlockHistogram::TempStorage temp_storage; + * + * // Allocate shared memory for block-wide histogram bin counts + * __shared__ unsigned int smem_histogram[256]; + * + * // Obtain input samples per thread + * unsigned char data[4]; + * ... + * + * // Compute the block-wide histogram + * BlockHistogram(temp_storage).Histogram(data, smem_histogram); + * + * \endcode + * + * \par Performance and Usage Considerations + * - The histogram output can be constructed in shared or device-accessible memory + * - See cub::BlockHistogramAlgorithm for performance details regarding algorithmic alternatives + * + */ +template < + typename T, + int BLOCK_DIM_X, + int ITEMS_PER_THREAD, + int BINS, + BlockHistogramAlgorithm ALGORITHM = BLOCK_HISTO_SORT, + int BLOCK_DIM_Y = 1, + int BLOCK_DIM_Z = 1, + int PTX_ARCH = CUB_PTX_ARCH> +class BlockHistogram +{ +private: + + /****************************************************************************** + * Constants and type definitions + ******************************************************************************/ + + /// Constants + enum + { + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + }; + + /** + * Ensure the template parameterization meets the requirements of the + * targeted device architecture. BLOCK_HISTO_ATOMIC can only be used + * on version SM120 or later. Otherwise BLOCK_HISTO_SORT is used + * regardless. + */ + static const BlockHistogramAlgorithm SAFE_ALGORITHM = + ((ALGORITHM == BLOCK_HISTO_ATOMIC) && (PTX_ARCH < 120)) ? + BLOCK_HISTO_SORT : + ALGORITHM; + + /// Internal specialization. + typedef typename If<(SAFE_ALGORITHM == BLOCK_HISTO_SORT), + BlockHistogramSort, + BlockHistogramAtomic >::Type InternalBlockHistogram; + + /// Shared memory storage layout type for BlockHistogram + typedef typename InternalBlockHistogram::TempStorage _TempStorage; + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + /// Shared storage reference + _TempStorage &temp_storage; + + /// Linear thread-id + unsigned int linear_tid; + + + /****************************************************************************** + * Utility methods + ******************************************************************************/ + + /// Internal storage allocator + __device__ __forceinline__ _TempStorage& PrivateStorage() + { + __shared__ _TempStorage private_storage; + return private_storage; + } + + +public: + + /// \smemstorage{BlockHistogram} + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /******************************************************************//** + * \name Collective constructors + *********************************************************************/ + //@{ + + /** + * \brief Collective constructor using a private static allocation of shared memory as temporary storage. + */ + __device__ __forceinline__ BlockHistogram() + : + temp_storage(PrivateStorage()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + /** + * \brief Collective constructor using the specified memory allocation as temporary storage. 
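+ *
+ * \par
+ * A minimal sketch of the intended usage pattern, assuming a 256-bin
+ * histogram over a 1D block of 128 threads with 4 character samples each:
+ * \code
+ * // Shared memory provided by the caller for the collective
+ * __shared__ typename cub::BlockHistogram<unsigned char, 128, 4, 256>::TempStorage temp_storage;
+ *
+ * // Construct the collective over the caller-provided storage
+ * cub::BlockHistogram<unsigned char, 128, 4, 256> block_histogram(temp_storage);
+ * \endcode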
+ */ + __device__ __forceinline__ BlockHistogram( + TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + //@} end member group + /******************************************************************//** + * \name Histogram operations + *********************************************************************/ + //@{ + + + /** + * \brief Initialize the shared histogram counters to zero. + * + * \par Snippet + * The code snippet below illustrates a the initialization and update of a + * histogram of 512 integer samples that are partitioned across 128 threads + * where each thread owns 4 samples. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each + * typedef cub::BlockHistogram BlockHistogram; + * + * // Allocate shared memory for BlockHistogram + * __shared__ typename BlockHistogram::TempStorage temp_storage; + * + * // Allocate shared memory for block-wide histogram bin counts + * __shared__ unsigned int smem_histogram[256]; + * + * // Obtain input samples per thread + * unsigned char thread_samples[4]; + * ... + * + * // Initialize the block-wide histogram + * BlockHistogram(temp_storage).InitHistogram(smem_histogram); + * + * // Update the block-wide histogram + * BlockHistogram(temp_storage).Composite(thread_samples, smem_histogram); + * + * \endcode + * + * \tparam CounterT [inferred] Histogram counter type + */ + template + __device__ __forceinline__ void InitHistogram(CounterT histogram[BINS]) + { + // Initialize histogram bin counts to zeros + int histo_offset = 0; + + #pragma unroll + for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS) + { + histogram[histo_offset + linear_tid] = 0; + } + // Finish up with guarded initialization if necessary + if ((BINS % BLOCK_THREADS != 0) && (histo_offset + linear_tid < BINS)) + { + histogram[histo_offset + linear_tid] = 0; + } + } + + + /** + * \brief Constructs a block-wide histogram in shared/device-accessible memory. Each thread contributes an array of input elements. + * + * \par + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a 256-bin histogram of 512 integer samples that + * are partitioned across 128 threads where each thread owns 4 samples. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each + * typedef cub::BlockHistogram BlockHistogram; + * + * // Allocate shared memory for BlockHistogram + * __shared__ typename BlockHistogram::TempStorage temp_storage; + * + * // Allocate shared memory for block-wide histogram bin counts + * __shared__ unsigned int smem_histogram[256]; + * + * // Obtain input samples per thread + * unsigned char thread_samples[4]; + * ... 
+ * + * // Compute the block-wide histogram + * BlockHistogram(temp_storage).Histogram(thread_samples, smem_histogram); + * + * \endcode + * + * \tparam CounterT [inferred] Histogram counter type + */ + template < + typename CounterT > + __device__ __forceinline__ void Histogram( + T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram + CounterT histogram[BINS]) ///< [out] Reference to shared/device-accessible memory histogram + { + // Initialize histogram bin counts to zeros + InitHistogram(histogram); + + CTA_SYNC(); + + // Composite the histogram + InternalBlockHistogram(temp_storage).Composite(items, histogram); + } + + + + /** + * \brief Updates an existing block-wide histogram in shared/device-accessible memory. Each thread composites an array of input elements. + * + * \par + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a the initialization and update of a + * histogram of 512 integer samples that are partitioned across 128 threads + * where each thread owns 4 samples. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize a 256-bin BlockHistogram type for a 1D block of 128 threads having 4 character samples each + * typedef cub::BlockHistogram BlockHistogram; + * + * // Allocate shared memory for BlockHistogram + * __shared__ typename BlockHistogram::TempStorage temp_storage; + * + * // Allocate shared memory for block-wide histogram bin counts + * __shared__ unsigned int smem_histogram[256]; + * + * // Obtain input samples per thread + * unsigned char thread_samples[4]; + * ... + * + * // Initialize the block-wide histogram + * BlockHistogram(temp_storage).InitHistogram(smem_histogram); + * + * // Update the block-wide histogram + * BlockHistogram(temp_storage).Composite(thread_samples, smem_histogram); + * + * \endcode + * + * \tparam CounterT [inferred] Histogram counter type + */ + template < + typename CounterT > + __device__ __forceinline__ void Composite( + T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram + CounterT histogram[BINS]) ///< [out] Reference to shared/device-accessible memory histogram + { + InternalBlockHistogram(temp_storage).Composite(items, histogram); + } + +}; + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_load.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_load.cuh new file mode 100644 index 0000000..217f521 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_load.cuh @@ -0,0 +1,1241 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2016, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Operations for reading linear tiles of data into the CUDA thread block. + */ + +#pragma once + +#include + +#include "block_exchange.cuh" +#include "../iterator/cache_modified_input_iterator.cuh" +#include "../util_ptx.cuh" +#include "../util_macro.cuh" +#include "../util_type.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \addtogroup UtilIo + * @{ + */ + + +/******************************************************************//** + * \name Blocked arrangement I/O (direct) + *********************************************************************/ +//@{ + + +/** + * \brief Load a linear segment of items into a blocked arrangement across the thread block. + * + * \blocked + * + * \tparam T [inferred] The data type to load. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. + */ +template < + typename InputT, + int ITEMS_PER_THREAD, + typename InputIteratorT> +__device__ __forceinline__ void LoadDirectBlocked( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load +{ + InputIteratorT thread_itr = block_itr + (linear_tid * ITEMS_PER_THREAD); + + // Load directly in thread-blocked order + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + items[ITEM] = thread_itr[ITEM]; + } +} + + +/** + * \brief Load a linear segment of items into a blocked arrangement across the thread block, guarded by range. + * + * \blocked + * + * \tparam T [inferred] The data type to load. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. 
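+ *
+ * \par
+ * A minimal sketch, assuming 4 items per thread and an \p int input
+ * (kernel parameters shown are illustrative):
+ * \code
+ * __global__ void ExampleKernel(int *d_data, int valid_items)
+ * {
+ *     // Each thread loads up to 4 consecutive items; positions at or
+ *     // beyond valid_items are left unassigned.
+ *     int thread_data[4];
+ *     cub::LoadDirectBlocked(threadIdx.x, d_data, thread_data, valid_items);
+ * }
+ * \endcode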
+ */ +template < + typename InputT, + int ITEMS_PER_THREAD, + typename InputIteratorT> +__device__ __forceinline__ void LoadDirectBlocked( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items) ///< [in] Number of valid items to load +{ + InputIteratorT thread_itr = block_itr + (linear_tid * ITEMS_PER_THREAD); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + if ((linear_tid * ITEMS_PER_THREAD) + ITEM < valid_items) + { + items[ITEM] = thread_itr[ITEM]; + } + } +} + + +/** + * \brief Load a linear segment of items into a blocked arrangement across the thread block, guarded by range, with a fall-back assignment of out-of-bound elements.. + * + * \blocked + * + * \tparam T [inferred] The data type to load. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. + */ +template < + typename InputT, + typename DefaultT, + int ITEMS_PER_THREAD, + typename InputIteratorT> +__device__ __forceinline__ void LoadDirectBlocked( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items, ///< [in] Number of valid items to load + DefaultT oob_default) ///< [in] Default value to assign out-of-bound items +{ + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + items[ITEM] = oob_default; + + LoadDirectBlocked(linear_tid, block_itr, items, valid_items); +} + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +/** + * Internal implementation for load vectorization + */ +template < + CacheLoadModifier MODIFIER, + typename T, + int ITEMS_PER_THREAD> +__device__ __forceinline__ void InternalLoadDirectBlockedVectorized( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + T *block_ptr, ///< [in] Input pointer for loading from + T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load +{ + // Biggest memory access word that T is a whole multiple of + typedef typename UnitWord::DeviceWord DeviceWord; + + enum + { + TOTAL_WORDS = sizeof(items) / sizeof(DeviceWord), + + VECTOR_SIZE = (TOTAL_WORDS % 4 == 0) ? + 4 : + (TOTAL_WORDS % 2 == 0) ? + 2 : + 1, + + VECTORS_PER_THREAD = TOTAL_WORDS / VECTOR_SIZE, + }; + + // Vector type + typedef typename CubVector::Type Vector; + + // Vector items + Vector vec_items[VECTORS_PER_THREAD]; + + // Aliased input ptr + Vector* vec_ptr = reinterpret_cast(block_ptr) + (linear_tid * VECTORS_PER_THREAD); + + // Load directly in thread-blocked order + #pragma unroll + for (int ITEM = 0; ITEM < VECTORS_PER_THREAD; ITEM++) + { + vec_items[ITEM] = ThreadLoad(vec_ptr + ITEM); + } + + // Copy + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + items[ITEM] = *(reinterpret_cast(vec_items) + ITEM); + } +} + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + +/** + * \brief Load a linear segment of items into a blocked arrangement across the thread block. 
+ * + * \blocked + * + * The input offset (\p block_ptr + \p block_offset) must be quad-item aligned + * + * The following conditions will prevent vectorization and loading will fall back to cub::BLOCK_LOAD_DIRECT: + * - \p ITEMS_PER_THREAD is odd + * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) + * + * \tparam T [inferred] The data type to load. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + */ +template < + typename T, + int ITEMS_PER_THREAD> +__device__ __forceinline__ void LoadDirectBlockedVectorized( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + T *block_ptr, ///< [in] Input pointer for loading from + T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load +{ + InternalLoadDirectBlockedVectorized(linear_tid, block_ptr, items); +} + + +//@} end member group +/******************************************************************//** + * \name Striped arrangement I/O (direct) + *********************************************************************/ +//@{ + + +/** + * \brief Load a linear segment of items into a striped arrangement across the thread block. + * + * \striped + * + * \tparam BLOCK_THREADS The thread block size in threads + * \tparam T [inferred] The data type to load. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. + */ +template < + int BLOCK_THREADS, + typename InputT, + int ITEMS_PER_THREAD, + typename InputIteratorT> +__device__ __forceinline__ void LoadDirectStriped( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load +{ + InputIteratorT thread_itr = block_itr + linear_tid; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + items[ITEM] = thread_itr[ITEM * BLOCK_THREADS]; + } +} + + +/** + * \brief Load a linear segment of items into a striped arrangement across the thread block, guarded by range + * + * \striped + * + * \tparam BLOCK_THREADS The thread block size in threads + * \tparam T [inferred] The data type to load. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. 
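+ *
+ * \par
+ * A minimal sketch, assuming a 128-thread block with 4 items per thread
+ * (kernel parameters shown are illustrative):
+ * \code
+ * __global__ void ExampleKernel(int *d_data, int valid_items)
+ * {
+ *     // Thread t loads items t, t+128, t+256, and t+384, skipping any
+ *     // position at or beyond valid_items.
+ *     int thread_data[4];
+ *     cub::LoadDirectStriped<128>(threadIdx.x, d_data, thread_data, valid_items);
+ * }
+ * \endcode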
+ */ +template < + int BLOCK_THREADS, + typename InputT, + int ITEMS_PER_THREAD, + typename InputIteratorT> +__device__ __forceinline__ void LoadDirectStriped( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items) ///< [in] Number of valid items to load +{ + InputIteratorT thread_itr = block_itr + linear_tid; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + if (linear_tid + (ITEM * BLOCK_THREADS) < valid_items) + { + items[ITEM] = thread_itr[ITEM * BLOCK_THREADS]; + } + } +} + + +/** + * \brief Load a linear segment of items into a striped arrangement across the thread block, guarded by range, with a fall-back assignment of out-of-bound elements. + * + * \striped + * + * \tparam BLOCK_THREADS The thread block size in threads + * \tparam T [inferred] The data type to load. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. + */ +template < + int BLOCK_THREADS, + typename InputT, + typename DefaultT, + int ITEMS_PER_THREAD, + typename InputIteratorT> +__device__ __forceinline__ void LoadDirectStriped( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items, ///< [in] Number of valid items to load + DefaultT oob_default) ///< [in] Default value to assign out-of-bound items +{ + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + items[ITEM] = oob_default; + + LoadDirectStriped(linear_tid, block_itr, items, valid_items); +} + + + +//@} end member group +/******************************************************************//** + * \name Warp-striped arrangement I/O (direct) + *********************************************************************/ +//@{ + + +/** + * \brief Load a linear segment of items into a warp-striped arrangement across the thread block. + * + * \warpstriped + * + * \par Usage Considerations + * The number of threads in the thread block must be a multiple of the architecture's warp size. + * + * \tparam T [inferred] The data type to load. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. 
+ */ +template < + typename InputT, + int ITEMS_PER_THREAD, + typename InputIteratorT> +__device__ __forceinline__ void LoadDirectWarpStriped( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load +{ + int tid = linear_tid & (CUB_PTX_WARP_THREADS - 1); + int wid = linear_tid >> CUB_PTX_LOG_WARP_THREADS; + int warp_offset = wid * CUB_PTX_WARP_THREADS * ITEMS_PER_THREAD; + + InputIteratorT thread_itr = block_itr + warp_offset + tid ; + + // Load directly in warp-striped order + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + items[ITEM] = thread_itr[(ITEM * CUB_PTX_WARP_THREADS)]; + } +} + + +/** + * \brief Load a linear segment of items into a warp-striped arrangement across the thread block, guarded by range + * + * \warpstriped + * + * \par Usage Considerations + * The number of threads in the thread block must be a multiple of the architecture's warp size. + * + * \tparam T [inferred] The data type to load. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. + */ +template < + typename InputT, + int ITEMS_PER_THREAD, + typename InputIteratorT> +__device__ __forceinline__ void LoadDirectWarpStriped( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items) ///< [in] Number of valid items to load +{ + int tid = linear_tid & (CUB_PTX_WARP_THREADS - 1); + int wid = linear_tid >> CUB_PTX_LOG_WARP_THREADS; + int warp_offset = wid * CUB_PTX_WARP_THREADS * ITEMS_PER_THREAD; + + InputIteratorT thread_itr = block_itr + warp_offset + tid ; + + // Load directly in warp-striped order + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + if (warp_offset + tid + (ITEM * CUB_PTX_WARP_THREADS) < valid_items) + { + items[ITEM] = thread_itr[(ITEM * CUB_PTX_WARP_THREADS)]; + } + } +} + + +/** + * \brief Load a linear segment of items into a warp-striped arrangement across the thread block, guarded by range, with a fall-back assignment of out-of-bound elements. + * + * \warpstriped + * + * \par Usage Considerations + * The number of threads in the thread block must be a multiple of the architecture's warp size. + * + * \tparam T [inferred] The data type to load. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam InputIteratorT [inferred] The random-access iterator type for input \iterator. 
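+ *
+ * \par
+ * A minimal sketch, assuming 4 items per thread, a block size that is a
+ * multiple of the warp size, and -1 as the out-of-bounds default (kernel
+ * parameters shown are illustrative):
+ * \code
+ * __global__ void ExampleKernel(int *d_data, int valid_items)
+ * {
+ *     // Each warp loads a warp-striped tile; positions at or beyond
+ *     // valid_items receive the default value -1.
+ *     int thread_data[4];
+ *     cub::LoadDirectWarpStriped(threadIdx.x, d_data, thread_data, valid_items, -1);
+ * }
+ * \endcode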
+ */ +template < + typename InputT, + typename DefaultT, + int ITEMS_PER_THREAD, + typename InputIteratorT> +__device__ __forceinline__ void LoadDirectWarpStriped( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items, ///< [in] Number of valid items to load + DefaultT oob_default) ///< [in] Default value to assign out-of-bound items +{ + // Load directly in warp-striped order + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + items[ITEM] = oob_default; + + LoadDirectWarpStriped(linear_tid, block_itr, items, valid_items); +} + + + +//@} end member group + +/** @} */ // end group UtilIo + + + +//----------------------------------------------------------------------------- +// Generic BlockLoad abstraction +//----------------------------------------------------------------------------- + +/** + * \brief cub::BlockLoadAlgorithm enumerates alternative algorithms for cub::BlockLoad to read a linear segment of data from memory into a blocked arrangement across a CUDA thread block. + */ + +/** + * \brief cub::BlockLoadAlgorithm enumerates alternative algorithms for cub::BlockLoad to read a linear segment of data from memory into a blocked arrangement across a CUDA thread block. + */ +enum BlockLoadAlgorithm +{ + /** + * \par Overview + * + * A [blocked arrangement](index.html#sec5sec3) of data is read + * directly from memory. + * + * \par Performance Considerations + * - The utilization of memory transactions (coalescing) decreases as the + * access stride between threads increases (i.e., the number items per thread). + */ + BLOCK_LOAD_DIRECT, + + /** + * \par Overview + * + * A [blocked arrangement](index.html#sec5sec3) of data is read + * from memory using CUDA's built-in vectorized loads as a coalescing optimization. + * For example, ld.global.v4.s32 instructions will be generated + * when \p T = \p int and \p ITEMS_PER_THREAD % 4 == 0. + * + * \par Performance Considerations + * - The utilization of memory transactions (coalescing) remains high until the the + * access stride between threads (i.e., the number items per thread) exceeds the + * maximum vector load width (typically 4 items or 64B, whichever is lower). + * - The following conditions will prevent vectorization and loading will fall back to cub::BLOCK_LOAD_DIRECT: + * - \p ITEMS_PER_THREAD is odd + * - The \p InputIteratorTis not a simple pointer type + * - The block input offset is not quadword-aligned + * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) + */ + BLOCK_LOAD_VECTORIZE, + + /** + * \par Overview + * + * A [striped arrangement](index.html#sec5sec3) of data is read + * efficiently from memory and then locally transposed into a + * [blocked arrangement](index.html#sec5sec3). + * + * \par Performance Considerations + * - The utilization of memory transactions (coalescing) remains high regardless + * of items loaded per thread. + * - The local reordering incurs slightly longer latencies and throughput than the + * direct cub::BLOCK_LOAD_DIRECT and cub::BLOCK_LOAD_VECTORIZE alternatives. 
+ */ + BLOCK_LOAD_TRANSPOSE, + + + /** + * \par Overview + * + * A [warp-striped arrangement](index.html#sec5sec3) of data is + * read efficiently from memory and then locally transposed into a + * [blocked arrangement](index.html#sec5sec3). + * + * \par Usage Considerations + * - BLOCK_THREADS must be a multiple of WARP_THREADS + * + * \par Performance Considerations + * - The utilization of memory transactions (coalescing) remains high regardless + * of items loaded per thread. + * - The local reordering incurs slightly larger latencies than the + * direct cub::BLOCK_LOAD_DIRECT and cub::BLOCK_LOAD_VECTORIZE alternatives. + * - Provisions more shared storage, but incurs smaller latencies than the + * BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED alternative. + */ + BLOCK_LOAD_WARP_TRANSPOSE, + + + /** + * \par Overview + * + * Like \p BLOCK_LOAD_WARP_TRANSPOSE, a [warp-striped arrangement](index.html#sec5sec3) + * of data is read directly from memory and then is locally transposed into a + * [blocked arrangement](index.html#sec5sec3). To reduce the shared memory + * requirement, only one warp's worth of shared memory is provisioned and is + * subsequently time-sliced among warps. + * + * \par Usage Considerations + * - BLOCK_THREADS must be a multiple of WARP_THREADS + * + * \par Performance Considerations + * - The utilization of memory transactions (coalescing) remains high regardless + * of items loaded per thread. + * - Provisions less shared memory temporary storage, but incurs larger + * latencies than the BLOCK_LOAD_WARP_TRANSPOSE alternative. + */ + BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED, +}; + + +/** + * \brief The BlockLoad class provides [collective](index.html#sec0) data movement methods for loading a linear segment of items from memory into a [blocked arrangement](index.html#sec5sec3) across a CUDA thread block. ![](block_load_logo.png) + * \ingroup BlockModule + * \ingroup UtilIo + * + * \tparam InputT The data type to read into (which must be convertible from the input iterator's value type). + * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension + * \tparam ITEMS_PER_THREAD The number of consecutive items partitioned onto each thread. + * \tparam ALGORITHM [optional] cub::BlockLoadAlgorithm tuning policy. default: cub::BLOCK_LOAD_DIRECT. + * \tparam WARP_TIME_SLICING [optional] Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any load-related data transpositions (versus each warp having its own storage). (default: false) + * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) + * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) + * \tparam PTX_ARCH [optional] \ptxversion + * + * \par Overview + * - The BlockLoad class provides a single data movement abstraction that can be specialized + * to implement different cub::BlockLoadAlgorithm strategies. This facilitates different + * performance policies for different architectures, data types, granularity sizes, etc. + * - BlockLoad can be optionally specialized by different data movement strategies: + * -# cub::BLOCK_LOAD_DIRECT. A [blocked arrangement](index.html#sec5sec3) + * of data is read directly from memory. [More...](\ref cub::BlockLoadAlgorithm) + * -# cub::BLOCK_LOAD_VECTORIZE. A [blocked arrangement](index.html#sec5sec3) + * of data is read directly from memory using CUDA's built-in vectorized loads as a + * coalescing optimization. 
[More...](\ref cub::BlockLoadAlgorithm) + * -# cub::BLOCK_LOAD_TRANSPOSE. A [striped arrangement](index.html#sec5sec3) + * of data is read directly from memory and is then locally transposed into a + * [blocked arrangement](index.html#sec5sec3). [More...](\ref cub::BlockLoadAlgorithm) + * -# cub::BLOCK_LOAD_WARP_TRANSPOSE. A [warp-striped arrangement](index.html#sec5sec3) + * of data is read directly from memory and is then locally transposed into a + * [blocked arrangement](index.html#sec5sec3). [More...](\ref cub::BlockLoadAlgorithm) + * -# cub::BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED,. A [warp-striped arrangement](index.html#sec5sec3) + * of data is read directly from memory and is then locally transposed into a + * [blocked arrangement](index.html#sec5sec3) one warp at a time. [More...](\ref cub::BlockLoadAlgorithm) + * - \rowmajor + * + * \par A Simple Example + * \blockcollective{BlockLoad} + * \par + * The code snippet below illustrates the loading of a linear + * segment of 512 integers into a "blocked" arrangement across 128 threads where each + * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, + * meaning memory references are efficiently coalesced using a warp-striped access + * pattern (after which items are locally reordered among threads). + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(int *d_data, ...) + * { + * // Specialize BlockLoad for a 1D block of 128 threads owning 4 integer items each + * typedef cub::BlockLoad BlockLoad; + * + * // Allocate shared memory for BlockLoad + * __shared__ typename BlockLoad::TempStorage temp_storage; + * + * // Load a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * BlockLoad(temp_storage).Load(d_data, thread_data); + * + * \endcode + * \par + * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, .... + * The set of \p thread_data across the block of threads in those threads will be + * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. 
+ * + */ +template < + typename InputT, + int BLOCK_DIM_X, + int ITEMS_PER_THREAD, + BlockLoadAlgorithm ALGORITHM = BLOCK_LOAD_DIRECT, + int BLOCK_DIM_Y = 1, + int BLOCK_DIM_Z = 1, + int PTX_ARCH = CUB_PTX_ARCH> +class BlockLoad +{ +private: + + /****************************************************************************** + * Constants and typed definitions + ******************************************************************************/ + + /// Constants + enum + { + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + }; + + + /****************************************************************************** + * Algorithmic variants + ******************************************************************************/ + + /// Load helper + template + struct LoadInternal; + + + /** + * BLOCK_LOAD_DIRECT specialization of load helper + */ + template + struct LoadInternal + { + /// Shared memory storage layout type + typedef NullType TempStorage; + + /// Linear thread-id + int linear_tid; + + /// Constructor + __device__ __forceinline__ LoadInternal( + TempStorage &/*temp_storage*/, + int linear_tid) + : + linear_tid(linear_tid) + {} + + /// Load a linear segment of items from memory + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load + { + LoadDirectBlocked(linear_tid, block_itr, items); + } + + /// Load a linear segment of items from memory, guarded by range + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items) ///< [in] Number of valid items to load + { + LoadDirectBlocked(linear_tid, block_itr, items, valid_items); + } + + /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items, ///< [in] Number of valid items to load + DefaultT oob_default) ///< [in] Default value to assign out-of-bound items + { + LoadDirectBlocked(linear_tid, block_itr, items, valid_items, oob_default); + } + + }; + + + /** + * BLOCK_LOAD_VECTORIZE specialization of load helper + */ + template + struct LoadInternal + { + /// Shared memory storage layout type + typedef NullType TempStorage; + + /// Linear thread-id + int linear_tid; + + /// Constructor + __device__ __forceinline__ LoadInternal( + TempStorage &/*temp_storage*/, + int linear_tid) + : + linear_tid(linear_tid) + {} + + /// Load a linear segment of items from memory, specialized for native pointer types (attempts vectorization) + template + __device__ __forceinline__ void Load( + InputT *block_ptr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load + { + InternalLoadDirectBlockedVectorized(linear_tid, block_ptr, items); + } + + /// Load a linear segment of items from memory, specialized for native pointer types (attempts vectorization) + template + __device__ __forceinline__ void Load( + const InputT *block_ptr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load + { + 
InternalLoadDirectBlockedVectorized(linear_tid, block_ptr, items); + } + + /// Load a linear segment of items from memory, specialized for native pointer types (attempts vectorization) + template < + CacheLoadModifier MODIFIER, + typename ValueType, + typename OffsetT> + __device__ __forceinline__ void Load( + CacheModifiedInputIterator block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load + { + InternalLoadDirectBlockedVectorized(linear_tid, block_itr.ptr, items); + } + + /// Load a linear segment of items from memory, specialized for opaque input iterators (skips vectorization) + template + __device__ __forceinline__ void Load( + _InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load + { + LoadDirectBlocked(linear_tid, block_itr, items); + } + + /// Load a linear segment of items from memory, guarded by range (skips vectorization) + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items) ///< [in] Number of valid items to load + { + LoadDirectBlocked(linear_tid, block_itr, items, valid_items); + } + + /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements (skips vectorization) + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items, ///< [in] Number of valid items to load + DefaultT oob_default) ///< [in] Default value to assign out-of-bound items + { + LoadDirectBlocked(linear_tid, block_itr, items, valid_items, oob_default); + } + + }; + + + /** + * BLOCK_LOAD_TRANSPOSE specialization of load helper + */ + template + struct LoadInternal + { + // BlockExchange utility type for keys + typedef BlockExchange BlockExchange; + + /// Shared memory storage layout type + struct _TempStorage : BlockExchange::TempStorage + {}; + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + /// Thread reference to shared storage + _TempStorage &temp_storage; + + /// Linear thread-id + int linear_tid; + + /// Constructor + __device__ __forceinline__ LoadInternal( + TempStorage &temp_storage, + int linear_tid) + : + temp_storage(temp_storage.Alias()), + linear_tid(linear_tid) + {} + + /// Load a linear segment of items from memory + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load{ + { + LoadDirectStriped(linear_tid, block_itr, items); + BlockExchange(temp_storage).StripedToBlocked(items, items); + } + + /// Load a linear segment of items from memory, guarded by range + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items) ///< [in] Number of valid items to load + { + LoadDirectStriped(linear_tid, block_itr, items, valid_items); + BlockExchange(temp_storage).StripedToBlocked(items, items); + } + + /// Load a linear segment of items from memory, guarded by range, 
with a fall-back assignment of out-of-bound elements + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items, ///< [in] Number of valid items to load + DefaultT oob_default) ///< [in] Default value to assign out-of-bound items + { + LoadDirectStriped(linear_tid, block_itr, items, valid_items, oob_default); + BlockExchange(temp_storage).StripedToBlocked(items, items); + } + + }; + + + /** + * BLOCK_LOAD_WARP_TRANSPOSE specialization of load helper + */ + template + struct LoadInternal + { + enum + { + WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) + }; + + // Assert BLOCK_THREADS must be a multiple of WARP_THREADS + CUB_STATIC_ASSERT((BLOCK_THREADS % WARP_THREADS == 0), "BLOCK_THREADS must be a multiple of WARP_THREADS"); + + // BlockExchange utility type for keys + typedef BlockExchange BlockExchange; + + /// Shared memory storage layout type + struct _TempStorage : BlockExchange::TempStorage + {}; + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + /// Thread reference to shared storage + _TempStorage &temp_storage; + + /// Linear thread-id + int linear_tid; + + /// Constructor + __device__ __forceinline__ LoadInternal( + TempStorage &temp_storage, + int linear_tid) + : + temp_storage(temp_storage.Alias()), + linear_tid(linear_tid) + {} + + /// Load a linear segment of items from memory + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load{ + { + LoadDirectWarpStriped(linear_tid, block_itr, items); + BlockExchange(temp_storage).WarpStripedToBlocked(items, items); + } + + /// Load a linear segment of items from memory, guarded by range + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items) ///< [in] Number of valid items to load + { + LoadDirectWarpStriped(linear_tid, block_itr, items, valid_items); + BlockExchange(temp_storage).WarpStripedToBlocked(items, items); + } + + + /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items, ///< [in] Number of valid items to load + DefaultT oob_default) ///< [in] Default value to assign out-of-bound items + { + LoadDirectWarpStriped(linear_tid, block_itr, items, valid_items, oob_default); + BlockExchange(temp_storage).WarpStripedToBlocked(items, items); + } + }; + + + /** + * BLOCK_LOAD_WARP_TRANSPOSE_TIMESLICED specialization of load helper + */ + template + struct LoadInternal + { + enum + { + WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) + }; + + // Assert BLOCK_THREADS must be a multiple of WARP_THREADS + CUB_STATIC_ASSERT((BLOCK_THREADS % WARP_THREADS == 0), "BLOCK_THREADS must be a multiple of WARP_THREADS"); + + // BlockExchange utility type for keys + typedef BlockExchange BlockExchange; + + /// Shared memory storage layout type + struct _TempStorage : BlockExchange::TempStorage + {}; + + /// Alias wrapper allowing storage to be 
unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + /// Thread reference to shared storage + _TempStorage &temp_storage; + + /// Linear thread-id + int linear_tid; + + /// Constructor + __device__ __forceinline__ LoadInternal( + TempStorage &temp_storage, + int linear_tid) + : + temp_storage(temp_storage.Alias()), + linear_tid(linear_tid) + {} + + /// Load a linear segment of items from memory + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load{ + { + LoadDirectWarpStriped(linear_tid, block_itr, items); + BlockExchange(temp_storage).WarpStripedToBlocked(items, items); + } + + /// Load a linear segment of items from memory, guarded by range + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items) ///< [in] Number of valid items to load + { + LoadDirectWarpStriped(linear_tid, block_itr, items, valid_items); + BlockExchange(temp_storage).WarpStripedToBlocked(items, items); + } + + + /// Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items, ///< [in] Number of valid items to load + DefaultT oob_default) ///< [in] Default value to assign out-of-bound items + { + LoadDirectWarpStriped(linear_tid, block_itr, items, valid_items, oob_default); + BlockExchange(temp_storage).WarpStripedToBlocked(items, items); + } + }; + + + /****************************************************************************** + * Type definitions + ******************************************************************************/ + + /// Internal load implementation to use + typedef LoadInternal InternalLoad; + + + /// Shared memory storage layout type + typedef typename InternalLoad::TempStorage _TempStorage; + + + /****************************************************************************** + * Utility methods + ******************************************************************************/ + + /// Internal storage allocator + __device__ __forceinline__ _TempStorage& PrivateStorage() + { + __shared__ _TempStorage private_storage; + return private_storage; + } + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + /// Thread reference to shared storage + _TempStorage &temp_storage; + + /// Linear thread-id + int linear_tid; + +public: + + /// \smemstorage{BlockLoad} + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /******************************************************************//** + * \name Collective constructors + *********************************************************************/ + //@{ + + /** + * \brief Collective constructor using a private static allocation of shared memory as temporary storage. + */ + __device__ __forceinline__ BlockLoad() + : + temp_storage(PrivateStorage()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + /** + * \brief Collective constructor using the specified memory allocation as temporary storage. 
+ */ + __device__ __forceinline__ BlockLoad( + TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + + + //@} end member group + /******************************************************************//** + * \name Data movement + *********************************************************************/ + //@{ + + + /** + * \brief Load a linear segment of items from memory. + * + * \par + * - \blocked + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the loading of a linear + * segment of 512 integers into a "blocked" arrangement across 128 threads where each + * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, + * meaning memory references are efficiently coalesced using a warp-striped access + * pattern (after which items are locally reordered among threads). + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(int *d_data, ...) + * { + * // Specialize BlockLoad for a 1D block of 128 threads owning 4 integer items each + * typedef cub::BlockLoad BlockLoad; + * + * // Allocate shared memory for BlockLoad + * __shared__ typename BlockLoad::TempStorage temp_storage; + * + * // Load a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * BlockLoad(temp_storage).Load(d_data, thread_data); + * + * \endcode + * \par + * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, .... + * The set of \p thread_data across the block of threads in those threads will be + * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. + * + */ + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load + { + InternalLoad(temp_storage, linear_tid).Load(block_itr, items); + } + + + /** + * \brief Load a linear segment of items from memory, guarded by range. + * + * \par + * - \blocked + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the guarded loading of a linear + * segment of 512 integers into a "blocked" arrangement across 128 threads where each + * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, + * meaning memory references are efficiently coalesced using a warp-striped access + * pattern (after which items are locally reordered among threads). + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(int *d_data, int valid_items, ...) + * { + * // Specialize BlockLoad for a 1D block of 128 threads owning 4 integer items each + * typedef cub::BlockLoad BlockLoad; + * + * // Allocate shared memory for BlockLoad + * __shared__ typename BlockLoad::TempStorage temp_storage; + * + * // Load a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * BlockLoad(temp_storage).Load(d_data, thread_data, valid_items); + * + * \endcode + * \par + * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, 6... and \p valid_items is \p 5. + * The set of \p thread_data across the block of threads in those threads will be + * { [0,1,2,3], [4,?,?,?], ..., [?,?,?,?] }, with only the first two threads + * being unmasked to load portions of valid data (and other items remaining unassigned). 
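The guarded Load variants documented above are what the last, partially-full tile of an input typically uses. Below is a minimal sketch under the same 128-thread, 4-items-per-thread configuration; the kernel and buffer names (GuardedTileSumKernel, d_in, d_thread_partials, num_items) are hypothetical, and the out-of-bounds default is chosen as 0 so it stays neutral for the per-thread sum that follows. It assumes the cub-1.8.0 headers vendored in this diff.

#include <cub/block/block_load.cuh>

__global__ void GuardedTileSumKernel(const int *d_in, int *d_thread_partials, int num_items)
{
    // 128 threads x 4 items per thread; warp-transposed loads keep global reads coalesced
    typedef cub::BlockLoad<int, 128, 4, cub::BLOCK_LOAD_WARP_TRANSPOSE> BlockLoadT;
    __shared__ typename BlockLoadT::TempStorage temp_storage;

    int block_offset = blockIdx.x * (128 * 4);
    int valid_items  = num_items - block_offset;   // smaller than 512 only for the final tile

    // Out-of-range slots are filled with 0 instead of being left unassigned
    int items[4];
    BlockLoadT(temp_storage).Load(d_in + block_offset, items, valid_items, 0);

    // The 0-filled out-of-range slots do not disturb the per-thread partial sum
    int thread_sum = 0;
    for (int i = 0; i < 4; ++i)
        thread_sum += items[i];
    d_thread_partials[blockIdx.x * 128 + threadIdx.x] = thread_sum;
}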
+ * + */ + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items) ///< [in] Number of valid items to load + { + InternalLoad(temp_storage, linear_tid).Load(block_itr, items, valid_items); + } + + + /** + * \brief Load a linear segment of items from memory, guarded by range, with a fall-back assignment of out-of-bound elements + * + * \par + * - \blocked + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the guarded loading of a linear + * segment of 512 integers into a "blocked" arrangement across 128 threads where each + * thread owns 4 consecutive items. The load is specialized for \p BLOCK_LOAD_WARP_TRANSPOSE, + * meaning memory references are efficiently coalesced using a warp-striped access + * pattern (after which items are locally reordered among threads). + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(int *d_data, int valid_items, ...) + * { + * // Specialize BlockLoad for a 1D block of 128 threads owning 4 integer items each + * typedef cub::BlockLoad BlockLoad; + * + * // Allocate shared memory for BlockLoad + * __shared__ typename BlockLoad::TempStorage temp_storage; + * + * // Load a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * BlockLoad(temp_storage).Load(d_data, thread_data, valid_items, -1); + * + * \endcode + * \par + * Suppose the input \p d_data is 0, 1, 2, 3, 4, 5, 6..., + * \p valid_items is \p 5, and the out-of-bounds default is \p -1. + * The set of \p thread_data across the block of threads in those threads will be + * { [0,1,2,3], [4,-1,-1,-1], ..., [-1,-1,-1,-1] }, with only the first two threads + * being unmasked to load portions of valid data (and other items are assigned \p -1) + * + */ + template + __device__ __forceinline__ void Load( + InputIteratorT block_itr, ///< [in] The thread block's base input iterator for loading from + InputT (&items)[ITEMS_PER_THREAD], ///< [out] Data to load + int valid_items, ///< [in] Number of valid items to load + DefaultT oob_default) ///< [in] Default value to assign out-of-bound items + { + InternalLoad(temp_storage, linear_tid).Load(block_itr, items, valid_items, oob_default); + } + + + //@} end member group + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_radix_rank.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_radix_rank.cuh new file mode 100644 index 0000000..c26451c --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_radix_rank.cuh @@ -0,0 +1,696 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block + */ + +#pragma once + +#include + +#include "../thread/thread_reduce.cuh" +#include "../thread/thread_scan.cuh" +#include "../block/block_scan.cuh" +#include "../util_ptx.cuh" +#include "../util_arch.cuh" +#include "../util_type.cuh" +#include "../util_namespace.cuh" + + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \brief BlockRadixRank provides operations for ranking unsigned integer types within a CUDA thread block. + * \ingroup BlockModule + * + * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension + * \tparam RADIX_BITS The number of radix bits per digit place + * \tparam IS_DESCENDING Whether or not the sorted-order is high-to-low + * \tparam MEMOIZE_OUTER_SCAN [optional] Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure (default: true for architectures SM35 and newer, false otherwise). See BlockScanAlgorithm::BLOCK_SCAN_RAKING_MEMOIZE for more details. + * \tparam INNER_SCAN_ALGORITHM [optional] The cub::BlockScanAlgorithm algorithm to use (default: cub::BLOCK_SCAN_WARP_SCANS) + * \tparam SMEM_CONFIG [optional] Shared memory bank mode (default: \p cudaSharedMemBankSizeFourByte) + * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) + * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) + * \tparam PTX_ARCH [optional] \ptxversion + * + * \par Overview + * Blah... + * - Keys must be in a form suitable for radix ranking (i.e., unsigned bits). + * - \blocked + * + * \par Performance Considerations + * - \granularity + * + * \par Examples + * \par + * - Example 1: Simple radix rank of 32-bit integer keys + * \code + * #include + * + * template + * __global__ void ExampleKernel(...) + * { + * + * \endcode + */ +template < + int BLOCK_DIM_X, + int RADIX_BITS, + bool IS_DESCENDING, + bool MEMOIZE_OUTER_SCAN = (CUB_PTX_ARCH >= 350) ? 
true : false, + BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, + cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte, + int BLOCK_DIM_Y = 1, + int BLOCK_DIM_Z = 1, + int PTX_ARCH = CUB_PTX_ARCH> +class BlockRadixRank +{ +private: + + /****************************************************************************** + * Type definitions and constants + ******************************************************************************/ + + // Integer type for digit counters (to be packed into words of type PackedCounters) + typedef unsigned short DigitCounter; + + // Integer type for packing DigitCounters into columns of shared memory banks + typedef typename If<(SMEM_CONFIG == cudaSharedMemBankSizeEightByte), + unsigned long long, + unsigned int>::Type PackedCounter; + + enum + { + // The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + + RADIX_DIGITS = 1 << RADIX_BITS, + + LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH), + WARP_THREADS = 1 << LOG_WARP_THREADS, + WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, + + BYTES_PER_COUNTER = sizeof(DigitCounter), + LOG_BYTES_PER_COUNTER = Log2::VALUE, + + PACKING_RATIO = sizeof(PackedCounter) / sizeof(DigitCounter), + LOG_PACKING_RATIO = Log2::VALUE, + + LOG_COUNTER_LANES = CUB_MAX((RADIX_BITS - LOG_PACKING_RATIO), 0), // Always at least one lane + COUNTER_LANES = 1 << LOG_COUNTER_LANES, + + // The number of packed counters per thread (plus one for padding) + PADDED_COUNTER_LANES = COUNTER_LANES + 1, + RAKING_SEGMENT = PADDED_COUNTER_LANES, + }; + +public: + + enum + { + /// Number of bin-starting offsets tracked per thread + BINS_TRACKED_PER_THREAD = CUB_MAX(1, (RADIX_DIGITS + BLOCK_THREADS - 1) / BLOCK_THREADS), + }; + +private: + + + /// BlockScan type + typedef BlockScan< + PackedCounter, + BLOCK_DIM_X, + INNER_SCAN_ALGORITHM, + BLOCK_DIM_Y, + BLOCK_DIM_Z, + PTX_ARCH> + BlockScan; + + + /// Shared memory storage layout type for BlockRadixRank + struct __align__(16) _TempStorage + { + union Aliasable + { + DigitCounter digit_counters[PADDED_COUNTER_LANES][BLOCK_THREADS][PACKING_RATIO]; + PackedCounter raking_grid[BLOCK_THREADS][RAKING_SEGMENT]; + + } aliasable; + + // Storage for scanning local ranks + typename BlockScan::TempStorage block_scan; + }; + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + /// Shared storage reference + _TempStorage &temp_storage; + + /// Linear thread-id + unsigned int linear_tid; + + /// Copy of raking segment, promoted to registers + PackedCounter cached_segment[RAKING_SEGMENT]; + + + /****************************************************************************** + * Utility methods + ******************************************************************************/ + + /** + * Internal storage allocator + */ + __device__ __forceinline__ _TempStorage& PrivateStorage() + { + __shared__ _TempStorage private_storage; + return private_storage; + } + + + /** + * Performs upsweep raking reduction, returning the aggregate + */ + __device__ __forceinline__ PackedCounter Upsweep() + { + PackedCounter *smem_raking_ptr = temp_storage.aliasable.raking_grid[linear_tid]; + PackedCounter *raking_ptr; + + if (MEMOIZE_OUTER_SCAN) + { + // Copy data into registers + #pragma unroll + for (int i = 0; i < RAKING_SEGMENT; i++) + { + cached_segment[i] = smem_raking_ptr[i]; + } + raking_ptr = cached_segment; + } + else + { + 
raking_ptr = smem_raking_ptr; + } + + return internal::ThreadReduce(raking_ptr, Sum()); + } + + + /// Performs exclusive downsweep raking scan + __device__ __forceinline__ void ExclusiveDownsweep( + PackedCounter raking_partial) + { + PackedCounter *smem_raking_ptr = temp_storage.aliasable.raking_grid[linear_tid]; + + PackedCounter *raking_ptr = (MEMOIZE_OUTER_SCAN) ? + cached_segment : + smem_raking_ptr; + + // Exclusive raking downsweep scan + internal::ThreadScanExclusive(raking_ptr, raking_ptr, Sum(), raking_partial); + + if (MEMOIZE_OUTER_SCAN) + { + // Copy data back to smem + #pragma unroll + for (int i = 0; i < RAKING_SEGMENT; i++) + { + smem_raking_ptr[i] = cached_segment[i]; + } + } + } + + + /** + * Reset shared memory digit counters + */ + __device__ __forceinline__ void ResetCounters() + { + // Reset shared memory digit counters + #pragma unroll + for (int LANE = 0; LANE < PADDED_COUNTER_LANES; LANE++) + { + *((PackedCounter*) temp_storage.aliasable.digit_counters[LANE][linear_tid]) = 0; + } + } + + + /** + * Block-scan prefix callback + */ + struct PrefixCallBack + { + __device__ __forceinline__ PackedCounter operator()(PackedCounter block_aggregate) + { + PackedCounter block_prefix = 0; + + // Propagate totals in packed fields + #pragma unroll + for (int PACKED = 1; PACKED < PACKING_RATIO; PACKED++) + { + block_prefix += block_aggregate << (sizeof(DigitCounter) * 8 * PACKED); + } + + return block_prefix; + } + }; + + + /** + * Scan shared memory digit counters. + */ + __device__ __forceinline__ void ScanCounters() + { + // Upsweep scan + PackedCounter raking_partial = Upsweep(); + + // Compute exclusive sum + PackedCounter exclusive_partial; + PrefixCallBack prefix_call_back; + BlockScan(temp_storage.block_scan).ExclusiveSum(raking_partial, exclusive_partial, prefix_call_back); + + // Downsweep scan with exclusive partial + ExclusiveDownsweep(exclusive_partial); + } + +public: + + /// \smemstorage{BlockScan} + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /******************************************************************//** + * \name Collective constructors + *********************************************************************/ + //@{ + + /** + * \brief Collective constructor using a private static allocation of shared memory as temporary storage. + */ + __device__ __forceinline__ BlockRadixRank() + : + temp_storage(PrivateStorage()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + /** + * \brief Collective constructor using the specified memory allocation as temporary storage. + */ + __device__ __forceinline__ BlockRadixRank( + TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + //@} end member group + /******************************************************************//** + * \name Raking + *********************************************************************/ + //@{ + + /** + * \brief Rank keys. 
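Example 1 in the class-level comment above stops at the kernel signature. As a complement, here is one minimal way the RankKeys member defined below could be exercised, assuming 128 threads, 4 keys per thread, 4-bit digits, and ascending order; the kernel and buffer names are hypothetical, and only the cub-1.8.0 headers vendored in this diff are assumed.

#include <cub/block/block_radix_rank.cuh>

__global__ void RankKernel(const unsigned int *d_keys, int *d_ranks)
{
    // 128 threads, 4-bit digits, ascending order
    typedef cub::BlockRadixRank<128, 4, false> BlockRadixRankT;
    __shared__ typename BlockRadixRankT::TempStorage temp_storage;

    // Each thread owns 4 keys in blocked arrangement
    unsigned int keys[4];
    for (int i = 0; i < 4; ++i)
        keys[i] = d_keys[threadIdx.x * 4 + i];

    // Local rank of each key within the tile, looking only at digit bits [0, 4)
    int ranks[4];
    BlockRadixRankT(temp_storage).RankKeys(keys, ranks, 0, 4);

    for (int i = 0; i < 4; ++i)
        d_ranks[threadIdx.x * 4 + i] = ranks[i];
}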
+ */ + template < + typename UnsignedBits, + int KEYS_PER_THREAD> + __device__ __forceinline__ void RankKeys( + UnsignedBits (&keys)[KEYS_PER_THREAD], ///< [in] Keys for this tile + int (&ranks)[KEYS_PER_THREAD], ///< [out] For each key, the local rank within the tile + int current_bit, ///< [in] The least-significant bit position of the current digit to extract + int num_bits) ///< [in] The number of bits in the current digit + { + DigitCounter thread_prefixes[KEYS_PER_THREAD]; // For each key, the count of previous keys in this tile having the same digit + DigitCounter* digit_counters[KEYS_PER_THREAD]; // For each key, the byte-offset of its corresponding digit counter in smem + + // Reset shared memory digit counters + ResetCounters(); + + #pragma unroll + for (int ITEM = 0; ITEM < KEYS_PER_THREAD; ++ITEM) + { + // Get digit + unsigned int digit = BFE(keys[ITEM], current_bit, num_bits); + + // Get sub-counter + unsigned int sub_counter = digit >> LOG_COUNTER_LANES; + + // Get counter lane + unsigned int counter_lane = digit & (COUNTER_LANES - 1); + + if (IS_DESCENDING) + { + sub_counter = PACKING_RATIO - 1 - sub_counter; + counter_lane = COUNTER_LANES - 1 - counter_lane; + } + + // Pointer to smem digit counter + digit_counters[ITEM] = &temp_storage.aliasable.digit_counters[counter_lane][linear_tid][sub_counter]; + + // Load thread-exclusive prefix + thread_prefixes[ITEM] = *digit_counters[ITEM]; + + // Store inclusive prefix + *digit_counters[ITEM] = thread_prefixes[ITEM] + 1; + } + + CTA_SYNC(); + + // Scan shared memory counters + ScanCounters(); + + CTA_SYNC(); + + // Extract the local ranks of each key + for (int ITEM = 0; ITEM < KEYS_PER_THREAD; ++ITEM) + { + // Add in thread block exclusive prefix + ranks[ITEM] = thread_prefixes[ITEM] + *digit_counters[ITEM]; + } + } + + + /** + * \brief Rank keys. For the lower \p RADIX_DIGITS threads, digit counts for each digit are provided for the corresponding thread. + */ + template < + typename UnsignedBits, + int KEYS_PER_THREAD> + __device__ __forceinline__ void RankKeys( + UnsignedBits (&keys)[KEYS_PER_THREAD], ///< [in] Keys for this tile + int (&ranks)[KEYS_PER_THREAD], ///< [out] For each key, the local rank within the tile (out parameter) + int current_bit, ///< [in] The least-significant bit position of the current digit to extract + int num_bits, ///< [in] The number of bits in the current digit + int (&exclusive_digit_prefix)[BINS_TRACKED_PER_THREAD]) ///< [out] The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... (threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1] + { + // Rank keys + RankKeys(keys, ranks, current_bit, num_bits); + + // Get the inclusive and exclusive digit totals corresponding to the calling thread. + #pragma unroll + for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) + { + int bin_idx = (linear_tid * BINS_TRACKED_PER_THREAD) + track; + + if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) + { + if (IS_DESCENDING) + bin_idx = RADIX_DIGITS - bin_idx - 1; + + // Obtain ex/inclusive digit counts. (Unfortunately these all reside in the + // first counter column, resulting in unavoidable bank conflicts.) 
+ unsigned int counter_lane = (bin_idx & (COUNTER_LANES - 1)); + unsigned int sub_counter = bin_idx >> (LOG_COUNTER_LANES); + + exclusive_digit_prefix[track] = temp_storage.aliasable.digit_counters[counter_lane][0][sub_counter]; + } + } + } +}; + + + + + +/** + * Radix-rank using match.any + */ +template < + int BLOCK_DIM_X, + int RADIX_BITS, + bool IS_DESCENDING, + BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, + int BLOCK_DIM_Y = 1, + int BLOCK_DIM_Z = 1, + int PTX_ARCH = CUB_PTX_ARCH> +class BlockRadixRankMatch +{ +private: + + /****************************************************************************** + * Type definitions and constants + ******************************************************************************/ + + typedef int32_t RankT; + typedef int32_t DigitCounterT; + + enum + { + // The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + + RADIX_DIGITS = 1 << RADIX_BITS, + + LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH), + WARP_THREADS = 1 << LOG_WARP_THREADS, + WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, + + PADDED_WARPS = ((WARPS & 0x1) == 0) ? + WARPS + 1 : + WARPS, + + COUNTERS = PADDED_WARPS * RADIX_DIGITS, + RAKING_SEGMENT = (COUNTERS + BLOCK_THREADS - 1) / BLOCK_THREADS, + PADDED_RAKING_SEGMENT = ((RAKING_SEGMENT & 0x1) == 0) ? + RAKING_SEGMENT + 1 : + RAKING_SEGMENT, + }; + +public: + + enum + { + /// Number of bin-starting offsets tracked per thread + BINS_TRACKED_PER_THREAD = CUB_MAX(1, (RADIX_DIGITS + BLOCK_THREADS - 1) / BLOCK_THREADS), + }; + +private: + + /// BlockScan type + typedef BlockScan< + DigitCounterT, + BLOCK_THREADS, + INNER_SCAN_ALGORITHM, + BLOCK_DIM_Y, + BLOCK_DIM_Z, + PTX_ARCH> + BlockScanT; + + + /// Shared memory storage layout type for BlockRadixRank + struct __align__(16) _TempStorage + { + typename BlockScanT::TempStorage block_scan; + + union __align__(16) Aliasable + { + volatile DigitCounterT warp_digit_counters[RADIX_DIGITS][PADDED_WARPS]; + DigitCounterT raking_grid[BLOCK_THREADS][PADDED_RAKING_SEGMENT]; + + } aliasable; + }; + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + /// Shared storage reference + _TempStorage &temp_storage; + + /// Linear thread-id + unsigned int linear_tid; + + + +public: + + /// \smemstorage{BlockScan} + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /******************************************************************//** + * \name Collective constructors + *********************************************************************/ + //@{ + + + /** + * \brief Collective constructor using the specified memory allocation as temporary storage. + */ + __device__ __forceinline__ BlockRadixRankMatch( + TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + //@} end member group + /******************************************************************//** + * \name Raking + *********************************************************************/ + //@{ + + /** + * \brief Rank keys. 
+ */ + template < + typename UnsignedBits, + int KEYS_PER_THREAD> + __device__ __forceinline__ void RankKeys( + UnsignedBits (&keys)[KEYS_PER_THREAD], ///< [in] Keys for this tile + int (&ranks)[KEYS_PER_THREAD], ///< [out] For each key, the local rank within the tile + int current_bit, ///< [in] The least-significant bit position of the current digit to extract + int num_bits) ///< [in] The number of bits in the current digit + { + // Initialize shared digit counters + + #pragma unroll + for (int ITEM = 0; ITEM < PADDED_RAKING_SEGMENT; ++ITEM) + temp_storage.aliasable.raking_grid[linear_tid][ITEM] = 0; + + CTA_SYNC(); + + // Each warp will strip-mine its section of input, one strip at a time + + volatile DigitCounterT *digit_counters[KEYS_PER_THREAD]; + uint32_t warp_id = linear_tid >> LOG_WARP_THREADS; + uint32_t lane_mask_lt = LaneMaskLt(); + + #pragma unroll + for (int ITEM = 0; ITEM < KEYS_PER_THREAD; ++ITEM) + { + // My digit + uint32_t digit = BFE(keys[ITEM], current_bit, num_bits); + + if (IS_DESCENDING) + digit = RADIX_DIGITS - digit - 1; + + // Mask of peers who have same digit as me + uint32_t peer_mask = MatchAny(digit); + + // Pointer to smem digit counter for this key + digit_counters[ITEM] = &temp_storage.aliasable.warp_digit_counters[digit][warp_id]; + + // Number of occurrences in previous strips + DigitCounterT warp_digit_prefix = *digit_counters[ITEM]; + + // Warp-sync + WARP_SYNC(0xFFFFFFFF); + + // Number of peers having same digit as me + int32_t digit_count = __popc(peer_mask); + + // Number of lower-ranked peers having same digit seen so far + int32_t peer_digit_prefix = __popc(peer_mask & lane_mask_lt); + + if (peer_digit_prefix == 0) + { + // First thread for each digit updates the shared warp counter + *digit_counters[ITEM] = DigitCounterT(warp_digit_prefix + digit_count); + } + + // Warp-sync + WARP_SYNC(0xFFFFFFFF); + + // Number of prior keys having same digit + ranks[ITEM] = warp_digit_prefix + DigitCounterT(peer_digit_prefix); + } + + CTA_SYNC(); + + // Scan warp counters + + DigitCounterT scan_counters[PADDED_RAKING_SEGMENT]; + + #pragma unroll + for (int ITEM = 0; ITEM < PADDED_RAKING_SEGMENT; ++ITEM) + scan_counters[ITEM] = temp_storage.aliasable.raking_grid[linear_tid][ITEM]; + + BlockScanT(temp_storage.block_scan).ExclusiveSum(scan_counters, scan_counters); + + #pragma unroll + for (int ITEM = 0; ITEM < PADDED_RAKING_SEGMENT; ++ITEM) + temp_storage.aliasable.raking_grid[linear_tid][ITEM] = scan_counters[ITEM]; + + CTA_SYNC(); + + // Seed ranks with counter values from previous warps + #pragma unroll + for (int ITEM = 0; ITEM < KEYS_PER_THREAD; ++ITEM) + ranks[ITEM] += *digit_counters[ITEM]; + } + + + /** + * \brief Rank keys. For the lower \p RADIX_DIGITS threads, digit counts for each digit are provided for the corresponding thread. + */ + template < + typename UnsignedBits, + int KEYS_PER_THREAD> + __device__ __forceinline__ void RankKeys( + UnsignedBits (&keys)[KEYS_PER_THREAD], ///< [in] Keys for this tile + int (&ranks)[KEYS_PER_THREAD], ///< [out] For each key, the local rank within the tile (out parameter) + int current_bit, ///< [in] The least-significant bit position of the current digit to extract + int num_bits, ///< [in] The number of bits in the current digit + int (&exclusive_digit_prefix)[BINS_TRACKED_PER_THREAD]) ///< [out] The exclusive prefix sum for the digits [(threadIdx.x * BINS_TRACKED_PER_THREAD) ... 
(threadIdx.x * BINS_TRACKED_PER_THREAD) + BINS_TRACKED_PER_THREAD - 1] + { + RankKeys(keys, ranks, current_bit, num_bits); + + // Get exclusive count for each digit + #pragma unroll + for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) + { + int bin_idx = (linear_tid * BINS_TRACKED_PER_THREAD) + track; + + if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) + { + if (IS_DESCENDING) + bin_idx = RADIX_DIGITS - bin_idx - 1; + + exclusive_digit_prefix[track] = temp_storage.aliasable.warp_digit_counters[bin_idx][0]; + } + } + } +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_radix_sort.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_radix_sort.cuh new file mode 100644 index 0000000..ac0c9f8 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_radix_sort.cuh @@ -0,0 +1,863 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * The cub::BlockRadixSort class provides [collective](index.html#sec0) methods for radix sorting of items partitioned across a CUDA thread block. + */ + + +#pragma once + +#include "block_exchange.cuh" +#include "block_radix_rank.cuh" +#include "../util_ptx.cuh" +#include "../util_arch.cuh" +#include "../util_type.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \brief The BlockRadixSort class provides [collective](index.html#sec0) methods for sorting items partitioned across a CUDA thread block using a radix sorting method. 
![](sorting_logo.png) + * \ingroup BlockModule + * + * \tparam KeyT KeyT type + * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension + * \tparam ITEMS_PER_THREAD The number of items per thread + * \tparam ValueT [optional] ValueT type (default: cub::NullType, which indicates a keys-only sort) + * \tparam RADIX_BITS [optional] The number of radix bits per digit place (default: 4 bits) + * \tparam MEMOIZE_OUTER_SCAN [optional] Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure (default: true for architectures SM35 and newer, false otherwise). + * \tparam INNER_SCAN_ALGORITHM [optional] The cub::BlockScanAlgorithm algorithm to use (default: cub::BLOCK_SCAN_WARP_SCANS) + * \tparam SMEM_CONFIG [optional] Shared memory bank mode (default: \p cudaSharedMemBankSizeFourByte) + * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) + * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) + * \tparam PTX_ARCH [optional] \ptxversion + * + * \par Overview + * - The [radix sorting method](http://en.wikipedia.org/wiki/Radix_sort) arranges + * items into ascending order. It relies upon a positional representation for + * keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, + * characters, etc.) specified from least-significant to most-significant. For a + * given input sequence of keys and a set of rules specifying a total ordering + * of the symbolic alphabet, the radix sorting method produces a lexicographic + * ordering of those keys. + * - BlockRadixSort can sort all of the built-in C++ numeric primitive types + * (unsigned char, \p int, \p double, etc.) as well as CUDA's \p __half + * half-precision floating-point type. Within each key, the implementation treats fixed-length + * bit-sequences of \p RADIX_BITS as radix digit places. Although the direct radix sorting + * method can only be applied to unsigned integral types, BlockRadixSort + * is able to sort signed and floating-point types via simple bit-wise transformations + * that ensure lexicographic key ordering. + * - \rowmajor + * + * \par Performance Considerations + * - \granularity + * + * \par A Simple Example + * \blockcollective{BlockRadixSort} + * \par + * The code snippet below illustrates a sort of 512 integer keys that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer items each + * typedef cub::BlockRadixSort BlockRadixSort; + * + * // Allocate shared memory for BlockRadixSort + * __shared__ typename BlockRadixSort::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_keys[4]; + * ... + * + * // Collectively sort the keys + * BlockRadixSort(temp_storage).Sort(thread_keys); + * + * ... + * \endcode + * \par + * Suppose the set of input \p thread_keys across the block of threads is + * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The + * corresponding output \p thread_keys in those threads will be + * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. 
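The template arguments in the simple example above appear to have been stripped when this header was flattened into the diff. A complete version of the same 128-thread, 4-items-per-thread integer sort might look like the sketch below; d_in and d_out are hypothetical buffers, and the cub-1.8.0 headers vendored in this diff are assumed.

#include <cub/block/block_radix_sort.cuh>

__global__ void SortKernel(const int *d_in, int *d_out)
{
    // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys each
    typedef cub::BlockRadixSort<int, 128, 4> BlockRadixSortT;
    __shared__ typename BlockRadixSortT::TempStorage temp_storage;

    // Obtain a blocked segment of consecutive keys
    int thread_keys[4];
    for (int i = 0; i < 4; ++i)
        thread_keys[i] = d_in[threadIdx.x * 4 + i];

    // Collectively sort the 512 keys across the block
    BlockRadixSortT(temp_storage).Sort(thread_keys);

    for (int i = 0; i < 4; ++i)
        d_out[threadIdx.x * 4 + i] = thread_keys[i];
}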
+ * + */ +template < + typename KeyT, + int BLOCK_DIM_X, + int ITEMS_PER_THREAD, + typename ValueT = NullType, + int RADIX_BITS = 4, + bool MEMOIZE_OUTER_SCAN = (CUB_PTX_ARCH >= 350) ? true : false, + BlockScanAlgorithm INNER_SCAN_ALGORITHM = BLOCK_SCAN_WARP_SCANS, + cudaSharedMemConfig SMEM_CONFIG = cudaSharedMemBankSizeFourByte, + int BLOCK_DIM_Y = 1, + int BLOCK_DIM_Z = 1, + int PTX_ARCH = CUB_PTX_ARCH> +class BlockRadixSort +{ +private: + + /****************************************************************************** + * Constants and type definitions + ******************************************************************************/ + + enum + { + // The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + + // Whether or not there are values to be trucked along with keys + KEYS_ONLY = Equals::VALUE, + }; + + // KeyT traits and unsigned bits type + typedef Traits KeyTraits; + typedef typename KeyTraits::UnsignedBits UnsignedBits; + + /// Ascending BlockRadixRank utility type + typedef BlockRadixRank< + BLOCK_DIM_X, + RADIX_BITS, + false, + MEMOIZE_OUTER_SCAN, + INNER_SCAN_ALGORITHM, + SMEM_CONFIG, + BLOCK_DIM_Y, + BLOCK_DIM_Z, + PTX_ARCH> + AscendingBlockRadixRank; + + /// Descending BlockRadixRank utility type + typedef BlockRadixRank< + BLOCK_DIM_X, + RADIX_BITS, + true, + MEMOIZE_OUTER_SCAN, + INNER_SCAN_ALGORITHM, + SMEM_CONFIG, + BLOCK_DIM_Y, + BLOCK_DIM_Z, + PTX_ARCH> + DescendingBlockRadixRank; + + /// BlockExchange utility type for keys + typedef BlockExchange BlockExchangeKeys; + + /// BlockExchange utility type for values + typedef BlockExchange BlockExchangeValues; + + /// Shared memory storage layout type + union _TempStorage + { + typename AscendingBlockRadixRank::TempStorage asending_ranking_storage; + typename DescendingBlockRadixRank::TempStorage descending_ranking_storage; + typename BlockExchangeKeys::TempStorage exchange_keys; + typename BlockExchangeValues::TempStorage exchange_values; + }; + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + /// Shared storage reference + _TempStorage &temp_storage; + + /// Linear thread-id + unsigned int linear_tid; + + /****************************************************************************** + * Utility methods + ******************************************************************************/ + + /// Internal storage allocator + __device__ __forceinline__ _TempStorage& PrivateStorage() + { + __shared__ _TempStorage private_storage; + return private_storage; + } + + /// Rank keys (specialized for ascending sort) + __device__ __forceinline__ void RankKeys( + UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD], + int (&ranks)[ITEMS_PER_THREAD], + int begin_bit, + int pass_bits, + Int2Type /*is_descending*/) + { + AscendingBlockRadixRank(temp_storage.asending_ranking_storage).RankKeys( + unsigned_keys, + ranks, + begin_bit, + pass_bits); + } + + /// Rank keys (specialized for descending sort) + __device__ __forceinline__ void RankKeys( + UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD], + int (&ranks)[ITEMS_PER_THREAD], + int begin_bit, + int pass_bits, + Int2Type /*is_descending*/) + { + DescendingBlockRadixRank(temp_storage.descending_ranking_storage).RankKeys( + unsigned_keys, + ranks, + begin_bit, + pass_bits); + } + + /// ExchangeValues (specialized for key-value sort, to-blocked arrangement) + __device__ __forceinline__ void ExchangeValues( + ValueT 
(&values)[ITEMS_PER_THREAD], + int (&ranks)[ITEMS_PER_THREAD], + Int2Type /*is_keys_only*/, + Int2Type /*is_blocked*/) + { + CTA_SYNC(); + + // Exchange values through shared memory in blocked arrangement + BlockExchangeValues(temp_storage.exchange_values).ScatterToBlocked(values, ranks); + } + + /// ExchangeValues (specialized for key-value sort, to-striped arrangement) + __device__ __forceinline__ void ExchangeValues( + ValueT (&values)[ITEMS_PER_THREAD], + int (&ranks)[ITEMS_PER_THREAD], + Int2Type /*is_keys_only*/, + Int2Type /*is_blocked*/) + { + CTA_SYNC(); + + // Exchange values through shared memory in blocked arrangement + BlockExchangeValues(temp_storage.exchange_values).ScatterToStriped(values, ranks); + } + + /// ExchangeValues (specialized for keys-only sort) + template + __device__ __forceinline__ void ExchangeValues( + ValueT (&/*values*/)[ITEMS_PER_THREAD], + int (&/*ranks*/)[ITEMS_PER_THREAD], + Int2Type /*is_keys_only*/, + Int2Type /*is_blocked*/) + {} + + /// Sort blocked arrangement + template + __device__ __forceinline__ void SortBlocked( + KeyT (&keys)[ITEMS_PER_THREAD], ///< Keys to sort + ValueT (&values)[ITEMS_PER_THREAD], ///< Values to sort + int begin_bit, ///< The beginning (least-significant) bit index needed for key comparison + int end_bit, ///< The past-the-end (most-significant) bit index needed for key comparison + Int2Type is_descending, ///< Tag whether is a descending-order sort + Int2Type is_keys_only) ///< Tag whether is keys-only sort + { + UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD] = + reinterpret_cast(keys); + + // Twiddle bits if necessary + #pragma unroll + for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) + { + unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]); + } + + // Radix sorting passes + while (true) + { + int pass_bits = CUB_MIN(RADIX_BITS, end_bit - begin_bit); + + // Rank the blocked keys + int ranks[ITEMS_PER_THREAD]; + RankKeys(unsigned_keys, ranks, begin_bit, pass_bits, is_descending); + begin_bit += RADIX_BITS; + + CTA_SYNC(); + + // Exchange keys through shared memory in blocked arrangement + BlockExchangeKeys(temp_storage.exchange_keys).ScatterToBlocked(keys, ranks); + + // Exchange values through shared memory in blocked arrangement + ExchangeValues(values, ranks, is_keys_only, Int2Type()); + + // Quit if done + if (begin_bit >= end_bit) break; + + CTA_SYNC(); + } + + // Untwiddle bits if necessary + #pragma unroll + for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) + { + unsigned_keys[KEY] = KeyTraits::TwiddleOut(unsigned_keys[KEY]); + } + } + +public: + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + + /// Sort blocked -> striped arrangement + template + __device__ __forceinline__ void SortBlockedToStriped( + KeyT (&keys)[ITEMS_PER_THREAD], ///< Keys to sort + ValueT (&values)[ITEMS_PER_THREAD], ///< Values to sort + int begin_bit, ///< The beginning (least-significant) bit index needed for key comparison + int end_bit, ///< The past-the-end (most-significant) bit index needed for key comparison + Int2Type is_descending, ///< Tag whether is a descending-order sort + Int2Type is_keys_only) ///< Tag whether is keys-only sort + { + UnsignedBits (&unsigned_keys)[ITEMS_PER_THREAD] = + reinterpret_cast(keys); + + // Twiddle bits if necessary + #pragma unroll + for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) + { + unsigned_keys[KEY] = KeyTraits::TwiddleIn(unsigned_keys[KEY]); + } + + // Radix sorting passes + while (true) + { + int pass_bits = CUB_MIN(RADIX_BITS, end_bit - begin_bit); + + // Rank the blocked 
keys + int ranks[ITEMS_PER_THREAD]; + RankKeys(unsigned_keys, ranks, begin_bit, pass_bits, is_descending); + begin_bit += RADIX_BITS; + + CTA_SYNC(); + + // Check if this is the last pass + if (begin_bit >= end_bit) + { + // Last pass exchanges keys through shared memory in striped arrangement + BlockExchangeKeys(temp_storage.exchange_keys).ScatterToStriped(keys, ranks); + + // Last pass exchanges through shared memory in striped arrangement + ExchangeValues(values, ranks, is_keys_only, Int2Type()); + + // Quit + break; + } + + // Exchange keys through shared memory in blocked arrangement + BlockExchangeKeys(temp_storage.exchange_keys).ScatterToBlocked(keys, ranks); + + // Exchange values through shared memory in blocked arrangement + ExchangeValues(values, ranks, is_keys_only, Int2Type()); + + CTA_SYNC(); + } + + // Untwiddle bits if necessary + #pragma unroll + for (int KEY = 0; KEY < ITEMS_PER_THREAD; KEY++) + { + unsigned_keys[KEY] = KeyTraits::TwiddleOut(unsigned_keys[KEY]); + } + } + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + /// \smemstorage{BlockRadixSort} + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /******************************************************************//** + * \name Collective constructors + *********************************************************************/ + //@{ + + /** + * \brief Collective constructor using a private static allocation of shared memory as temporary storage. + */ + __device__ __forceinline__ BlockRadixSort() + : + temp_storage(PrivateStorage()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + /** + * \brief Collective constructor using the specified memory allocation as temporary storage. + */ + __device__ __forceinline__ BlockRadixSort( + TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + //@} end member group + /******************************************************************//** + * \name Sorting (blocked arrangements) + *********************************************************************/ + //@{ + + /** + * \brief Performs an ascending block-wide radix sort over a [blocked arrangement](index.html#sec5sec3) of keys. + * + * \par + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a sort of 512 integer keys that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive keys. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys each + * typedef cub::BlockRadixSort BlockRadixSort; + * + * // Allocate shared memory for BlockRadixSort + * __shared__ typename BlockRadixSort::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_keys[4]; + * ... + * + * // Collectively sort the keys + * BlockRadixSort(temp_storage).Sort(thread_keys); + * + * \endcode + * \par + * Suppose the set of input \p thread_keys across the block of threads is + * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. + * The corresponding output \p thread_keys in those threads will be + * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. 
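Sort also accepts the optional begin_bit/end_bit arguments listed in its signature below, which bound the bit range inspected during comparison. The following is a hedged sketch of using that to skip radix passes when only part of each key is significant; the kernel and buffer names are hypothetical, and the cub-1.8.0 headers vendored here are assumed.

#include <cub/block/block_radix_sort.cuh>

__global__ void PartialBitSortKernel(unsigned int *d_keys)
{
    typedef cub::BlockRadixSort<unsigned int, 128, 4> BlockRadixSortT;
    __shared__ typename BlockRadixSortT::TempStorage temp_storage;

    int block_offset = blockIdx.x * (128 * 4);

    unsigned int thread_keys[4];
    for (int i = 0; i < 4; ++i)
        thread_keys[i] = d_keys[block_offset + threadIdx.x * 4 + i];

    // Only bits [0, 16) participate in the comparison, halving the number of 4-bit passes
    BlockRadixSortT(temp_storage).Sort(thread_keys, 0, 16);

    for (int i = 0; i < 4; ++i)
        d_keys[block_offset + threadIdx.x * 4 + i] = thread_keys[i];
}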
+ */ + __device__ __forceinline__ void Sort( + KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort + int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison + int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison + { + NullType values[ITEMS_PER_THREAD]; + + SortBlocked(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); + } + + + /** + * \brief Performs an ascending block-wide radix sort across a [blocked arrangement](index.html#sec5sec3) of keys and values. + * + * \par + * - BlockRadixSort can only accommodate one associated tile of values. To "truck along" + * more than one tile of values, simply perform a key-value sort of the keys paired + * with a temporary value array that enumerates the key indices. The reordered indices + * can then be used as a gather-vector for exchanging other associated tile data through + * shared memory. + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a sort of 512 integer keys and values that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive pairs. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys and values each + * typedef cub::BlockRadixSort BlockRadixSort; + * + * // Allocate shared memory for BlockRadixSort + * __shared__ typename BlockRadixSort::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_keys[4]; + * int thread_values[4]; + * ... + * + * // Collectively sort the keys and values among block threads + * BlockRadixSort(temp_storage).Sort(thread_keys, thread_values); + * + * \endcode + * \par + * Suppose the set of input \p thread_keys across the block of threads is + * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The + * corresponding output \p thread_keys in those threads will be + * { [0,1,2,3], [4,5,6,7], [8,9,10,11], ..., [508,509,510,511] }. + * + */ + __device__ __forceinline__ void Sort( + KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort + ValueT (&values)[ITEMS_PER_THREAD], ///< [in-out] Values to sort + int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison + int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison + { + SortBlocked(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); + } + + /** + * \brief Performs a descending block-wide radix sort over a [blocked arrangement](index.html#sec5sec3) of keys. + * + * \par + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a sort of 512 integer keys that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive keys. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) 
+ * { + * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys each + * typedef cub::BlockRadixSort BlockRadixSort; + * + * // Allocate shared memory for BlockRadixSort + * __shared__ typename BlockRadixSort::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_keys[4]; + * ... + * + * // Collectively sort the keys + * BlockRadixSort(temp_storage).Sort(thread_keys); + * + * \endcode + * \par + * Suppose the set of input \p thread_keys across the block of threads is + * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. + * The corresponding output \p thread_keys in those threads will be + * { [511,510,509,508], [11,10,9,8], [7,6,5,4], ..., [3,2,1,0] }. + */ + __device__ __forceinline__ void SortDescending( + KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort + int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison + int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison + { + NullType values[ITEMS_PER_THREAD]; + + SortBlocked(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); + } + + + /** + * \brief Performs a descending block-wide radix sort across a [blocked arrangement](index.html#sec5sec3) of keys and values. + * + * \par + * - BlockRadixSort can only accommodate one associated tile of values. To "truck along" + * more than one tile of values, simply perform a key-value sort of the keys paired + * with a temporary value array that enumerates the key indices. The reordered indices + * can then be used as a gather-vector for exchanging other associated tile data through + * shared memory. + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a sort of 512 integer keys and values that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive pairs. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys and values each + * typedef cub::BlockRadixSort BlockRadixSort; + * + * // Allocate shared memory for BlockRadixSort + * __shared__ typename BlockRadixSort::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_keys[4]; + * int thread_values[4]; + * ... + * + * // Collectively sort the keys and values among block threads + * BlockRadixSort(temp_storage).Sort(thread_keys, thread_values); + * + * \endcode + * \par + * Suppose the set of input \p thread_keys across the block of threads is + * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The + * corresponding output \p thread_keys in those threads will be + * { [511,510,509,508], [11,10,9,8], [7,6,5,4], ..., [3,2,1,0] }. 
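A common use of the key-value overloads is the index-trucking pattern the comments above suggest: sort the keys paired with their original positions, then use the reordered positions as a gather vector for any other associated data. Below is a minimal descending sketch with float keys and int index values; the kernel and buffer names are hypothetical, and the cub-1.8.0 headers vendored in this diff are assumed.

#include <cub/block/block_radix_sort.cuh>

__global__ void TopOrderKernel(const float *d_scores, int *d_sorted_idx)
{
    // float keys are handled via CUB's bit-twiddling traits; int values ride along
    typedef cub::BlockRadixSort<float, 128, 4, int> BlockRadixSortT;
    __shared__ typename BlockRadixSortT::TempStorage temp_storage;

    float thread_keys[4];
    int   thread_vals[4];
    for (int i = 0; i < 4; ++i)
    {
        int idx        = threadIdx.x * 4 + i;
        thread_keys[i] = d_scores[idx];
        thread_vals[i] = idx;              // remember where each score came from
    }

    // Highest scores end up in thread 0's registers
    BlockRadixSortT(temp_storage).SortDescending(thread_keys, thread_vals);

    for (int i = 0; i < 4; ++i)
        d_sorted_idx[threadIdx.x * 4 + i] = thread_vals[i];
}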
+ * + */ + __device__ __forceinline__ void SortDescending( + KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort + ValueT (&values)[ITEMS_PER_THREAD], ///< [in-out] Values to sort + int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison + int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison + { + SortBlocked(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); + } + + + //@} end member group + /******************************************************************//** + * \name Sorting (blocked arrangement -> striped arrangement) + *********************************************************************/ + //@{ + + + /** + * \brief Performs an ascending radix sort across a [blocked arrangement](index.html#sec5sec3) of keys, leaving them in a [striped arrangement](index.html#sec5sec3). + * + * \par + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a sort of 512 integer keys that + * are initially partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive keys. The final partitioning is striped. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys each + * typedef cub::BlockRadixSort BlockRadixSort; + * + * // Allocate shared memory for BlockRadixSort + * __shared__ typename BlockRadixSort::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_keys[4]; + * ... + * + * // Collectively sort the keys + * BlockRadixSort(temp_storage).SortBlockedToStriped(thread_keys); + * + * \endcode + * \par + * Suppose the set of input \p thread_keys across the block of threads is + * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The + * corresponding output \p thread_keys in those threads will be + * { [0,128,256,384], [1,129,257,385], [2,130,258,386], ..., [127,255,383,511] }. + * + */ + __device__ __forceinline__ void SortBlockedToStriped( + KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort + int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison + int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison + { + NullType values[ITEMS_PER_THREAD]; + + SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); + } + + + /** + * \brief Performs an ascending radix sort across a [blocked arrangement](index.html#sec5sec3) of keys and values, leaving them in a [striped arrangement](index.html#sec5sec3). + * + * \par + * - BlockRadixSort can only accommodate one associated tile of values. To "truck along" + * more than one tile of values, simply perform a key-value sort of the keys paired + * with a temporary value array that enumerates the key indices. The reordered indices + * can then be used as a gather-vector for exchanging other associated tile data through + * shared memory. + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a sort of 512 integer keys and values that + * are initially partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive pairs. The final partitioning is striped. 
+ * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys and values each + * typedef cub::BlockRadixSort BlockRadixSort; + * + * // Allocate shared memory for BlockRadixSort + * __shared__ typename BlockRadixSort::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_keys[4]; + * int thread_values[4]; + * ... + * + * // Collectively sort the keys and values among block threads + * BlockRadixSort(temp_storage).SortBlockedToStriped(thread_keys, thread_values); + * + * \endcode + * \par + * Suppose the set of input \p thread_keys across the block of threads is + * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The + * corresponding output \p thread_keys in those threads will be + * { [0,128,256,384], [1,129,257,385], [2,130,258,386], ..., [127,255,383,511] }. + * + */ + __device__ __forceinline__ void SortBlockedToStriped( + KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort + ValueT (&values)[ITEMS_PER_THREAD], ///< [in-out] Values to sort + int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison + int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison + { + SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); + } + + + /** + * \brief Performs a descending radix sort across a [blocked arrangement](index.html#sec5sec3) of keys, leaving them in a [striped arrangement](index.html#sec5sec3). + * + * \par + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a sort of 512 integer keys that + * are initially partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive keys. The final partitioning is striped. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys each + * typedef cub::BlockRadixSort BlockRadixSort; + * + * // Allocate shared memory for BlockRadixSort + * __shared__ typename BlockRadixSort::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_keys[4]; + * ... + * + * // Collectively sort the keys + * BlockRadixSort(temp_storage).SortBlockedToStriped(thread_keys); + * + * \endcode + * \par + * Suppose the set of input \p thread_keys across the block of threads is + * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The + * corresponding output \p thread_keys in those threads will be + * { [511,383,255,127], [386,258,130,2], [385,257,128,1], ..., [384,256,128,0] }. 
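The striped output of the BlockedToStriped variants lines up with a fully coalesced global write, which is their usual advantage over the plain Sort overloads when the sorted tile goes straight back to memory. A minimal sketch with hypothetical buffer names, assuming the cub-1.8.0 headers vendored in this diff:

#include <cub/block/block_radix_sort.cuh>

__global__ void SortAndStoreKernel(const int *d_in, int *d_out)
{
    typedef cub::BlockRadixSort<int, 128, 4> BlockRadixSortT;
    __shared__ typename BlockRadixSortT::TempStorage temp_storage;

    int block_offset = blockIdx.x * (128 * 4);

    int thread_keys[4];
    for (int i = 0; i < 4; ++i)
        thread_keys[i] = d_in[block_offset + threadIdx.x * 4 + i];

    // Sort, leaving the keys striped across threads
    BlockRadixSortT(temp_storage).SortBlockedToStriped(thread_keys);

    // Striped arrangement: item i of thread t is element i*128 + t, so consecutive
    // threads write consecutive addresses (fully coalesced)
    for (int i = 0; i < 4; ++i)
        d_out[block_offset + i * 128 + threadIdx.x] = thread_keys[i];
}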
+ * + */ + __device__ __forceinline__ void SortDescendingBlockedToStriped( + KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort + int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison + int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison + { + NullType values[ITEMS_PER_THREAD]; + + SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); + } + + + /** + * \brief Performs a descending radix sort across a [blocked arrangement](index.html#sec5sec3) of keys and values, leaving them in a [striped arrangement](index.html#sec5sec3). + * + * \par + * - BlockRadixSort can only accommodate one associated tile of values. To "truck along" + * more than one tile of values, simply perform a key-value sort of the keys paired + * with a temporary value array that enumerates the key indices. The reordered indices + * can then be used as a gather-vector for exchanging other associated tile data through + * shared memory. + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a sort of 512 integer keys and values that + * are initially partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive pairs. The final partitioning is striped. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockRadixSort for a 1D block of 128 threads owning 4 integer keys and values each + * typedef cub::BlockRadixSort BlockRadixSort; + * + * // Allocate shared memory for BlockRadixSort + * __shared__ typename BlockRadixSort::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_keys[4]; + * int thread_values[4]; + * ... + * + * // Collectively sort the keys and values among block threads + * BlockRadixSort(temp_storage).SortBlockedToStriped(thread_keys, thread_values); + * + * \endcode + * \par + * Suppose the set of input \p thread_keys across the block of threads is + * { [0,511,1,510], [2,509,3,508], [4,507,5,506], ..., [254,257,255,256] }. The + * corresponding output \p thread_keys in those threads will be + * { [511,383,255,127], [386,258,130,2], [385,257,128,1], ..., [384,256,128,0] }. + * + */ + __device__ __forceinline__ void SortDescendingBlockedToStriped( + KeyT (&keys)[ITEMS_PER_THREAD], ///< [in-out] Keys to sort + ValueT (&values)[ITEMS_PER_THREAD], ///< [in-out] Values to sort + int begin_bit = 0, ///< [in] [optional] The beginning (least-significant) bit index needed for key comparison + int end_bit = sizeof(KeyT) * 8) ///< [in] [optional] The past-the-end (most-significant) bit index needed for key comparison + { + SortBlockedToStriped(keys, values, begin_bit, end_bit, Int2Type(), Int2Type()); + } + + + //@} end member group + +}; + +/** + * \example example_block_radix_sort.cu + */ + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_raking_layout.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_raking_layout.cuh new file mode 100644 index 0000000..3500616 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_raking_layout.cuh @@ -0,0 +1,152 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. 
+ * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::BlockRakingLayout provides a conflict-free shared memory layout abstraction for warp-raking across thread block data. + */ + + +#pragma once + +#include "../util_macro.cuh" +#include "../util_arch.cuh" +#include "../util_type.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \brief BlockRakingLayout provides a conflict-free shared memory layout abstraction for 1D raking across thread block data. ![](raking.png) + * \ingroup BlockModule + * + * \par Overview + * This type facilitates a shared memory usage pattern where a block of CUDA + * threads places elements into shared memory and then reduces the active + * parallelism to one "raking" warp of threads for serially aggregating consecutive + * sequences of shared items. Padding is inserted to eliminate bank conflicts + * (for most data types). + * + * \tparam T The data type to be exchanged. + * \tparam BLOCK_THREADS The thread block size in threads. 
+ * \tparam PTX_ARCH [optional] \ptxversion + */ +template < + typename T, + int BLOCK_THREADS, + int PTX_ARCH = CUB_PTX_ARCH> +struct BlockRakingLayout +{ + //--------------------------------------------------------------------- + // Constants and type definitions + //--------------------------------------------------------------------- + + enum + { + /// The total number of elements that need to be cooperatively reduced + SHARED_ELEMENTS = BLOCK_THREADS, + + /// Maximum number of warp-synchronous raking threads + MAX_RAKING_THREADS = CUB_MIN(BLOCK_THREADS, CUB_WARP_THREADS(PTX_ARCH)), + + /// Number of raking elements per warp-synchronous raking thread (rounded up) + SEGMENT_LENGTH = (SHARED_ELEMENTS + MAX_RAKING_THREADS - 1) / MAX_RAKING_THREADS, + + /// Never use a raking thread that will have no valid data (e.g., when BLOCK_THREADS is 62 and SEGMENT_LENGTH is 2, we should only use 31 raking threads) + RAKING_THREADS = (SHARED_ELEMENTS + SEGMENT_LENGTH - 1) / SEGMENT_LENGTH, + + /// Whether we will have bank conflicts (technically we should find out if the GCD is > 1) + HAS_CONFLICTS = (CUB_SMEM_BANKS(PTX_ARCH) % SEGMENT_LENGTH == 0), + + /// Degree of bank conflicts (e.g., 4-way) + CONFLICT_DEGREE = (HAS_CONFLICTS) ? + (MAX_RAKING_THREADS * SEGMENT_LENGTH) / CUB_SMEM_BANKS(PTX_ARCH) : + 1, + + /// Pad each segment length with one element if segment length is not relatively prime to warp size and can't be optimized as a vector load + USE_SEGMENT_PADDING = ((SEGMENT_LENGTH & 1) == 0) && (SEGMENT_LENGTH > 2), + + /// Total number of elements in the raking grid + GRID_ELEMENTS = RAKING_THREADS * (SEGMENT_LENGTH + USE_SEGMENT_PADDING), + + /// Whether or not we need bounds checking during raking (the number of reduction elements is not a multiple of the number of raking threads) + UNGUARDED = (SHARED_ELEMENTS % RAKING_THREADS == 0), + }; + + + /** + * \brief Shared memory storage type + */ + struct __align__(16) _TempStorage + { + T buff[BlockRakingLayout::GRID_ELEMENTS]; + }; + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /** + * \brief Returns the location for the calling thread to place data into the grid + */ + static __device__ __forceinline__ T* PlacementPtr( + TempStorage &temp_storage, + unsigned int linear_tid) + { + // Offset for partial + unsigned int offset = linear_tid; + + // Add in one padding element for every segment + if (USE_SEGMENT_PADDING > 0) + { + offset += offset / SEGMENT_LENGTH; + } + + // Incorporating a block of padding partials every shared memory segment + return temp_storage.Alias().buff + offset; + } + + + /** + * \brief Returns the location for the calling thread to begin sequential raking + */ + static __device__ __forceinline__ T* RakingPtr( + TempStorage &temp_storage, + unsigned int linear_tid) + { + return temp_storage.Alias().buff + (linear_tid * (SEGMENT_LENGTH + USE_SEGMENT_PADDING)); + } +}; + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_reduce.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_reduce.cuh new file mode 100644 index 0000000..261f2ea --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_reduce.cuh @@ -0,0 +1,607 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
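// A hedged sketch (not part of the patch) of the two-phase usage pattern that the
// BlockRakingLayout documentation above describes: every thread deposits one
// partial through PlacementPtr(), then a single raking warp serially reduces each
// padded segment obtained from RakingPtr(). The kernel name, the 128-thread block
// size, and the d_in/d_out buffers are illustrative assumptions; with 128 threads
// the segments divide evenly, so no bounds checking is needed while raking.
#include <cub/block/block_raking_layout.cuh>

__global__ void RakingSumKernel(const int *d_in, int *d_out)
{
    typedef cub::BlockRakingLayout<int, 128> RakingLayout;

    // Conflict-free (padded) raking grid in shared memory
    __shared__ typename RakingLayout::TempStorage temp_storage;

    // Phase 1: every thread places its input into the raking grid
    *RakingLayout::PlacementPtr(temp_storage, threadIdx.x) = d_in[threadIdx.x];
    __syncthreads();

    // Phase 2: only the raking threads serially reduce their segments
    if (threadIdx.x < RakingLayout::RAKING_THREADS)
    {
        int *segment = RakingLayout::RakingPtr(temp_storage, threadIdx.x);
        int partial = segment[0];
        for (int i = 1; i < RakingLayout::SEGMENT_LENGTH; ++i)
            partial += segment[i];

        // One partial per raking thread remains; a warp-synchronous reduction
        // would normally finish the job (omitted here for brevity)
        d_out[threadIdx.x] = partial;
    }
}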
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * The cub::BlockReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across a CUDA thread block. + */ + +#pragma once + +#include "specializations/block_reduce_raking.cuh" +#include "specializations/block_reduce_raking_commutative_only.cuh" +#include "specializations/block_reduce_warp_reductions.cuh" +#include "../util_ptx.cuh" +#include "../util_type.cuh" +#include "../thread/thread_operators.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + + +/****************************************************************************** + * Algorithmic variants + ******************************************************************************/ + +/** + * BlockReduceAlgorithm enumerates alternative algorithms for parallel + * reduction across a CUDA thread block. + */ +enum BlockReduceAlgorithm +{ + + /** + * \par Overview + * An efficient "raking" reduction algorithm that only supports commutative + * reduction operators (true for most operations, e.g., addition). + * + * \par + * Execution is comprised of three phases: + * -# Upsweep sequential reduction in registers (if threads contribute more + * than one input each). Threads in warps other than the first warp place + * their partial reductions into shared memory. + * -# Upsweep sequential reduction in shared memory. Threads within the first + * warp continue to accumulate by raking across segments of shared partial reductions + * -# A warp-synchronous Kogge-Stone style reduction within the raking warp. + * + * \par + * \image html block_reduce.png + *
\p BLOCK_REDUCE_RAKING data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
+ * + * \par Performance Considerations + * - This variant performs less communication than BLOCK_REDUCE_RAKING_NON_COMMUTATIVE + * and is preferable when the reduction operator is commutative. This variant + * applies fewer reduction operators than BLOCK_REDUCE_WARP_REDUCTIONS, and can provide higher overall + * throughput across the GPU when suitably occupied. However, turn-around latency may be + * higher than to BLOCK_REDUCE_WARP_REDUCTIONS and thus less-desirable + * when the GPU is under-occupied. + */ + BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY, + + + /** + * \par Overview + * An efficient "raking" reduction algorithm that supports commutative + * (e.g., addition) and non-commutative (e.g., string concatenation) reduction + * operators. \blocked. + * + * \par + * Execution is comprised of three phases: + * -# Upsweep sequential reduction in registers (if threads contribute more + * than one input each). Each thread then places the partial reduction + * of its item(s) into shared memory. + * -# Upsweep sequential reduction in shared memory. Threads within a + * single warp rake across segments of shared partial reductions. + * -# A warp-synchronous Kogge-Stone style reduction within the raking warp. + * + * \par + * \image html block_reduce.png + *
\p BLOCK_REDUCE_RAKING data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
+ * + * \par Performance Considerations + * - This variant performs more communication than BLOCK_REDUCE_RAKING + * and is only preferable when the reduction operator is non-commutative. This variant + * applies fewer reduction operators than BLOCK_REDUCE_WARP_REDUCTIONS, and can provide higher overall + * throughput across the GPU when suitably occupied. However, turn-around latency may be + * higher than to BLOCK_REDUCE_WARP_REDUCTIONS and thus less-desirable + * when the GPU is under-occupied. + */ + BLOCK_REDUCE_RAKING, + + + /** + * \par Overview + * A quick "tiled warp-reductions" reduction algorithm that supports commutative + * (e.g., addition) and non-commutative (e.g., string concatenation) reduction + * operators. + * + * \par + * Execution is comprised of four phases: + * -# Upsweep sequential reduction in registers (if threads contribute more + * than one input each). Each thread then places the partial reduction + * of its item(s) into shared memory. + * -# Compute a shallow, but inefficient warp-synchronous Kogge-Stone style + * reduction within each warp. + * -# A propagation phase where the warp reduction outputs in each warp are + * updated with the aggregate from each preceding warp. + * + * \par + * \image html block_scan_warpscans.png + *
\p BLOCK_REDUCE_WARP_REDUCTIONS data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
+ * + * \par Performance Considerations + * - This variant applies more reduction operators than BLOCK_REDUCE_RAKING + * or BLOCK_REDUCE_RAKING_NON_COMMUTATIVE, which may result in lower overall + * throughput across the GPU. However turn-around latency may be lower and + * thus useful when the GPU is under-occupied. + */ + BLOCK_REDUCE_WARP_REDUCTIONS, +}; + + +/****************************************************************************** + * Block reduce + ******************************************************************************/ + +/** + * \brief The BlockReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across a CUDA thread block. ![](reduce_logo.png) + * \ingroup BlockModule + * + * \tparam T Data type being reduced + * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension + * \tparam ALGORITHM [optional] cub::BlockReduceAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_REDUCE_WARP_REDUCTIONS) + * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) + * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) + * \tparam PTX_ARCH [optional] \ptxversion + * + * \par Overview + * - A reduction (or fold) + * uses a binary combining operator to compute a single aggregate from a list of input elements. + * - \rowmajor + * - BlockReduce can be optionally specialized by algorithm to accommodate different latency/throughput workload profiles: + * -# cub::BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY. An efficient "raking" reduction algorithm that only supports commutative reduction operators. [More...](\ref cub::BlockReduceAlgorithm) + * -# cub::BLOCK_REDUCE_RAKING. An efficient "raking" reduction algorithm that supports commutative and non-commutative reduction operators. [More...](\ref cub::BlockReduceAlgorithm) + * -# cub::BLOCK_REDUCE_WARP_REDUCTIONS. A quick "tiled warp-reductions" reduction algorithm that supports commutative and non-commutative reduction operators. [More...](\ref cub::BlockReduceAlgorithm) + * + * \par Performance Considerations + * - \granularity + * - Very efficient (only one synchronization barrier). + * - Incurs zero bank conflicts for most types + * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: + * - Summation (vs. generic reduction) + * - \p BLOCK_THREADS is a multiple of the architecture's warp size + * - Every thread has a valid input (i.e., full vs. partial-tiles) + * - See cub::BlockReduceAlgorithm for performance details regarding algorithmic alternatives + * + * \par A Simple Example + * \blockcollective{BlockReduce} + * \par + * The code snippet below illustrates a sum reduction of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockReduce for a 1D block of 128 threads on type int + * typedef cub::BlockReduce BlockReduce; + * + * // Allocate shared memory for BlockReduce + * __shared__ typename BlockReduce::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... 
+ * + * // Compute the block-wide sum for thread0 + * int aggregate = BlockReduce(temp_storage).Sum(thread_data); + * + * \endcode + * + */ +template < + typename T, + int BLOCK_DIM_X, + BlockReduceAlgorithm ALGORITHM = BLOCK_REDUCE_WARP_REDUCTIONS, + int BLOCK_DIM_Y = 1, + int BLOCK_DIM_Z = 1, + int PTX_ARCH = CUB_PTX_ARCH> +class BlockReduce +{ +private: + + /****************************************************************************** + * Constants and type definitions + ******************************************************************************/ + + /// Constants + enum + { + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + }; + + typedef BlockReduceWarpReductions WarpReductions; + typedef BlockReduceRakingCommutativeOnly RakingCommutativeOnly; + typedef BlockReduceRaking Raking; + + /// Internal specialization type + typedef typename If<(ALGORITHM == BLOCK_REDUCE_WARP_REDUCTIONS), + WarpReductions, + typename If<(ALGORITHM == BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY), + RakingCommutativeOnly, + Raking>::Type>::Type InternalBlockReduce; // BlockReduceRaking + + /// Shared memory storage layout type for BlockReduce + typedef typename InternalBlockReduce::TempStorage _TempStorage; + + + /****************************************************************************** + * Utility methods + ******************************************************************************/ + + /// Internal storage allocator + __device__ __forceinline__ _TempStorage& PrivateStorage() + { + __shared__ _TempStorage private_storage; + return private_storage; + } + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + /// Shared storage reference + _TempStorage &temp_storage; + + /// Linear thread-id + unsigned int linear_tid; + + +public: + + /// \smemstorage{BlockReduce} + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /******************************************************************//** + * \name Collective constructors + *********************************************************************/ + //@{ + + /** + * \brief Collective constructor using a private static allocation of shared memory as temporary storage. + */ + __device__ __forceinline__ BlockReduce() + : + temp_storage(PrivateStorage()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + /** + * \brief Collective constructor using the specified memory allocation as temporary storage. + */ + __device__ __forceinline__ BlockReduce( + TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + //@} end member group + /******************************************************************//** + * \name Generic reductions + *********************************************************************/ + //@{ + + + /** + * \brief Computes a block-wide reduction for thread0 using the specified binary reduction functor. Each thread contributes one input element. + * + * \par + * - The return value is undefined in threads other than thread0. + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a max reduction of 128 integer items that + * are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) 
+ * { + * // Specialize BlockReduce for a 1D block of 128 threads on type int + * typedef cub::BlockReduce BlockReduce; + * + * // Allocate shared memory for BlockReduce + * __shared__ typename BlockReduce::TempStorage temp_storage; + * + * // Each thread obtains an input item + * int thread_data; + * ... + * + * // Compute the block-wide max for thread0 + * int aggregate = BlockReduce(temp_storage).Reduce(thread_data, cub::Max()); + * + * \endcode + * + * \tparam ReductionOp [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ T Reduce( + T input, ///< [in] Calling thread's input + ReductionOp reduction_op) ///< [in] Binary reduction functor + { + return InternalBlockReduce(temp_storage).template Reduce(input, BLOCK_THREADS, reduction_op); + } + + + /** + * \brief Computes a block-wide reduction for thread0 using the specified binary reduction functor. Each thread contributes an array of consecutive input elements. + * + * \par + * - The return value is undefined in threads other than thread0. + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a max reduction of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockReduce for a 1D block of 128 threads on type int + * typedef cub::BlockReduce BlockReduce; + * + * // Allocate shared memory for BlockReduce + * __shared__ typename BlockReduce::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Compute the block-wide max for thread0 + * int aggregate = BlockReduce(temp_storage).Reduce(thread_data, cub::Max()); + * + * \endcode + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam ReductionOp [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) + */ + template < + int ITEMS_PER_THREAD, + typename ReductionOp> + __device__ __forceinline__ T Reduce( + T (&inputs)[ITEMS_PER_THREAD], ///< [in] Calling thread's input segment + ReductionOp reduction_op) ///< [in] Binary reduction functor + { + // Reduce partials + T partial = internal::ThreadReduce(inputs, reduction_op); + return Reduce(partial, reduction_op); + } + + + /** + * \brief Computes a block-wide reduction for thread0 using the specified binary reduction functor. The first \p num_valid threads each contribute one input element. + * + * \par + * - The return value is undefined in threads other than thread0. + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a max reduction of a partially-full tile of integer items that + * are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(int num_valid, ...) + * { + * // Specialize BlockReduce for a 1D block of 128 threads on type int + * typedef cub::BlockReduce BlockReduce; + * + * // Allocate shared memory for BlockReduce + * __shared__ typename BlockReduce::TempStorage temp_storage; + * + * // Each thread obtains an input item + * int thread_data; + * if (threadIdx.x < num_valid) thread_data = ... 
+ * + * // Compute the block-wide max for thread0 + * int aggregate = BlockReduce(temp_storage).Reduce(thread_data, cub::Max(), num_valid); + * + * \endcode + * + * \tparam ReductionOp [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ T Reduce( + T input, ///< [in] Calling thread's input + ReductionOp reduction_op, ///< [in] Binary reduction functor + int num_valid) ///< [in] Number of threads containing valid elements (may be less than BLOCK_THREADS) + { + // Determine if we scan skip bounds checking + if (num_valid >= BLOCK_THREADS) + { + return InternalBlockReduce(temp_storage).template Reduce(input, num_valid, reduction_op); + } + else + { + return InternalBlockReduce(temp_storage).template Reduce(input, num_valid, reduction_op); + } + } + + + //@} end member group + /******************************************************************//** + * \name Summation reductions + *********************************************************************/ + //@{ + + + /** + * \brief Computes a block-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes one input element. + * + * \par + * - The return value is undefined in threads other than thread0. + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a sum reduction of 128 integer items that + * are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockReduce for a 1D block of 128 threads on type int + * typedef cub::BlockReduce BlockReduce; + * + * // Allocate shared memory for BlockReduce + * __shared__ typename BlockReduce::TempStorage temp_storage; + * + * // Each thread obtains an input item + * int thread_data; + * ... + * + * // Compute the block-wide sum for thread0 + * int aggregate = BlockReduce(temp_storage).Sum(thread_data); + * + * \endcode + * + */ + __device__ __forceinline__ T Sum( + T input) ///< [in] Calling thread's input + { + return InternalBlockReduce(temp_storage).template Sum(input, BLOCK_THREADS); + } + + /** + * \brief Computes a block-wide reduction for thread0 using addition (+) as the reduction operator. Each thread contributes an array of consecutive input elements. + * + * \par + * - The return value is undefined in threads other than thread0. + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a sum reduction of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockReduce for a 1D block of 128 threads on type int + * typedef cub::BlockReduce BlockReduce; + * + * // Allocate shared memory for BlockReduce + * __shared__ typename BlockReduce::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Compute the block-wide sum for thread0 + * int aggregate = BlockReduce(temp_storage).Sum(thread_data); + * + * \endcode + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
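// A hedged, self-contained sketch of the Sum() overload documented above
// (block-wide sum of a blocked arrangement of 4 items per thread). The kernel
// name and the d_in/d_out buffers are illustrative assumptions; the aggregate is
// only defined in thread0, so only thread0 writes it out.
#include <cub/block/block_reduce.cuh>   // or equivalently <cub/cub.cuh>

__global__ void BlockSumKernel(const int *d_in, int *d_out)
{
    // Specialize BlockReduce for a 1D block of 128 threads on type int.
    // A third template argument (e.g., cub::BLOCK_REDUCE_RAKING) could select
    // one of the algorithmic variants described earlier.
    typedef cub::BlockReduce<int, 128> BlockReduce;

    // Allocate shared memory for BlockReduce
    __shared__ typename BlockReduce::TempStorage temp_storage;

    // Load a blocked segment of 4 consecutive items per thread
    int thread_data[4];
    for (int i = 0; i < 4; ++i)
        thread_data[i] = d_in[blockIdx.x * 128 * 4 + threadIdx.x * 4 + i];

    // Compute the block-wide sum (return value is valid in thread0 only)
    int aggregate = BlockReduce(temp_storage).Sum(thread_data);

    if (threadIdx.x == 0)
        d_out[blockIdx.x] = aggregate;
}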
+ */ + template + __device__ __forceinline__ T Sum( + T (&inputs)[ITEMS_PER_THREAD]) ///< [in] Calling thread's input segment + { + // Reduce partials + T partial = internal::ThreadReduce(inputs, cub::Sum()); + return Sum(partial); + } + + + /** + * \brief Computes a block-wide reduction for thread0 using addition (+) as the reduction operator. The first \p num_valid threads each contribute one input element. + * + * \par + * - The return value is undefined in threads other than thread0. + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a sum reduction of a partially-full tile of integer items that + * are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(int num_valid, ...) + * { + * // Specialize BlockReduce for a 1D block of 128 threads on type int + * typedef cub::BlockReduce BlockReduce; + * + * // Allocate shared memory for BlockReduce + * __shared__ typename BlockReduce::TempStorage temp_storage; + * + * // Each thread obtains an input item (up to num_items) + * int thread_data; + * if (threadIdx.x < num_valid) + * thread_data = ... + * + * // Compute the block-wide sum for thread0 + * int aggregate = BlockReduce(temp_storage).Sum(thread_data, num_valid); + * + * \endcode + * + */ + __device__ __forceinline__ T Sum( + T input, ///< [in] Calling thread's input + int num_valid) ///< [in] Number of threads containing valid elements (may be less than BLOCK_THREADS) + { + // Determine if we scan skip bounds checking + if (num_valid >= BLOCK_THREADS) + { + return InternalBlockReduce(temp_storage).template Sum(input, num_valid); + } + else + { + return InternalBlockReduce(temp_storage).template Sum(input, num_valid); + } + } + + + //@} end member group +}; + +/** + * \example example_block_reduce.cu + */ + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_scan.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_scan.cuh new file mode 100644 index 0000000..27ea7ed --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_scan.cuh @@ -0,0 +1,2126 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * The cub::BlockScan class provides [collective](index.html#sec0) methods for computing a parallel prefix sum/scan of items partitioned across a CUDA thread block. + */ + +#pragma once + +#include "specializations/block_scan_raking.cuh" +#include "specializations/block_scan_warp_scans.cuh" +#include "../util_arch.cuh" +#include "../util_type.cuh" +#include "../util_ptx.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Algorithmic variants + ******************************************************************************/ + +/** + * \brief BlockScanAlgorithm enumerates alternative algorithms for cub::BlockScan to compute a parallel prefix scan across a CUDA thread block. + */ +enum BlockScanAlgorithm +{ + + /** + * \par Overview + * An efficient "raking reduce-then-scan" prefix scan algorithm. Execution is comprised of five phases: + * -# Upsweep sequential reduction in registers (if threads contribute more than one input each). Each thread then places the partial reduction of its item(s) into shared memory. + * -# Upsweep sequential reduction in shared memory. Threads within a single warp rake across segments of shared partial reductions. + * -# A warp-synchronous Kogge-Stone style exclusive scan within the raking warp. + * -# Downsweep sequential exclusive scan in shared memory. Threads within a single warp rake across segments of shared partial reductions, seeded with the warp-scan output. + * -# Downsweep sequential scan in registers (if threads contribute more than one input), seeded with the raking scan output. + * + * \par + * \image html block_scan_raking.png + *
\p BLOCK_SCAN_RAKING data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
+ * + * \par Performance Considerations + * - Although this variant may suffer longer turnaround latencies when the + * GPU is under-occupied, it can often provide higher overall throughput + * across the GPU when suitably occupied. + */ + BLOCK_SCAN_RAKING, + + + /** + * \par Overview + * Similar to cub::BLOCK_SCAN_RAKING, but with fewer shared memory reads at + * the expense of higher register pressure. Raking threads preserve their + * "upsweep" segment of values in registers while performing warp-synchronous + * scan, allowing the "downsweep" not to re-read them from shared memory. + */ + BLOCK_SCAN_RAKING_MEMOIZE, + + + /** + * \par Overview + * A quick "tiled warpscans" prefix scan algorithm. Execution is comprised of four phases: + * -# Upsweep sequential reduction in registers (if threads contribute more than one input each). Each thread then places the partial reduction of its item(s) into shared memory. + * -# Compute a shallow, but inefficient warp-synchronous Kogge-Stone style scan within each warp. + * -# A propagation phase where the warp scan outputs in each warp are updated with the aggregate from each preceding warp. + * -# Downsweep sequential scan in registers (if threads contribute more than one input), seeded with the raking scan output. + * + * \par + * \image html block_scan_warpscans.png + *
\p BLOCK_SCAN_WARP_SCANS data flow for a hypothetical 16-thread thread block and 4-thread raking warp.
+ * + * \par Performance Considerations + * - Although this variant may suffer lower overall throughput across the + * GPU because due to a heavy reliance on inefficient warpscans, it can + * often provide lower turnaround latencies when the GPU is under-occupied. + */ + BLOCK_SCAN_WARP_SCANS, +}; + + +/****************************************************************************** + * Block scan + ******************************************************************************/ + +/** + * \brief The BlockScan class provides [collective](index.html#sec0) methods for computing a parallel prefix sum/scan of items partitioned across a CUDA thread block. ![](block_scan_logo.png) + * \ingroup BlockModule + * + * \tparam T Data type being scanned + * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension + * \tparam ALGORITHM [optional] cub::BlockScanAlgorithm enumerator specifying the underlying algorithm to use (default: cub::BLOCK_SCAN_RAKING) + * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) + * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) + * \tparam PTX_ARCH [optional] \ptxversion + * + * \par Overview + * - Given a list of input elements and a binary reduction operator, a [prefix scan](http://en.wikipedia.org/wiki/Prefix_sum) + * produces an output list where each element is computed to be the reduction + * of the elements occurring earlier in the input list. Prefix sum + * connotes a prefix scan with the addition operator. The term \em inclusive indicates + * that the ith output reduction incorporates the ith input. + * The term \em exclusive indicates the ith input is not incorporated into + * the ith output reduction. + * - \rowmajor + * - BlockScan can be optionally specialized by algorithm to accommodate different workload profiles: + * -# cub::BLOCK_SCAN_RAKING. An efficient (high throughput) "raking reduce-then-scan" prefix scan algorithm. [More...](\ref cub::BlockScanAlgorithm) + * -# cub::BLOCK_SCAN_RAKING_MEMOIZE. Similar to cub::BLOCK_SCAN_RAKING, but having higher throughput at the expense of additional register pressure for intermediate storage. [More...](\ref cub::BlockScanAlgorithm) + * -# cub::BLOCK_SCAN_WARP_SCANS. A quick (low latency) "tiled warpscans" prefix scan algorithm. [More...](\ref cub::BlockScanAlgorithm) + * + * \par Performance Considerations + * - \granularity + * - Uses special instructions when applicable (e.g., warp \p SHFL) + * - Uses synchronization-free communication between warp lanes when applicable + * - Invokes a minimal number of minimal block-wide synchronization barriers (only + * one or two depending on algorithm selection) + * - Incurs zero bank conflicts for most types + * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: + * - Prefix sum variants (vs. generic scan) + * - \blocksize + * - See cub::BlockScanAlgorithm for performance details regarding algorithmic alternatives + * + * \par A Simple Example + * \blockcollective{BlockScan} + * \par + * The code snippet below illustrates an exclusive prefix sum of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) 
+ * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Collectively compute the block-wide exclusive prefix sum + * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is + * {[1,1,1,1], [1,1,1,1], ..., [1,1,1,1]}. + * The corresponding output \p thread_data in those threads will be + * {[0,1,2,3], [4,5,6,7], ..., [508,509,510,511]}. + * + */ +template < + typename T, + int BLOCK_DIM_X, + BlockScanAlgorithm ALGORITHM = BLOCK_SCAN_RAKING, + int BLOCK_DIM_Y = 1, + int BLOCK_DIM_Z = 1, + int PTX_ARCH = CUB_PTX_ARCH> +class BlockScan +{ +private: + + /****************************************************************************** + * Constants and type definitions + ******************************************************************************/ + + /// Constants + enum + { + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + }; + + /** + * Ensure the template parameterization meets the requirements of the + * specified algorithm. Currently, the BLOCK_SCAN_WARP_SCANS policy + * cannot be used with thread block sizes not a multiple of the + * architectural warp size. + */ + static const BlockScanAlgorithm SAFE_ALGORITHM = + ((ALGORITHM == BLOCK_SCAN_WARP_SCANS) && (BLOCK_THREADS % CUB_WARP_THREADS(PTX_ARCH) != 0)) ? + BLOCK_SCAN_RAKING : + ALGORITHM; + + typedef BlockScanWarpScans WarpScans; + typedef BlockScanRaking Raking; + + /// Define the delegate type for the desired algorithm + typedef typename If<(SAFE_ALGORITHM == BLOCK_SCAN_WARP_SCANS), + WarpScans, + Raking>::Type InternalBlockScan; + + /// Shared memory storage layout type for BlockScan + typedef typename InternalBlockScan::TempStorage _TempStorage; + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + /// Shared storage reference + _TempStorage &temp_storage; + + /// Linear thread-id + unsigned int linear_tid; + + + /****************************************************************************** + * Utility methods + ******************************************************************************/ + + /// Internal storage allocator + __device__ __forceinline__ _TempStorage& PrivateStorage() + { + __shared__ _TempStorage private_storage; + return private_storage; + } + + + /****************************************************************************** + * Public types + ******************************************************************************/ +public: + + /// \smemstorage{BlockScan} + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /******************************************************************//** + * \name Collective constructors + *********************************************************************/ + //@{ + + /** + * \brief Collective constructor using a private static allocation of shared memory as temporary storage. 
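// A hedged, self-contained sketch of the block-wide ExclusiveSum usage pattern
// documented above, with the template arguments that the extraction stripped
// restored from the standard cub::BlockScan<T, BLOCK_DIM_X> parameterization.
// The kernel name and the d_data buffer are illustrative assumptions; one thread
// block scans 512 ints (4 per thread) in place.
#include <cub/block/block_scan.cuh>   // or equivalently <cub/cub.cuh>

__global__ void BlockPrefixSumKernel(int *d_data)
{
    // Specialize BlockScan for a 1D block of 128 threads on type int.
    // A third template argument (e.g., cub::BLOCK_SCAN_WARP_SCANS) could select
    // one of the algorithmic variants described earlier.
    typedef cub::BlockScan<int, 128> BlockScan;

    // Allocate shared memory for BlockScan
    __shared__ typename BlockScan::TempStorage temp_storage;

    // Load a blocked segment of 4 consecutive items per thread
    int thread_data[4];
    for (int i = 0; i < 4; ++i)
        thread_data[i] = d_data[threadIdx.x * 4 + i];

    // Collectively compute the block-wide exclusive prefix sum
    BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data);

    // Store the scanned items back in the same blocked arrangement
    for (int i = 0; i < 4; ++i)
        d_data[threadIdx.x * 4 + i] = thread_data[i];
}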
+ */ + __device__ __forceinline__ BlockScan() + : + temp_storage(PrivateStorage()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + /** + * \brief Collective constructor using the specified memory allocation as temporary storage. + */ + __device__ __forceinline__ BlockScan( + TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + + //@} end member group + /******************************************************************//** + * \name Exclusive prefix sum operations + *********************************************************************/ + //@{ + + + /** + * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. The value of 0 is applied as the initial value, and is assigned to \p output in thread0. + * + * \par + * - \identityzero + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an exclusive prefix sum of 128 integer items that + * are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain input item for each thread + * int thread_data; + * ... + * + * // Collectively compute the block-wide exclusive prefix sum + * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The + * corresponding output \p thread_data in those threads will be 0, 1, ..., 127. + * + */ + __device__ __forceinline__ void ExclusiveSum( + T input, ///< [in] Calling thread's input item + T &output) ///< [out] Calling thread's output item (may be aliased to \p input) + { + T initial_value = 0; + ExclusiveScan(input, output, initial_value, cub::Sum()); + } + + + /** + * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. The value of 0 is applied as the initial value, and is assigned to \p output in thread0. Also provides every thread with the block-wide \p block_aggregate of all inputs. + * + * \par + * - \identityzero + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an exclusive prefix sum of 128 integer items that + * are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain input item for each thread + * int thread_data; + * ... + * + * // Collectively compute the block-wide exclusive prefix sum + * int block_aggregate; + * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data, block_aggregate); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The + * corresponding output \p thread_data in those threads will be 0, 1, ..., 127. 
+ * Furthermore the value \p 128 will be stored in \p block_aggregate for all threads. + * + */ + __device__ __forceinline__ void ExclusiveSum( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + T &block_aggregate) ///< [out] block-wide aggregate reduction of input items + { + T initial_value = 0; + ExclusiveScan(input, output, initial_value, cub::Sum(), block_aggregate); + } + + + /** + * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. + * + * \par + * - \identityzero + * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). + * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. + * The functor will be invoked by the first warp of threads in the block, however only the return value from + * lane0 is applied as the block-wide prefix. Can be stateful. + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a single thread block that progressively + * computes an exclusive prefix sum over multiple "tiles" of input using a + * prefix functor to maintain a running total between block-wide scans. Each tile consists + * of 128 integer items that are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * // A stateful callback functor that maintains a running prefix to be applied + * // during consecutive scan operations. + * struct BlockPrefixCallbackOp + * { + * // Running prefix + * int running_total; + * + * // Constructor + * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} + * + * // Callback operator to be entered by the first warp of threads in the block. + * // Thread-0 is responsible for returning a value for seeding the block-wide scan. + * __device__ int operator()(int block_aggregate) + * { + * int old_prefix = running_total; + * running_total += block_aggregate; + * return old_prefix; + * } + * }; + * + * __global__ void ExampleKernel(int *d_data, int num_items, ...) + * { + * // Specialize BlockScan for a 1D block of 128 threads + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Initialize running total + * BlockPrefixCallbackOp prefix_op(0); + * + * // Have the block iterate over segments of items + * for (int block_offset = 0; block_offset < num_items; block_offset += 128) + * { + * // Load a segment of consecutive items that are blocked across threads + * int thread_data = d_data[block_offset]; + * + * // Collectively compute the block-wide exclusive prefix sum + * BlockScan(temp_storage).ExclusiveSum( + * thread_data, thread_data, prefix_op); + * CTA_SYNC(); + * + * // Store scanned items to output segment + * d_data[block_offset] = thread_data; + * } + * \endcode + * \par + * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... + * The corresponding output for the first segment will be 0, 1, ..., 127. 
+ * The output for the second segment will be 128, 129, ..., 255. + * + * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) + */ + template + __device__ __forceinline__ void ExclusiveSum( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. + { + ExclusiveScan(input, output, cub::Sum(), block_prefix_callback_op); + } + + + //@} end member group + /******************************************************************//** + * \name Exclusive prefix sum operations (multiple data per thread) + *********************************************************************/ + //@{ + + + /** + * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. The value of 0 is applied as the initial value, and is assigned to \p output[0] in thread0. + * + * \par + * - \identityzero + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an exclusive prefix sum of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Collectively compute the block-wide exclusive prefix sum + * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The + * corresponding output \p thread_data in those threads will be { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + */ + template + __device__ __forceinline__ void ExclusiveSum( + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&output)[ITEMS_PER_THREAD]) ///< [out] Calling thread's output items (may be aliased to \p input) + { + T initial_value = 0; + ExclusiveScan(input, output, initial_value, cub::Sum()); + } + + + /** + * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. The value of 0 is applied as the initial value, and is assigned to \p output[0] in thread0. Also provides every thread with the block-wide \p block_aggregate of all inputs. + * + * \par + * - \identityzero + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an exclusive prefix sum of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) 
+ * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Collectively compute the block-wide exclusive prefix sum + * int block_aggregate; + * BlockScan(temp_storage).ExclusiveSum(thread_data, thread_data, block_aggregate); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The + * corresponding output \p thread_data in those threads will be { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. + * Furthermore the value \p 512 will be stored in \p block_aggregate for all threads. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + */ + template + __device__ __forceinline__ void ExclusiveSum( + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) + T &block_aggregate) ///< [out] block-wide aggregate reduction of input items + { + // Reduce consecutive thread items in registers + T initial_value = 0; + ExclusiveScan(input, output, initial_value, cub::Sum(), block_aggregate); + } + + + /** + * \brief Computes an exclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. + * + * \par + * - \identityzero + * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). + * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. + * The functor will be invoked by the first warp of threads in the block, however only the return value from + * lane0 is applied as the block-wide prefix. Can be stateful. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a single thread block that progressively + * computes an exclusive prefix sum over multiple "tiles" of input using a + * prefix functor to maintain a running total between block-wide scans. Each tile consists + * of 512 integer items that are partitioned in a [blocked arrangement](index.html#sec5sec3) + * across 128 threads where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * // A stateful callback functor that maintains a running prefix to be applied + * // during consecutive scan operations. + * struct BlockPrefixCallbackOp + * { + * // Running prefix + * int running_total; + * + * // Constructor + * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} + * + * // Callback operator to be entered by the first warp of threads in the block. + * // Thread-0 is responsible for returning a value for seeding the block-wide scan. 
+ * __device__ int operator()(int block_aggregate) + * { + * int old_prefix = running_total; + * running_total += block_aggregate; + * return old_prefix; + * } + * }; + * + * __global__ void ExampleKernel(int *d_data, int num_items, ...) + * { + * // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128 threads, 4 ints per thread + * typedef cub::BlockLoad BlockLoad; + * typedef cub::BlockStore BlockStore; + * typedef cub::BlockScan BlockScan; + * + * // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan + * __shared__ union { + * typename BlockLoad::TempStorage load; + * typename BlockScan::TempStorage scan; + * typename BlockStore::TempStorage store; + * } temp_storage; + * + * // Initialize running total + * BlockPrefixCallbackOp prefix_op(0); + * + * // Have the block iterate over segments of items + * for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4) + * { + * // Load a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data); + * CTA_SYNC(); + * + * // Collectively compute the block-wide exclusive prefix sum + * int block_aggregate; + * BlockScan(temp_storage.scan).ExclusiveSum( + * thread_data, thread_data, prefix_op); + * CTA_SYNC(); + * + * // Store scanned items to output segment + * BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data); + * CTA_SYNC(); + * } + * \endcode + * \par + * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... + * The corresponding output for the first segment will be 0, 1, 2, 3, ..., 510, 511. + * The output for the second segment will be 512, 513, 514, 515, ..., 1022, 1023. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) + */ + template < + int ITEMS_PER_THREAD, + typename BlockPrefixCallbackOp> + __device__ __forceinline__ void ExclusiveSum( + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. + { + ExclusiveScan(input, output, cub::Sum(), block_prefix_callback_op); + } + + + + //@} end member group // Exclusive prefix sums + /******************************************************************//** + * \name Exclusive prefix scan operations + *********************************************************************/ + //@{ + + + /** + * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. + * + * \par + * - Supports non-commutative scan operators. + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an exclusive prefix max scan of 128 integer items that + * are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain input item for each thread + * int thread_data; + * ... 
+ * + * // Collectively compute the block-wide exclusive prefix max scan + * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The + * corresponding output \p thread_data in those threads will be INT_MIN, 0, 0, 2, ..., 124, 126. + * + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + T initial_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to \p output[0] in thread0) + ScanOp scan_op) ///< [in] Binary scan functor + { + InternalBlockScan(temp_storage).ExclusiveScan(input, output, initial_value, scan_op); + } + + + /** + * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. + * + * \par + * - Supports non-commutative scan operators. + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an exclusive prefix max scan of 128 integer items that + * are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain input item for each thread + * int thread_data; + * ... + * + * // Collectively compute the block-wide exclusive prefix max scan + * int block_aggregate; + * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max(), block_aggregate); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The + * corresponding output \p thread_data in those threads will be INT_MIN, 0, 0, 2, ..., 124, 126. + * Furthermore the value \p 126 will be stored in \p block_aggregate for all threads. + * + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input items + T &output, ///< [out] Calling thread's output items (may be aliased to \p input) + T initial_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to \p output[0] in thread0) + ScanOp scan_op, ///< [in] Binary scan functor + T &block_aggregate) ///< [out] block-wide aggregate reduction of input items + { + InternalBlockScan(temp_storage).ExclusiveScan(input, output, initial_value, scan_op, block_aggregate); + } + + + /** + * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
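// ---------------------------------------------------------------------------
// An illustrative sketch (not part of CUB) of the single-item ExclusiveScan
// overloads documented above: an exclusive max-scan seeded with INT_MIN that
// also reports the block-wide maximum. Kernel and buffer names are
// placeholders; a fixed 128-thread block is assumed.
// ---------------------------------------------------------------------------
#include <climits>
#include <cub/cub.cuh>

__global__ void BlockExclusiveMaxScan(const int *d_in, int *d_out, int *d_block_max)
{
    typedef cub::BlockScan<int, 128> BlockScan;
    __shared__ typename BlockScan::TempStorage temp_storage;

    int idx  = blockIdx.x * 128 + threadIdx.x;
    int item = d_in[idx];
    int block_aggregate;

    // thread0 receives INT_MIN; every thread receives the block-wide max in block_aggregate
    BlockScan(temp_storage).ExclusiveScan(item, item, INT_MIN, cub::Max(), block_aggregate);

    d_out[idx] = item;
    if (threadIdx.x == 0)
        d_block_max[blockIdx.x] = block_aggregate;
}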
+ * + * \par + * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). + * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. + * The functor will be invoked by the first warp of threads in the block, however only the return value from + * lane0 is applied as the block-wide prefix. Can be stateful. + * - Supports non-commutative scan operators. + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a single thread block that progressively + * computes an exclusive prefix max scan over multiple "tiles" of input using a + * prefix functor to maintain a running total between block-wide scans. Each tile consists + * of 128 integer items that are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * // A stateful callback functor that maintains a running prefix to be applied + * // during consecutive scan operations. + * struct BlockPrefixCallbackOp + * { + * // Running prefix + * int running_total; + * + * // Constructor + * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} + * + * // Callback operator to be entered by the first warp of threads in the block. + * // Thread-0 is responsible for returning a value for seeding the block-wide scan. + * __device__ int operator()(int block_aggregate) + * { + * int old_prefix = running_total; + * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; + * return old_prefix; + * } + * }; + * + * __global__ void ExampleKernel(int *d_data, int num_items, ...) + * { + * // Specialize BlockScan for a 1D block of 128 threads + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Initialize running total + * BlockPrefixCallbackOp prefix_op(INT_MIN); + * + * // Have the block iterate over segments of items + * for (int block_offset = 0; block_offset < num_items; block_offset += 128) + * { + * // Load a segment of consecutive items that are blocked across threads + * int thread_data = d_data[block_offset]; + * + * // Collectively compute the block-wide exclusive prefix max scan + * BlockScan(temp_storage).ExclusiveScan( + * thread_data, thread_data, INT_MIN, cub::Max(), prefix_op); + * CTA_SYNC(); + * + * // Store scanned items to output segment + * d_data[block_offset] = thread_data; + * } + * \endcode + * \par + * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... + * The corresponding output for the first segment will be INT_MIN, 0, 0, 2, ..., 124, 126. + * The output for the second segment will be 126, 128, 128, 130, ..., 252, 254. + * + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) + */ + template < + typename ScanOp, + typename BlockPrefixCallbackOp> + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan functor + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. 
+ { + InternalBlockScan(temp_storage).ExclusiveScan(input, output, scan_op, block_prefix_callback_op); + } + + + //@} end member group // Inclusive prefix sums + /******************************************************************//** + * \name Exclusive prefix scan operations (multiple data per thread) + *********************************************************************/ + //@{ + + + /** + * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. + * + * \par + * - Supports non-commutative scan operators. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an exclusive prefix max scan of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Collectively compute the block-wide exclusive prefix max scan + * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is + * { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. + * The corresponding output \p thread_data in those threads will be + * { [INT_MIN,0,0,2], [2,4,4,6], ..., [506,508,508,510] }. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template < + int ITEMS_PER_THREAD, + typename ScanOp> + __device__ __forceinline__ void ExclusiveScan( + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) + T initial_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to \p output[0] in thread0) + ScanOp scan_op) ///< [in] Binary scan functor + { + // Reduce consecutive thread items in registers + T thread_prefix = internal::ThreadReduce(input, scan_op); + + // Exclusive thread block-scan + ExclusiveScan(thread_prefix, thread_prefix, initial_value, scan_op); + + // Exclusive scan in registers with prefix as seed + internal::ThreadScanExclusive(input, output, scan_op, thread_prefix); + } + + + /** + * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. + * + * \par + * - Supports non-commutative scan operators. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an exclusive prefix max scan of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) 
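// ---------------------------------------------------------------------------
// An illustrative sketch (not part of CUB) of the multi-item ExclusiveScan
// overload documented above: 128 threads x 4 items in a blocked arrangement,
// exclusive max-scan seeded with INT_MIN. Names are placeholders, and plain
// indexed loads stand in for cub::BlockLoad to keep the sketch self-contained.
// ---------------------------------------------------------------------------
#include <climits>
#include <cub/cub.cuh>

__global__ void BlockedExclusiveMaxScan(const int *d_in, int *d_out)
{
    typedef cub::BlockScan<int, 128> BlockScan;
    __shared__ typename BlockScan::TempStorage temp_storage;

    // Blocked arrangement: thread t owns items [4*t, 4*t+3] of this block's 512-item tile
    int items[4];
    int base = blockIdx.x * 512 + threadIdx.x * 4;
    for (int i = 0; i < 4; ++i)
        items[i] = d_in[base + i];

    BlockScan(temp_storage).ExclusiveScan(items, items, INT_MIN, cub::Max());

    for (int i = 0; i < 4; ++i)
        d_out[base + i] = items[i];
}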
+ * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Collectively compute the block-wide exclusive prefix max scan + * int block_aggregate; + * BlockScan(temp_storage).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max(), block_aggregate); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. The + * corresponding output \p thread_data in those threads will be { [INT_MIN,0,0,2], [2,4,4,6], ..., [506,508,508,510] }. + * Furthermore the value \p 510 will be stored in \p block_aggregate for all threads. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template < + int ITEMS_PER_THREAD, + typename ScanOp> + __device__ __forceinline__ void ExclusiveScan( + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) + T initial_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to \p output[0] in thread0) + ScanOp scan_op, ///< [in] Binary scan functor + T &block_aggregate) ///< [out] block-wide aggregate reduction of input items + { + // Reduce consecutive thread items in registers + T thread_prefix = internal::ThreadReduce(input, scan_op); + + // Exclusive thread block-scan + ExclusiveScan(thread_prefix, thread_prefix, initial_value, scan_op, block_aggregate); + + // Exclusive scan in registers with prefix as seed + internal::ThreadScanExclusive(input, output, scan_op, thread_prefix); + } + + + /** + * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. + * + * \par + * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). + * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. + * The functor will be invoked by the first warp of threads in the block, however only the return value from + * lane0 is applied as the block-wide prefix. Can be stateful. + * - Supports non-commutative scan operators. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a single thread block that progressively + * computes an exclusive prefix max scan over multiple "tiles" of input using a + * prefix functor to maintain a running total between block-wide scans. Each tile consists + * of 128 integer items that are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * // A stateful callback functor that maintains a running prefix to be applied + * // during consecutive scan operations. 
+ * struct BlockPrefixCallbackOp + * { + * // Running prefix + * int running_total; + * + * // Constructor + * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} + * + * // Callback operator to be entered by the first warp of threads in the block. + * // Thread-0 is responsible for returning a value for seeding the block-wide scan. + * __device__ int operator()(int block_aggregate) + * { + * int old_prefix = running_total; + * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; + * return old_prefix; + * } + * }; + * + * __global__ void ExampleKernel(int *d_data, int num_items, ...) + * { + * // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128 threads, 4 ints per thread + * typedef cub::BlockLoad BlockLoad; + * typedef cub::BlockStore BlockStore; + * typedef cub::BlockScan BlockScan; + * + * // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan + * __shared__ union { + * typename BlockLoad::TempStorage load; + * typename BlockScan::TempStorage scan; + * typename BlockStore::TempStorage store; + * } temp_storage; + * + * // Initialize running total + * BlockPrefixCallbackOp prefix_op(0); + * + * // Have the block iterate over segments of items + * for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4) + * { + * // Load a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data); + * CTA_SYNC(); + * + * // Collectively compute the block-wide exclusive prefix max scan + * BlockScan(temp_storage.scan).ExclusiveScan( + * thread_data, thread_data, INT_MIN, cub::Max(), prefix_op); + * CTA_SYNC(); + * + * // Store scanned items to output segment + * BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data); + * CTA_SYNC(); + * } + * \endcode + * \par + * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... + * The corresponding output for the first segment will be INT_MIN, 0, 0, 2, 2, 4, ..., 508, 510. + * The output for the second segment will be 510, 512, 512, 514, 514, 516, ..., 1020, 1022. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) + */ + template < + int ITEMS_PER_THREAD, + typename ScanOp, + typename BlockPrefixCallbackOp> + __device__ __forceinline__ void ExclusiveScan( + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan functor + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. 
+ { + // Reduce consecutive thread items in registers + T thread_prefix = internal::ThreadReduce(input, scan_op); + + // Exclusive thread block-scan + ExclusiveScan(thread_prefix, thread_prefix, scan_op, block_prefix_callback_op); + + // Exclusive scan in registers with prefix as seed + internal::ThreadScanExclusive(input, output, scan_op, thread_prefix); + } + + + //@} end member group +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document no-initial-value scans + + /******************************************************************//** + * \name Exclusive prefix scan operations (no initial value, single datum per thread) + *********************************************************************/ + //@{ + + + /** + * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. + * + * \par + * - Supports non-commutative scan operators. + * - \rowmajor + * - \smemreuse + * + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op) ///< [in] Binary scan functor + { + InternalBlockScan(temp_storage).ExclusiveScan(input, output, scan_op); + } + + + /** + * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. + * + * \par + * - Supports non-commutative scan operators. + * - \rowmajor + * - \smemreuse + * + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan functor + T &block_aggregate) ///< [out] block-wide aggregate reduction of input items + { + InternalBlockScan(temp_storage).ExclusiveScan(input, output, scan_op, block_aggregate); + } + + //@} end member group + /******************************************************************//** + * \name Exclusive prefix scan operations (no initial value, multiple data per thread) + *********************************************************************/ + //@{ + + + /** + * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. With no initial value, the output computed for thread0 is undefined. + * + * \par + * - Supports non-commutative scan operators. + * - \blocked + * - \granularity + * - \smemreuse + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
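// ---------------------------------------------------------------------------
// The ExclusiveScan overloads in this deliberately undocumented section take no
// initial value, so thread0's output is undefined; the multi-item inclusive
// scans later in this file build on them. A hypothetical sketch (not part of
// CUB, placeholder names) that touches only the defined outputs and the
// aggregate:
// ---------------------------------------------------------------------------
#include <cub/cub.cuh>

__global__ void ExclusiveMaxScanNoSeed(const int *d_in, int *d_out, int *d_block_max)
{
    typedef cub::BlockScan<int, 128> BlockScan;
    __shared__ typename BlockScan::TempStorage temp_storage;

    int idx  = blockIdx.x * 128 + threadIdx.x;
    int item = d_in[idx];
    int block_aggregate;

    BlockScan(temp_storage).ExclusiveScan(item, item, cub::Max(), block_aggregate);

    if (threadIdx.x > 0)          // item is undefined in thread0: skip it
        d_out[idx] = item;
    if (threadIdx.x == 0)
        d_block_max[blockIdx.x] = block_aggregate;
}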
+ * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template < + int ITEMS_PER_THREAD, + typename ScanOp> + __device__ __forceinline__ void ExclusiveScan( + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) + ScanOp scan_op) ///< [in] Binary scan functor + { + // Reduce consecutive thread items in registers + T thread_partial = internal::ThreadReduce(input, scan_op); + + // Exclusive thread block-scan + ExclusiveScan(thread_partial, thread_partial, scan_op); + + // Exclusive scan in registers with prefix + internal::ThreadScanExclusive(input, output, scan_op, thread_partial, (linear_tid != 0)); + } + + + /** + * \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. + * + * \par + * - Supports non-commutative scan operators. + * - \blocked + * - \granularity + * - \smemreuse + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template < + int ITEMS_PER_THREAD, + typename ScanOp> + __device__ __forceinline__ void ExclusiveScan( + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan functor + T &block_aggregate) ///< [out] block-wide aggregate reduction of input items + { + // Reduce consecutive thread items in registers + T thread_partial = internal::ThreadReduce(input, scan_op); + + // Exclusive thread block-scan + ExclusiveScan(thread_partial, thread_partial, scan_op, block_aggregate); + + // Exclusive scan in registers with prefix + internal::ThreadScanExclusive(input, output, scan_op, thread_partial, (linear_tid != 0)); + } + + + //@} end member group +#endif // DOXYGEN_SHOULD_SKIP_THIS // Do not document no-initial-value scans + + /******************************************************************//** + * \name Inclusive prefix sum operations + *********************************************************************/ + //@{ + + + /** + * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. + * + * \par + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an inclusive prefix sum of 128 integer items that + * are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain input item for each thread + * int thread_data; + * ... + * + * // Collectively compute the block-wide inclusive prefix sum + * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. 
The + * corresponding output \p thread_data in those threads will be 1, 2, ..., 128. + * + */ + __device__ __forceinline__ void InclusiveSum( + T input, ///< [in] Calling thread's input item + T &output) ///< [out] Calling thread's output item (may be aliased to \p input) + { + InclusiveScan(input, output, cub::Sum()); + } + + + /** + * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. + * + * \par + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an inclusive prefix sum of 128 integer items that + * are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain input item for each thread + * int thread_data; + * ... + * + * // Collectively compute the block-wide inclusive prefix sum + * int block_aggregate; + * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data, block_aggregate); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is 1, 1, ..., 1. The + * corresponding output \p thread_data in those threads will be 1, 2, ..., 128. + * Furthermore the value \p 128 will be stored in \p block_aggregate for all threads. + * + */ + __device__ __forceinline__ void InclusiveSum( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + T &block_aggregate) ///< [out] block-wide aggregate reduction of input items + { + InclusiveScan(input, output, cub::Sum(), block_aggregate); + } + + + + /** + * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes one input element. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. + * + * \par + * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). + * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. + * The functor will be invoked by the first warp of threads in the block, however only the return value from + * lane0 is applied as the block-wide prefix. Can be stateful. + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a single thread block that progressively + * computes an inclusive prefix sum over multiple "tiles" of input using a + * prefix functor to maintain a running total between block-wide scans. Each tile consists + * of 128 integer items that are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * // A stateful callback functor that maintains a running prefix to be applied + * // during consecutive scan operations. 
+ * struct BlockPrefixCallbackOp + * { + * // Running prefix + * int running_total; + * + * // Constructor + * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} + * + * // Callback operator to be entered by the first warp of threads in the block. + * // Thread-0 is responsible for returning a value for seeding the block-wide scan. + * __device__ int operator()(int block_aggregate) + * { + * int old_prefix = running_total; + * running_total += block_aggregate; + * return old_prefix; + * } + * }; + * + * __global__ void ExampleKernel(int *d_data, int num_items, ...) + * { + * // Specialize BlockScan for a 1D block of 128 threads + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Initialize running total + * BlockPrefixCallbackOp prefix_op(0); + * + * // Have the block iterate over segments of items + * for (int block_offset = 0; block_offset < num_items; block_offset += 128) + * { + * // Load a segment of consecutive items that are blocked across threads + * int thread_data = d_data[block_offset]; + * + * // Collectively compute the block-wide inclusive prefix sum + * BlockScan(temp_storage).InclusiveSum( + * thread_data, thread_data, prefix_op); + * CTA_SYNC(); + * + * // Store scanned items to output segment + * d_data[block_offset] = thread_data; + * } + * \endcode + * \par + * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, .... + * The corresponding output for the first segment will be 1, 2, ..., 128. + * The output for the second segment will be 129, 130, ..., 256. + * + * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) + */ + template + __device__ __forceinline__ void InclusiveSum( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. + { + InclusiveScan(input, output, cub::Sum(), block_prefix_callback_op); + } + + + //@} end member group + /******************************************************************//** + * \name Inclusive prefix sum operations (multiple data per thread) + *********************************************************************/ + //@{ + + + /** + * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. + * + * \par + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an inclusive prefix sum of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... 
+ * + * // Collectively compute the block-wide inclusive prefix sum + * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The + * corresponding output \p thread_data in those threads will be { [1,2,3,4], [5,6,7,8], ..., [509,510,511,512] }. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + */ + template + __device__ __forceinline__ void InclusiveSum( + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&output)[ITEMS_PER_THREAD]) ///< [out] Calling thread's output items (may be aliased to \p input) + { + if (ITEMS_PER_THREAD == 1) + { + InclusiveSum(input[0], output[0]); + } + else + { + // Reduce consecutive thread items in registers + Sum scan_op; + T thread_prefix = internal::ThreadReduce(input, scan_op); + + // Exclusive thread block-scan + ExclusiveSum(thread_prefix, thread_prefix); + + // Inclusive scan in registers with prefix as seed + internal::ThreadScanInclusive(input, output, scan_op, thread_prefix, (linear_tid != 0)); + } + } + + + /** + * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. + * + * \par + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an inclusive prefix sum of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Collectively compute the block-wide inclusive prefix sum + * int block_aggregate; + * BlockScan(temp_storage).InclusiveSum(thread_data, thread_data, block_aggregate); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is + * { [1,1,1,1], [1,1,1,1], ..., [1,1,1,1] }. The + * corresponding output \p thread_data in those threads will be + * { [1,2,3,4], [5,6,7,8], ..., [509,510,511,512] }. + * Furthermore the value \p 512 will be stored in \p block_aggregate for all threads. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. 
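// ---------------------------------------------------------------------------
// The multi-item InclusiveSum overloads above decompose into three steps: a
// per-thread reduction, a single-item exclusive block scan of the per-thread
// totals, and a local inclusive re-scan seeded by that prefix. The sketch below
// (not part of CUB; names are placeholders) spells out the same three steps
// with public calls only, for a 128 x 4 blocked arrangement.
// ---------------------------------------------------------------------------
#include <cub/cub.cuh>

__global__ void HandRolledInclusiveSum(const int *d_in, int *d_out)
{
    typedef cub::BlockScan<int, 128> BlockScan;
    __shared__ typename BlockScan::TempStorage temp_storage;

    int items[4];
    int base = blockIdx.x * 512 + threadIdx.x * 4;
    for (int i = 0; i < 4; ++i)
        items[i] = d_in[base + i];

    // (1) reduce the 4 items owned by this thread
    int thread_total = items[0] + items[1] + items[2] + items[3];

    // (2) exclusive block-wide scan of the per-thread totals
    int thread_prefix;
    BlockScan(temp_storage).ExclusiveSum(thread_total, thread_prefix);

    // (3) inclusive re-scan in registers, seeded with the prefix from step (2)
    int running = thread_prefix;
    for (int i = 0; i < 4; ++i)
    {
        running += items[i];
        d_out[base + i] = running;
    }
}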
+ * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void InclusiveSum( + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) + T &block_aggregate) ///< [out] block-wide aggregate reduction of input items + { + if (ITEMS_PER_THREAD == 1) + { + InclusiveSum(input[0], output[0], block_aggregate); + } + else + { + // Reduce consecutive thread items in registers + Sum scan_op; + T thread_prefix = internal::ThreadReduce(input, scan_op); + + // Exclusive thread block-scan + ExclusiveSum(thread_prefix, thread_prefix, block_aggregate); + + // Inclusive scan in registers with prefix as seed + internal::ThreadScanInclusive(input, output, scan_op, thread_prefix, (linear_tid != 0)); + } + } + + + /** + * \brief Computes an inclusive block-wide prefix scan using addition (+) as the scan operator. Each thread contributes an array of consecutive input elements. Instead of using 0 as the block-wide prefix, the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. + * + * \par + * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). + * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. + * The functor will be invoked by the first warp of threads in the block, however only the return value from + * lane0 is applied as the block-wide prefix. Can be stateful. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a single thread block that progressively + * computes an inclusive prefix sum over multiple "tiles" of input using a + * prefix functor to maintain a running total between block-wide scans. Each tile consists + * of 512 integer items that are partitioned in a [blocked arrangement](index.html#sec5sec3) + * across 128 threads where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * // A stateful callback functor that maintains a running prefix to be applied + * // during consecutive scan operations. + * struct BlockPrefixCallbackOp + * { + * // Running prefix + * int running_total; + * + * // Constructor + * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} + * + * // Callback operator to be entered by the first warp of threads in the block. + * // Thread-0 is responsible for returning a value for seeding the block-wide scan. + * __device__ int operator()(int block_aggregate) + * { + * int old_prefix = running_total; + * running_total += block_aggregate; + * return old_prefix; + * } + * }; + * + * __global__ void ExampleKernel(int *d_data, int num_items, ...) 
+ * {
+ *     // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128 threads, 4 ints per thread
+ *     typedef cub::BlockLoad<int, 128, 4, BLOCK_LOAD_TRANSPOSE>   BlockLoad;
+ *     typedef cub::BlockStore<int, 128, 4, BLOCK_STORE_TRANSPOSE> BlockStore;
+ *     typedef cub::BlockScan<int, 128>                            BlockScan;
+ *
+ *     // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan
+ *     __shared__ union {
+ *         typename BlockLoad::TempStorage  load;
+ *         typename BlockScan::TempStorage  scan;
+ *         typename BlockStore::TempStorage store;
+ *     } temp_storage;
+ *
+ *     // Initialize running total
+ *     BlockPrefixCallbackOp prefix_op(0);
+ *
+ *     // Have the block iterate over segments of items
+ *     for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4)
+ *     {
+ *         // Load a segment of consecutive items that are blocked across threads
+ *         int thread_data[4];
+ *         BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data);
+ *         CTA_SYNC();
+ *
+ *         // Collectively compute the block-wide inclusive prefix sum
+ *         BlockScan(temp_storage.scan).InclusiveSum(
+ *             thread_data, thread_data, prefix_op);
+ *         CTA_SYNC();
+ *
+ *         // Store scanned items to output segment
+ *         BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data);
+ *         CTA_SYNC();
+ *     }
+ * \endcode
+ * \par
+ * Suppose the input \p d_data is 1, 1, 1, 1, 1, 1, 1, 1, ....
+ * The corresponding output for the first segment will be 1, 2, 3, 4, ..., 511, 512.
+ * The output for the second segment will be 513, 514, 515, 516, ..., 1023, 1024.
+ *
+ * \tparam ITEMS_PER_THREAD      [inferred] The number of consecutive items partitioned onto each thread.
+ * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate)
+ */
+ template <
+     int ITEMS_PER_THREAD,
+     typename BlockPrefixCallbackOp>
+ __device__ __forceinline__ void InclusiveSum(
+     T (&input)[ITEMS_PER_THREAD],                     ///< [in] Calling thread's input items
+     T (&output)[ITEMS_PER_THREAD],                    ///< [out] Calling thread's output items (may be aliased to \p input)
+     BlockPrefixCallbackOp &block_prefix_callback_op)  ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence.
+ {
+     if (ITEMS_PER_THREAD == 1)
+     {
+         InclusiveSum(input[0], output[0], block_prefix_callback_op);
+     }
+     else
+     {
+         // Reduce consecutive thread items in registers
+         Sum scan_op;
+         T thread_prefix = internal::ThreadReduce(input, scan_op);
+
+         // Exclusive thread block-scan
+         ExclusiveSum(thread_prefix, thread_prefix, block_prefix_callback_op);
+
+         // Inclusive scan in registers with prefix as seed
+         internal::ThreadScanInclusive(input, output, scan_op, thread_prefix);
+     }
+ }
+
+
+ //@} end member group
+ /******************************************************************//**
+ * \name Inclusive prefix scan operations
+ *********************************************************************/
+ //@{
+
+
+ /**
+ * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element.
+ *
+ * \par
+ * - Supports non-commutative scan operators.
+ * - \rowmajor
+ * - \smemreuse
+ *
+ * \par Snippet
+ * The code snippet below illustrates an inclusive prefix max scan of 128 integer items that
+ * are partitioned across 128 threads.
+ * \par
+ * \code
+ * #include <cub/cub.cuh>   // or equivalently <cub/block/block_scan.cuh>
+ *
+ * __global__ void ExampleKernel(...)
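// ---------------------------------------------------------------------------
// A compilable counterpart (not part of CUB) of the tiled multi-item
// InclusiveSum snippet above. Kernel, functor, and buffer names are
// placeholders; plain indexed loads and stores stand in for
// cub::BlockLoad/BlockStore, and num_items is assumed to be a multiple of the
// 512-item tile.
// ---------------------------------------------------------------------------
#include <cub/cub.cuh>

struct RunningTotal
{
    int running_total;
    __device__ RunningTotal(int init) : running_total(init) {}
    __device__ int operator()(int block_aggregate)
    {
        int old_prefix = running_total;
        running_total += block_aggregate;
        return old_prefix;
    }
};

__global__ void TiledInclusiveSum(const int *d_in, int *d_out, int num_items)
{
    typedef cub::BlockScan<int, 128> BlockScan;
    __shared__ typename BlockScan::TempStorage temp_storage;

    RunningTotal prefix_op(0);

    for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4)
    {
        // Blocked arrangement within the current 512-item tile
        int thread_data[4];
        int base = block_offset + threadIdx.x * 4;
        for (int i = 0; i < 4; ++i)
            thread_data[i] = d_in[base + i];

        BlockScan(temp_storage).InclusiveSum(thread_data, thread_data, prefix_op);
        __syncthreads();   // temp_storage is reused by the next tile

        for (int i = 0; i < 4; ++i)
            d_out[base + i] = thread_data[i];
    }
}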
+ * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain input item for each thread + * int thread_data; + * ... + * + * // Collectively compute the block-wide inclusive prefix max scan + * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The + * corresponding output \p thread_data in those threads will be 0, 0, 2, 2, ..., 126, 126. + * + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op) ///< [in] Binary scan functor + { + InternalBlockScan(temp_storage).InclusiveScan(input, output, scan_op); + } + + + /** + * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. + * + * \par + * - Supports non-commutative scan operators. + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an inclusive prefix max scan of 128 integer items that + * are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain input item for each thread + * int thread_data; + * ... + * + * // Collectively compute the block-wide inclusive prefix max scan + * int block_aggregate; + * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max(), block_aggregate); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is 0, -1, 2, -3, ..., 126, -127. The + * corresponding output \p thread_data in those threads will be 0, 0, 2, 2, ..., 126, 126. + * Furthermore the value \p 126 will be stored in \p block_aggregate for all threads. + * + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan functor + T &block_aggregate) ///< [out] block-wide aggregate reduction of input items + { + InternalBlockScan(temp_storage).InclusiveScan(input, output, scan_op, block_aggregate); + } + + + /** + * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
+ * + * \par + * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). + * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. + * The functor will be invoked by the first warp of threads in the block, however only the return value from + * lane0 is applied as the block-wide prefix. Can be stateful. + * - Supports non-commutative scan operators. + * - \rowmajor + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a single thread block that progressively + * computes an inclusive prefix max scan over multiple "tiles" of input using a + * prefix functor to maintain a running total between block-wide scans. Each tile consists + * of 128 integer items that are partitioned across 128 threads. + * \par + * \code + * #include // or equivalently + * + * // A stateful callback functor that maintains a running prefix to be applied + * // during consecutive scan operations. + * struct BlockPrefixCallbackOp + * { + * // Running prefix + * int running_total; + * + * // Constructor + * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} + * + * // Callback operator to be entered by the first warp of threads in the block. + * // Thread-0 is responsible for returning a value for seeding the block-wide scan. + * __device__ int operator()(int block_aggregate) + * { + * int old_prefix = running_total; + * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; + * return old_prefix; + * } + * }; + * + * __global__ void ExampleKernel(int *d_data, int num_items, ...) + * { + * // Specialize BlockScan for a 1D block of 128 threads + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Initialize running total + * BlockPrefixCallbackOp prefix_op(INT_MIN); + * + * // Have the block iterate over segments of items + * for (int block_offset = 0; block_offset < num_items; block_offset += 128) + * { + * // Load a segment of consecutive items that are blocked across threads + * int thread_data = d_data[block_offset]; + * + * // Collectively compute the block-wide inclusive prefix max scan + * BlockScan(temp_storage).InclusiveScan( + * thread_data, thread_data, cub::Max(), prefix_op); + * CTA_SYNC(); + * + * // Store scanned items to output segment + * d_data[block_offset] = thread_data; + * } + * \endcode + * \par + * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... + * The corresponding output for the first segment will be 0, 0, 2, 2, ..., 126, 126. + * The output for the second segment will be 128, 128, 130, 130, ..., 254, 254. + * + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) + */ + template < + typename ScanOp, + typename BlockPrefixCallbackOp> + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan functor + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. 
+ { + InternalBlockScan(temp_storage).InclusiveScan(input, output, scan_op, block_prefix_callback_op); + } + + + //@} end member group + /******************************************************************//** + * \name Inclusive prefix scan operations (multiple data per thread) + *********************************************************************/ + //@{ + + + /** + * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. + * + * \par + * - Supports non-commutative scan operators. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an inclusive prefix max scan of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Collectively compute the block-wide inclusive prefix max scan + * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. The + * corresponding output \p thread_data in those threads will be { [0,0,2,2], [4,4,6,6], ..., [508,508,510,510] }. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template < + int ITEMS_PER_THREAD, + typename ScanOp> + __device__ __forceinline__ void InclusiveScan( + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) + ScanOp scan_op) ///< [in] Binary scan functor + { + if (ITEMS_PER_THREAD == 1) + { + InclusiveScan(input[0], output[0], scan_op); + } + else + { + // Reduce consecutive thread items in registers + T thread_prefix = internal::ThreadReduce(input, scan_op); + + // Exclusive thread block-scan + ExclusiveScan(thread_prefix, thread_prefix, scan_op); + + // Inclusive scan in registers with prefix as seed (first thread does not seed) + internal::ThreadScanInclusive(input, output, scan_op, thread_prefix, (linear_tid != 0)); + } + } + + + /** + * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. + * + * \par + * - Supports non-commutative scan operators. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates an inclusive prefix max scan of 512 integer items that + * are partitioned in a [blocked arrangement](index.html#sec5sec3) across 128 threads + * where each thread owns 4 consecutive items. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) 
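// ---------------------------------------------------------------------------
// An illustrative sketch (not part of CUB) of the multi-item InclusiveScan
// overloads above: a 128 x 4 blocked inclusive max-scan that also reports the
// block-wide maximum. Names are placeholders; plain indexed loads stand in for
// cub::BlockLoad.
// ---------------------------------------------------------------------------
#include <cub/cub.cuh>

__global__ void BlockedInclusiveMaxScan(const int *d_in, int *d_out, int *d_block_max)
{
    typedef cub::BlockScan<int, 128> BlockScan;
    __shared__ typename BlockScan::TempStorage temp_storage;

    int items[4];
    int base = blockIdx.x * 512 + threadIdx.x * 4;
    for (int i = 0; i < 4; ++i)
        items[i] = d_in[base + i];

    int block_aggregate;
    BlockScan(temp_storage).InclusiveScan(items, items, cub::Max(), block_aggregate);

    for (int i = 0; i < 4; ++i)
        d_out[base + i] = items[i];
    if (threadIdx.x == 0)
        d_block_max[blockIdx.x] = block_aggregate;
}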
+ * { + * // Specialize BlockScan for a 1D block of 128 threads on type int + * typedef cub::BlockScan BlockScan; + * + * // Allocate shared memory for BlockScan + * __shared__ typename BlockScan::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Collectively compute the block-wide inclusive prefix max scan + * int block_aggregate; + * BlockScan(temp_storage).InclusiveScan(thread_data, thread_data, cub::Max(), block_aggregate); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is + * { [0,-1,2,-3], [4,-5,6,-7], ..., [508,-509,510,-511] }. + * The corresponding output \p thread_data in those threads will be + * { [0,0,2,2], [4,4,6,6], ..., [508,508,510,510] }. + * Furthermore the value \p 510 will be stored in \p block_aggregate for all threads. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template < + int ITEMS_PER_THREAD, + typename ScanOp> + __device__ __forceinline__ void InclusiveScan( + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan functor + T &block_aggregate) ///< [out] block-wide aggregate reduction of input items + { + if (ITEMS_PER_THREAD == 1) + { + InclusiveScan(input[0], output[0], scan_op, block_aggregate); + } + else + { + // Reduce consecutive thread items in registers + T thread_prefix = internal::ThreadReduce(input, scan_op); + + // Exclusive thread block-scan (with no initial value) + ExclusiveScan(thread_prefix, thread_prefix, scan_op, block_aggregate); + + // Inclusive scan in registers with prefix as seed (first thread does not seed) + internal::ThreadScanInclusive(input, output, scan_op, thread_prefix, (linear_tid != 0)); + } + } + + + /** + * \brief Computes an inclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. + * + * \par + * - The \p block_prefix_callback_op functor must implement a member function T operator()(T block_aggregate). + * The functor's input parameter \p block_aggregate is the same value also returned by the scan operation. + * The functor will be invoked by the first warp of threads in the block, however only the return value from + * lane0 is applied as the block-wide prefix. Can be stateful. + * - Supports non-commutative scan operators. + * - \blocked + * - \granularity + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates a single thread block that progressively + * computes an inclusive prefix max scan over multiple "tiles" of input using a + * prefix functor to maintain a running total between block-wide scans. Each tile consists + * of 128 integer items that are partitioned across 128 threads. 
+ * \par + * \code + * #include // or equivalently + * + * // A stateful callback functor that maintains a running prefix to be applied + * // during consecutive scan operations. + * struct BlockPrefixCallbackOp + * { + * // Running prefix + * int running_total; + * + * // Constructor + * __device__ BlockPrefixCallbackOp(int running_total) : running_total(running_total) {} + * + * // Callback operator to be entered by the first warp of threads in the block. + * // Thread-0 is responsible for returning a value for seeding the block-wide scan. + * __device__ int operator()(int block_aggregate) + * { + * int old_prefix = running_total; + * running_total = (block_aggregate > old_prefix) ? block_aggregate : old_prefix; + * return old_prefix; + * } + * }; + * + * __global__ void ExampleKernel(int *d_data, int num_items, ...) + * { + * // Specialize BlockLoad, BlockStore, and BlockScan for a 1D block of 128 threads, 4 ints per thread + * typedef cub::BlockLoad BlockLoad; + * typedef cub::BlockStore BlockStore; + * typedef cub::BlockScan BlockScan; + * + * // Allocate aliased shared memory for BlockLoad, BlockStore, and BlockScan + * __shared__ union { + * typename BlockLoad::TempStorage load; + * typename BlockScan::TempStorage scan; + * typename BlockStore::TempStorage store; + * } temp_storage; + * + * // Initialize running total + * BlockPrefixCallbackOp prefix_op(0); + * + * // Have the block iterate over segments of items + * for (int block_offset = 0; block_offset < num_items; block_offset += 128 * 4) + * { + * // Load a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * BlockLoad(temp_storage.load).Load(d_data + block_offset, thread_data); + * CTA_SYNC(); + * + * // Collectively compute the block-wide inclusive prefix max scan + * BlockScan(temp_storage.scan).InclusiveScan( + * thread_data, thread_data, cub::Max(), prefix_op); + * CTA_SYNC(); + * + * // Store scanned items to output segment + * BlockStore(temp_storage.store).Store(d_data + block_offset, thread_data); + * CTA_SYNC(); + * } + * \endcode + * \par + * Suppose the input \p d_data is 0, -1, 2, -3, 4, -5, .... + * The corresponding output for the first segment will be 0, 0, 2, 2, 4, 4, ..., 510, 510. + * The output for the second segment will be 512, 512, 514, 514, 516, 516, ..., 1022, 1022. + * + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + * \tparam BlockPrefixCallbackOp [inferred] Call-back functor type having member T operator()(T block_aggregate) + */ + template < + int ITEMS_PER_THREAD, + typename ScanOp, + typename BlockPrefixCallbackOp> + __device__ __forceinline__ void InclusiveScan( + T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items + T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan functor + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a block-wide prefix to be applied to the logical input sequence. 
+ { + if (ITEMS_PER_THREAD == 1) + { + InclusiveScan(input[0], output[0], scan_op, block_prefix_callback_op); + } + else + { + // Reduce consecutive thread items in registers + T thread_prefix = internal::ThreadReduce(input, scan_op); + + // Exclusive thread block-scan + ExclusiveScan(thread_prefix, thread_prefix, scan_op, block_prefix_callback_op); + + // Inclusive scan in registers with prefix as seed + internal::ThreadScanInclusive(input, output, scan_op, thread_prefix); + } + } + + //@} end member group + + +}; + +/** + * \example example_block_scan.cu + */ + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_shuffle.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_shuffle.cuh new file mode 100644 index 0000000..a0cc71d --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_shuffle.cuh @@ -0,0 +1,305 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * The cub::BlockShuffle class provides [collective](index.html#sec0) methods for shuffling data partitioned across a CUDA thread block. + */ + +#pragma once + +#include "../util_arch.cuh" +#include "../util_ptx.cuh" +#include "../util_macro.cuh" +#include "../util_type.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \brief The BlockShuffle class provides [collective](index.html#sec0) methods for shuffling data partitioned across a CUDA thread block. + * \ingroup BlockModule + * + * \tparam T The data type to be exchanged. 
+ * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension + * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) + * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) + * \tparam PTX_ARCH [optional] \ptxversion + * + * \par Overview + * It is commonplace for blocks of threads to rearrange data items between + * threads. The BlockShuffle abstraction allows threads to efficiently shift items + * either (a) up to their successor or (b) down to their predecessor. + * + */ +template < + typename T, + int BLOCK_DIM_X, + int BLOCK_DIM_Y = 1, + int BLOCK_DIM_Z = 1, + int PTX_ARCH = CUB_PTX_ARCH> +class BlockShuffle +{ +private: + + /****************************************************************************** + * Constants + ******************************************************************************/ + + enum + { + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + + LOG_WARP_THREADS = CUB_LOG_WARP_THREADS(PTX_ARCH), + WARP_THREADS = 1 << LOG_WARP_THREADS, + WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, + }; + + /****************************************************************************** + * Type definitions + ******************************************************************************/ + + /// Shared memory storage layout type (last element from each thread's input) + struct _TempStorage + { + T prev[BLOCK_THREADS]; + T next[BLOCK_THREADS]; + }; + + +public: + + /// \smemstorage{BlockShuffle} + struct TempStorage : Uninitialized<_TempStorage> {}; + +private: + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + /// Shared storage reference + _TempStorage &temp_storage; + + /// Linear thread-id + unsigned int linear_tid; + + + /****************************************************************************** + * Utility methods + ******************************************************************************/ + + /// Internal storage allocator + __device__ __forceinline__ _TempStorage& PrivateStorage() + { + __shared__ _TempStorage private_storage; + return private_storage; + } + + +public: + + /******************************************************************//** + * \name Collective constructors + *********************************************************************/ + //@{ + + /** + * \brief Collective constructor using a private static allocation of shared memory as temporary storage. + */ + __device__ __forceinline__ BlockShuffle() + : + temp_storage(PrivateStorage()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + /** + * \brief Collective constructor using the specified memory allocation as temporary storage. + */ + __device__ __forceinline__ BlockShuffle( + TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + //@} end member group + /******************************************************************//** + * \name Shuffle movement + *********************************************************************/ + //@{ + + + /** + * \brief Each threadi obtains the \p input provided by threadi+distance. The offset \p distance may be negative. 
+ * + * \par + * - \smemreuse + */ + __device__ __forceinline__ void Offset( + T input, ///< [in] The input item from the calling thread (threadi) + T& output, ///< [out] The \p input item from the successor (or predecessor) thread threadi+distance (may be aliased to \p input). This value is only updated for for threadi when 0 <= (i + \p distance) < BLOCK_THREADS-1 + int distance = 1) ///< [in] Offset distance (may be negative) + { + temp_storage[linear_tid].prev = input; + + CTA_SYNC(); + + if ((linear_tid + distance >= 0) && (linear_tid + distance < BLOCK_THREADS)) + output = temp_storage[linear_tid + distance].prev; + } + + + /** + * \brief Each threadi obtains the \p input provided by threadi+distance. + * + * \par + * - \smemreuse + */ + __device__ __forceinline__ void Rotate( + T input, ///< [in] The calling thread's input item + T& output, ///< [out] The \p input item from thread thread(i+distance>)% (may be aliased to \p input). This value is not updated for threadBLOCK_THREADS-1 + unsigned int distance = 1) ///< [in] Offset distance (0 < \p distance < BLOCK_THREADS) + { + temp_storage[linear_tid].prev = input; + + CTA_SYNC(); + + unsigned int offset = threadIdx.x + distance; + if (offset >= BLOCK_THREADS) + offset -= BLOCK_THREADS; + + output = temp_storage[offset].prev; + } + + + /** + * \brief The thread block rotates its [blocked arrangement](index.html#sec5sec3) of \p input items, shifting it up by one item + * + * \par + * - \blocked + * - \granularity + * - \smemreuse + */ + template + __device__ __forceinline__ void Up( + T (&input)[ITEMS_PER_THREAD], ///< [in] The calling thread's input items + T (&prev)[ITEMS_PER_THREAD]) ///< [out] The corresponding predecessor items (may be aliased to \p input). The item \p prev[0] is not updated for thread0. + { + temp_storage[linear_tid].prev = input[ITEMS_PER_THREAD - 1]; + + CTA_SYNC(); + + #pragma unroll + for (int ITEM = ITEMS_PER_THREAD - 1; ITEM > 0; --ITEM) + prev[ITEM] = input[ITEM - 1]; + + + if (linear_tid > 0) + prev[0] = temp_storage[linear_tid - 1].prev; + } + + + /** + * \brief The thread block rotates its [blocked arrangement](index.html#sec5sec3) of \p input items, shifting it up by one item. All threads receive the \p input provided by threadBLOCK_THREADS-1. + * + * \par + * - \blocked + * - \granularity + * - \smemreuse + */ + template + __device__ __forceinline__ void Up( + T (&input)[ITEMS_PER_THREAD], ///< [in] The calling thread's input items + T (&prev)[ITEMS_PER_THREAD], ///< [out] The corresponding predecessor items (may be aliased to \p input). The item \p prev[0] is not updated for thread0. + T &block_suffix) ///< [out] The item \p input[ITEMS_PER_THREAD-1] from threadBLOCK_THREADS-1, provided to all threads + { + Up(input, prev); + block_suffix = temp_storage[BLOCK_THREADS - 1].prev; + } + + + /** + * \brief The thread block rotates its [blocked arrangement](index.html#sec5sec3) of \p input items, shifting it down by one item + * + * \par + * - \blocked + * - \granularity + * - \smemreuse + */ + template + __device__ __forceinline__ void Down( + T (&input)[ITEMS_PER_THREAD], ///< [in] The calling thread's input items + T (&prev)[ITEMS_PER_THREAD]) ///< [out] The corresponding predecessor items (may be aliased to \p input). The value \p prev[0] is not updated for threadBLOCK_THREADS-1. 
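+        // Note: the body below duplicates the logic of Up() above (it publishes each
+        // thread's last item and pulls prev[0] from the preceding thread), so as
+        // written it performs the same upward shift rather than the downward shift
+        // described in the \brief; treat the documented "shift down" semantics with
+        // caution in this version of the header.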
+ { + temp_storage[linear_tid].prev = input[ITEMS_PER_THREAD - 1]; + + CTA_SYNC(); + + #pragma unroll + for (int ITEM = ITEMS_PER_THREAD - 1; ITEM > 0; --ITEM) + prev[ITEM] = input[ITEM - 1]; + + if (linear_tid > 0) + prev[0] = temp_storage[linear_tid - 1].prev; + } + + + /** + * \brief The thread block rotates its [blocked arrangement](index.html#sec5sec3) of input items, shifting it down by one item. All threads receive \p input[0] provided by thread0. + * + * \par + * - \blocked + * - \granularity + * - \smemreuse + */ + template + __device__ __forceinline__ void Down( + T (&input)[ITEMS_PER_THREAD], ///< [in] The calling thread's input items + T (&prev)[ITEMS_PER_THREAD], ///< [out] The corresponding predecessor items (may be aliased to \p input). The value \p prev[0] is not updated for threadBLOCK_THREADS-1. + T &block_prefix) ///< [out] The item \p input[0] from thread0, provided to all threads + { + Up(input, prev); + block_prefix = temp_storage[BLOCK_THREADS - 1].prev; + } + + //@} end member group + + +}; + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_store.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_store.cuh new file mode 100644 index 0000000..648bf9f --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/block_store.cuh @@ -0,0 +1,1000 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/** + * \file + * Operations for writing linear segments of data from the CUDA thread block + */ + +#pragma once + +#include + +#include "block_exchange.cuh" +#include "../util_ptx.cuh" +#include "../util_macro.cuh" +#include "../util_type.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \addtogroup UtilIo + * @{ + */ + + +/******************************************************************//** + * \name Blocked arrangement I/O (direct) + *********************************************************************/ +//@{ + +/** + * \brief Store a blocked arrangement of items across a thread block into a linear segment of items. + * + * \blocked + * + * \tparam T [inferred] The data type to store. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. + */ +template < + typename T, + int ITEMS_PER_THREAD, + typename OutputIteratorT> +__device__ __forceinline__ void StoreDirectBlocked( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store +{ + OutputIteratorT thread_itr = block_itr + (linear_tid * ITEMS_PER_THREAD); + + // Store directly in thread-blocked order + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + thread_itr[ITEM] = items[ITEM]; + } +} + + +/** + * \brief Store a blocked arrangement of items across a thread block into a linear segment of items, guarded by range + * + * \blocked + * + * \tparam T [inferred] The data type to store. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. + */ +template < + typename T, + int ITEMS_PER_THREAD, + typename OutputIteratorT> +__device__ __forceinline__ void StoreDirectBlocked( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store + int valid_items) ///< [in] Number of valid items to write +{ + OutputIteratorT thread_itr = block_itr + (linear_tid * ITEMS_PER_THREAD); + + // Store directly in thread-blocked order + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + if (ITEM + (linear_tid * ITEMS_PER_THREAD) < valid_items) + { + thread_itr[ITEM] = items[ITEM]; + } + } +} + + +/** + * \brief Store a blocked arrangement of items across a thread block into a linear segment of items. + * + * \blocked + * + * The output offset (\p block_ptr + \p block_offset) must be quad-item aligned, + * which is the default starting offset returned by \p cudaMalloc() + * + * \par + * The following conditions will prevent vectorization and storing will fall back to cub::BLOCK_STORE_DIRECT: + * - \p ITEMS_PER_THREAD is odd + * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) 
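+ *
+ * \par
+ * A minimal usage sketch (the kernel below is illustrative and assumes a
+ * 128-thread block with four ints per thread; it is not part of the original
+ * header):
+ * \code
+ * #include <cub/cub.cuh>
+ *
+ * __global__ void ExampleKernel(int *d_data)
+ * {
+ *     // Four consecutive items per thread (an even count of a built-in type),
+ *     // so the four-wide vectorized store path can be taken
+ *     int thread_data[4] = {0, 1, 2, 3};
+ *
+ *     // Write the blocked arrangement starting at a quad-aligned pointer
+ *     cub::StoreDirectBlockedVectorized(threadIdx.x, d_data, thread_data);
+ * }
+ * \endcode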
+ * + * \tparam T [inferred] The data type to store. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * + */ +template < + typename T, + int ITEMS_PER_THREAD> +__device__ __forceinline__ void StoreDirectBlockedVectorized( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + T *block_ptr, ///< [in] Input pointer for storing from + T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store +{ + enum + { + // Maximum CUDA vector size is 4 elements + MAX_VEC_SIZE = CUB_MIN(4, ITEMS_PER_THREAD), + + // Vector size must be a power of two and an even divisor of the items per thread + VEC_SIZE = ((((MAX_VEC_SIZE - 1) & MAX_VEC_SIZE) == 0) && ((ITEMS_PER_THREAD % MAX_VEC_SIZE) == 0)) ? + MAX_VEC_SIZE : + 1, + + VECTORS_PER_THREAD = ITEMS_PER_THREAD / VEC_SIZE, + }; + + // Vector type + typedef typename CubVector::Type Vector; + + // Alias global pointer + Vector *block_ptr_vectors = reinterpret_cast(const_cast(block_ptr)); + + // Alias pointers (use "raw" array here which should get optimized away to prevent conservative PTXAS lmem spilling) + Vector raw_vector[VECTORS_PER_THREAD]; + T *raw_items = reinterpret_cast(raw_vector); + + // Copy + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + raw_items[ITEM] = items[ITEM]; + } + + // Direct-store using vector types + StoreDirectBlocked(linear_tid, block_ptr_vectors, raw_vector); +} + + + +//@} end member group +/******************************************************************//** + * \name Striped arrangement I/O (direct) + *********************************************************************/ +//@{ + + +/** + * \brief Store a striped arrangement of data across the thread block into a linear segment of items. + * + * \striped + * + * \tparam BLOCK_THREADS The thread block size in threads + * \tparam T [inferred] The data type to store. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. + */ +template < + int BLOCK_THREADS, + typename T, + int ITEMS_PER_THREAD, + typename OutputIteratorT> +__device__ __forceinline__ void StoreDirectStriped( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store +{ + OutputIteratorT thread_itr = block_itr + linear_tid; + + // Store directly in striped order + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + thread_itr[(ITEM * BLOCK_THREADS)] = items[ITEM]; + } +} + + +/** + * \brief Store a striped arrangement of data across the thread block into a linear segment of items, guarded by range + * + * \striped + * + * \tparam BLOCK_THREADS The thread block size in threads + * \tparam T [inferred] The data type to store. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. 
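+ *
+ * \par
+ * A minimal usage sketch of this guarded overload (illustrative; the kernel
+ * and block size shown are assumptions, not part of the original header):
+ * \code
+ * #include <cub/cub.cuh>
+ *
+ * __global__ void ExampleKernel(int *d_data, int valid_items)
+ * {
+ *     // 128-thread block, 4 items per thread, held in a striped arrangement
+ *     int thread_data[4] = {0, 1, 2, 3};
+ *
+ *     // Only the first valid_items outputs are written
+ *     cub::StoreDirectStriped<128>(threadIdx.x, d_data, thread_data, valid_items);
+ * }
+ * \endcode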
+ */ +template < + int BLOCK_THREADS, + typename T, + int ITEMS_PER_THREAD, + typename OutputIteratorT> +__device__ __forceinline__ void StoreDirectStriped( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store + int valid_items) ///< [in] Number of valid items to write +{ + OutputIteratorT thread_itr = block_itr + linear_tid; + + // Store directly in striped order + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + if ((ITEM * BLOCK_THREADS) + linear_tid < valid_items) + { + thread_itr[(ITEM * BLOCK_THREADS)] = items[ITEM]; + } + } +} + + + +//@} end member group +/******************************************************************//** + * \name Warp-striped arrangement I/O (direct) + *********************************************************************/ +//@{ + + +/** + * \brief Store a warp-striped arrangement of data across the thread block into a linear segment of items. + * + * \warpstriped + * + * \par Usage Considerations + * The number of threads in the thread block must be a multiple of the architecture's warp size. + * + * \tparam T [inferred] The data type to store. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. + */ +template < + typename T, + int ITEMS_PER_THREAD, + typename OutputIteratorT> +__device__ __forceinline__ void StoreDirectWarpStriped( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD]) ///< [out] Data to load +{ + int tid = linear_tid & (CUB_PTX_WARP_THREADS - 1); + int wid = linear_tid >> CUB_PTX_LOG_WARP_THREADS; + int warp_offset = wid * CUB_PTX_WARP_THREADS * ITEMS_PER_THREAD; + + OutputIteratorT thread_itr = block_itr + warp_offset + tid; + + // Store directly in warp-striped order + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + thread_itr[(ITEM * CUB_PTX_WARP_THREADS)] = items[ITEM]; + } +} + + +/** + * \brief Store a warp-striped arrangement of data across the thread block into a linear segment of items, guarded by range + * + * \warpstriped + * + * \par Usage Considerations + * The number of threads in the thread block must be a multiple of the architecture's warp size. + * + * \tparam T [inferred] The data type to store. + * \tparam ITEMS_PER_THREAD [inferred] The number of consecutive items partitioned onto each thread. + * \tparam OutputIteratorT [inferred] The random-access iterator type for output \iterator. 
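+ *
+ * \par
+ * A minimal usage sketch (illustrative; unlike the striped variants above, no
+ * BLOCK_THREADS template argument is needed, but the block size must be a
+ * multiple of the warp size):
+ * \code
+ * #include <cub/cub.cuh>
+ *
+ * __global__ void ExampleKernel(int *d_data, int valid_items)
+ * {
+ *     // 4 items per thread, held in a warp-striped arrangement
+ *     int thread_data[4] = {0, 1, 2, 3};
+ *
+ *     cub::StoreDirectWarpStriped(threadIdx.x, d_data, thread_data, valid_items);
+ * }
+ * \endcode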
+ */ +template < + typename T, + int ITEMS_PER_THREAD, + typename OutputIteratorT> +__device__ __forceinline__ void StoreDirectWarpStriped( + int linear_tid, ///< [in] A suitable 1D thread-identifier for the calling thread (e.g., (threadIdx.y * blockDim.x) + linear_tid for 2D thread blocks) + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store + int valid_items) ///< [in] Number of valid items to write +{ + int tid = linear_tid & (CUB_PTX_WARP_THREADS - 1); + int wid = linear_tid >> CUB_PTX_LOG_WARP_THREADS; + int warp_offset = wid * CUB_PTX_WARP_THREADS * ITEMS_PER_THREAD; + + OutputIteratorT thread_itr = block_itr + warp_offset + tid; + + // Store directly in warp-striped order + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + if (warp_offset + tid + (ITEM * CUB_PTX_WARP_THREADS) < valid_items) + { + thread_itr[(ITEM * CUB_PTX_WARP_THREADS)] = items[ITEM]; + } + } +} + + +//@} end member group + + +/** @} */ // end group UtilIo + + +//----------------------------------------------------------------------------- +// Generic BlockStore abstraction +//----------------------------------------------------------------------------- + +/** + * \brief cub::BlockStoreAlgorithm enumerates alternative algorithms for cub::BlockStore to write a blocked arrangement of items across a CUDA thread block to a linear segment of memory. + */ +enum BlockStoreAlgorithm +{ + /** + * \par Overview + * + * A [blocked arrangement](index.html#sec5sec3) of data is written + * directly to memory. + * + * \par Performance Considerations + * - The utilization of memory transactions (coalescing) decreases as the + * access stride between threads increases (i.e., the number items per thread). + */ + BLOCK_STORE_DIRECT, + + /** + * \par Overview + * + * A [blocked arrangement](index.html#sec5sec3) of data is written directly + * to memory using CUDA's built-in vectorized stores as a coalescing optimization. + * For example, st.global.v4.s32 instructions will be generated + * when \p T = \p int and \p ITEMS_PER_THREAD % 4 == 0. + * + * \par Performance Considerations + * - The utilization of memory transactions (coalescing) remains high until the the + * access stride between threads (i.e., the number items per thread) exceeds the + * maximum vector store width (typically 4 items or 64B, whichever is lower). + * - The following conditions will prevent vectorization and writing will fall back to cub::BLOCK_STORE_DIRECT: + * - \p ITEMS_PER_THREAD is odd + * - The \p OutputIteratorT is not a simple pointer type + * - The block output offset is not quadword-aligned + * - The data type \p T is not a built-in primitive or CUDA vector type (e.g., \p short, \p int2, \p double, \p float2, etc.) + */ + BLOCK_STORE_VECTORIZE, + + /** + * \par Overview + * A [blocked arrangement](index.html#sec5sec3) is locally + * transposed and then efficiently written to memory as a [striped arrangement](index.html#sec5sec3). + * + * \par Performance Considerations + * - The utilization of memory transactions (coalescing) remains high regardless + * of items written per thread. + * - The local reordering incurs slightly longer latencies and throughput than the + * direct cub::BLOCK_STORE_DIRECT and cub::BLOCK_STORE_VECTORIZE alternatives. 
+ */ + BLOCK_STORE_TRANSPOSE, + + /** + * \par Overview + * A [blocked arrangement](index.html#sec5sec3) is locally + * transposed and then efficiently written to memory as a + * [warp-striped arrangement](index.html#sec5sec3) + * + * \par Usage Considerations + * - BLOCK_THREADS must be a multiple of WARP_THREADS + * + * \par Performance Considerations + * - The utilization of memory transactions (coalescing) remains high regardless + * of items written per thread. + * - The local reordering incurs slightly longer latencies and throughput than the + * direct cub::BLOCK_STORE_DIRECT and cub::BLOCK_STORE_VECTORIZE alternatives. + */ + BLOCK_STORE_WARP_TRANSPOSE, + + /** + * \par Overview + * A [blocked arrangement](index.html#sec5sec3) is locally + * transposed and then efficiently written to memory as a + * [warp-striped arrangement](index.html#sec5sec3) + * To reduce the shared memory requirement, only one warp's worth of shared + * memory is provisioned and is subsequently time-sliced among warps. + * + * \par Usage Considerations + * - BLOCK_THREADS must be a multiple of WARP_THREADS + * + * \par Performance Considerations + * - The utilization of memory transactions (coalescing) remains high regardless + * of items written per thread. + * - Provisions less shared memory temporary storage, but incurs larger + * latencies than the BLOCK_STORE_WARP_TRANSPOSE alternative. + */ + BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, + +}; + + +/** + * \brief The BlockStore class provides [collective](index.html#sec0) data movement methods for writing a [blocked arrangement](index.html#sec5sec3) of items partitioned across a CUDA thread block to a linear segment of memory. ![](block_store_logo.png) + * \ingroup BlockModule + * \ingroup UtilIo + * + * \tparam T The type of data to be written. + * \tparam BLOCK_DIM_X The thread block length in threads along the X dimension + * \tparam ITEMS_PER_THREAD The number of consecutive items partitioned onto each thread. + * \tparam ALGORITHM [optional] cub::BlockStoreAlgorithm tuning policy enumeration. default: cub::BLOCK_STORE_DIRECT. + * \tparam WARP_TIME_SLICING [optional] Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any load-related data transpositions (versus each warp having its own storage). (default: false) + * \tparam BLOCK_DIM_Y [optional] The thread block length in threads along the Y dimension (default: 1) + * \tparam BLOCK_DIM_Z [optional] The thread block length in threads along the Z dimension (default: 1) + * \tparam PTX_ARCH [optional] \ptxversion + * + * \par Overview + * - The BlockStore class provides a single data movement abstraction that can be specialized + * to implement different cub::BlockStoreAlgorithm strategies. This facilitates different + * performance policies for different architectures, data types, granularity sizes, etc. + * - BlockStore can be optionally specialized by different data movement strategies: + * -# cub::BLOCK_STORE_DIRECT. A [blocked arrangement](index.html#sec5sec3) of data is written + * directly to memory. [More...](\ref cub::BlockStoreAlgorithm) + * -# cub::BLOCK_STORE_VECTORIZE. A [blocked arrangement](index.html#sec5sec3) + * of data is written directly to memory using CUDA's built-in vectorized stores as a + * coalescing optimization. [More...](\ref cub::BlockStoreAlgorithm) + * -# cub::BLOCK_STORE_TRANSPOSE. 
A [blocked arrangement](index.html#sec5sec3) + * is locally transposed into a [striped arrangement](index.html#sec5sec3) which is + * then written to memory. [More...](\ref cub::BlockStoreAlgorithm) + * -# cub::BLOCK_STORE_WARP_TRANSPOSE. A [blocked arrangement](index.html#sec5sec3) + * is locally transposed into a [warp-striped arrangement](index.html#sec5sec3) which is + * then written to memory. [More...](\ref cub::BlockStoreAlgorithm) + * - \rowmajor + * + * \par A Simple Example + * \blockcollective{BlockStore} + * \par + * The code snippet below illustrates the storing of a "blocked" arrangement + * of 512 integers across 128 threads (where each thread owns 4 consecutive items) + * into a linear segment of memory. The store is specialized for \p BLOCK_STORE_WARP_TRANSPOSE, + * meaning items are locally reordered among threads so that memory references will be + * efficiently coalesced using a warp-striped access pattern. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(int *d_data, ...) + * { + * // Specialize BlockStore for a 1D block of 128 threads owning 4 integer items each + * typedef cub::BlockStore BlockStore; + * + * // Allocate shared memory for BlockStore + * __shared__ typename BlockStore::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Store items to linear memory + * int thread_data[4]; + * BlockStore(temp_storage).Store(d_data, thread_data); + * + * \endcode + * \par + * Suppose the set of \p thread_data across the block of threads is + * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. + * The output \p d_data will be 0, 1, 2, 3, 4, 5, .... + * + */ +template < + typename T, + int BLOCK_DIM_X, + int ITEMS_PER_THREAD, + BlockStoreAlgorithm ALGORITHM = BLOCK_STORE_DIRECT, + int BLOCK_DIM_Y = 1, + int BLOCK_DIM_Z = 1, + int PTX_ARCH = CUB_PTX_ARCH> +class BlockStore +{ +private: + /****************************************************************************** + * Constants and typed definitions + ******************************************************************************/ + + /// Constants + enum + { + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + }; + + + /****************************************************************************** + * Algorithmic variants + ******************************************************************************/ + + /// Store helper + template + struct StoreInternal; + + + /** + * BLOCK_STORE_DIRECT specialization of store helper + */ + template + struct StoreInternal + { + /// Shared memory storage layout type + typedef NullType TempStorage; + + /// Linear thread-id + int linear_tid; + + /// Constructor + __device__ __forceinline__ StoreInternal( + TempStorage &/*temp_storage*/, + int linear_tid) + : + linear_tid(linear_tid) + {} + + /// Store items into a linear segment of memory + template + __device__ __forceinline__ void Store( + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store + { + StoreDirectBlocked(linear_tid, block_itr, items); + } + + /// Store items into a linear segment of memory, guarded by range + template + __device__ __forceinline__ void Store( + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store + int valid_items) ///< [in] Number of valid 
items to write + { + StoreDirectBlocked(linear_tid, block_itr, items, valid_items); + } + }; + + + /** + * BLOCK_STORE_VECTORIZE specialization of store helper + */ + template + struct StoreInternal + { + /// Shared memory storage layout type + typedef NullType TempStorage; + + /// Linear thread-id + int linear_tid; + + /// Constructor + __device__ __forceinline__ StoreInternal( + TempStorage &/*temp_storage*/, + int linear_tid) + : + linear_tid(linear_tid) + {} + + /// Store items into a linear segment of memory, specialized for native pointer types (attempts vectorization) + __device__ __forceinline__ void Store( + T *block_ptr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store + { + StoreDirectBlockedVectorized(linear_tid, block_ptr, items); + } + + /// Store items into a linear segment of memory, specialized for opaque input iterators (skips vectorization) + template + __device__ __forceinline__ void Store( + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store + { + StoreDirectBlocked(linear_tid, block_itr, items); + } + + /// Store items into a linear segment of memory, guarded by range + template + __device__ __forceinline__ void Store( + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store + int valid_items) ///< [in] Number of valid items to write + { + StoreDirectBlocked(linear_tid, block_itr, items, valid_items); + } + }; + + + /** + * BLOCK_STORE_TRANSPOSE specialization of store helper + */ + template + struct StoreInternal + { + // BlockExchange utility type for keys + typedef BlockExchange BlockExchange; + + /// Shared memory storage layout type + struct _TempStorage : BlockExchange::TempStorage + { + /// Temporary storage for partially-full block guard + volatile int valid_items; + }; + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + /// Thread reference to shared storage + _TempStorage &temp_storage; + + /// Linear thread-id + int linear_tid; + + /// Constructor + __device__ __forceinline__ StoreInternal( + TempStorage &temp_storage, + int linear_tid) + : + temp_storage(temp_storage.Alias()), + linear_tid(linear_tid) + {} + + /// Store items into a linear segment of memory + template + __device__ __forceinline__ void Store( + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store + { + BlockExchange(temp_storage).BlockedToStriped(items); + StoreDirectStriped(linear_tid, block_itr, items); + } + + /// Store items into a linear segment of memory, guarded by range + template + __device__ __forceinline__ void Store( + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store + int valid_items) ///< [in] Number of valid items to write + { + BlockExchange(temp_storage).BlockedToStriped(items); + if (linear_tid == 0) + temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads + CTA_SYNC(); + StoreDirectStriped(linear_tid, block_itr, items, temp_storage.valid_items); + } + }; + + + /** + * BLOCK_STORE_WARP_TRANSPOSE specialization of store helper + */ + template + struct StoreInternal + { + enum + { + WARP_THREADS 
= CUB_WARP_THREADS(PTX_ARCH) + }; + + // Assert BLOCK_THREADS must be a multiple of WARP_THREADS + CUB_STATIC_ASSERT((BLOCK_THREADS % WARP_THREADS == 0), "BLOCK_THREADS must be a multiple of WARP_THREADS"); + + // BlockExchange utility type for keys + typedef BlockExchange BlockExchange; + + /// Shared memory storage layout type + struct _TempStorage : BlockExchange::TempStorage + { + /// Temporary storage for partially-full block guard + volatile int valid_items; + }; + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + /// Thread reference to shared storage + _TempStorage &temp_storage; + + /// Linear thread-id + int linear_tid; + + /// Constructor + __device__ __forceinline__ StoreInternal( + TempStorage &temp_storage, + int linear_tid) + : + temp_storage(temp_storage.Alias()), + linear_tid(linear_tid) + {} + + /// Store items into a linear segment of memory + template + __device__ __forceinline__ void Store( + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store + { + BlockExchange(temp_storage).BlockedToWarpStriped(items); + StoreDirectWarpStriped(linear_tid, block_itr, items); + } + + /// Store items into a linear segment of memory, guarded by range + template + __device__ __forceinline__ void Store( + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store + int valid_items) ///< [in] Number of valid items to write + { + BlockExchange(temp_storage).BlockedToWarpStriped(items); + if (linear_tid == 0) + temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads + CTA_SYNC(); + StoreDirectWarpStriped(linear_tid, block_itr, items, temp_storage.valid_items); + } + }; + + + /** + * BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED specialization of store helper + */ + template + struct StoreInternal + { + enum + { + WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH) + }; + + // Assert BLOCK_THREADS must be a multiple of WARP_THREADS + CUB_STATIC_ASSERT((BLOCK_THREADS % WARP_THREADS == 0), "BLOCK_THREADS must be a multiple of WARP_THREADS"); + + // BlockExchange utility type for keys + typedef BlockExchange BlockExchange; + + /// Shared memory storage layout type + struct _TempStorage : BlockExchange::TempStorage + { + /// Temporary storage for partially-full block guard + volatile int valid_items; + }; + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + /// Thread reference to shared storage + _TempStorage &temp_storage; + + /// Linear thread-id + int linear_tid; + + /// Constructor + __device__ __forceinline__ StoreInternal( + TempStorage &temp_storage, + int linear_tid) + : + temp_storage(temp_storage.Alias()), + linear_tid(linear_tid) + {} + + /// Store items into a linear segment of memory + template + __device__ __forceinline__ void Store( + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store + { + BlockExchange(temp_storage).BlockedToWarpStriped(items); + StoreDirectWarpStriped(linear_tid, block_itr, items); + } + + /// Store items into a linear segment of memory, guarded by range + template + __device__ __forceinline__ void Store( + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T 
(&items)[ITEMS_PER_THREAD], ///< [in] Data to store + int valid_items) ///< [in] Number of valid items to write + { + BlockExchange(temp_storage).BlockedToWarpStriped(items); + if (linear_tid == 0) + temp_storage.valid_items = valid_items; // Move through volatile smem as a workaround to prevent RF spilling on subsequent loads + CTA_SYNC(); + StoreDirectWarpStriped(linear_tid, block_itr, items, temp_storage.valid_items); + } + }; + + /****************************************************************************** + * Type definitions + ******************************************************************************/ + + /// Internal load implementation to use + typedef StoreInternal InternalStore; + + + /// Shared memory storage layout type + typedef typename InternalStore::TempStorage _TempStorage; + + + /****************************************************************************** + * Utility methods + ******************************************************************************/ + + /// Internal storage allocator + __device__ __forceinline__ _TempStorage& PrivateStorage() + { + __shared__ _TempStorage private_storage; + return private_storage; + } + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + /// Thread reference to shared storage + _TempStorage &temp_storage; + + /// Linear thread-id + int linear_tid; + +public: + + + /// \smemstorage{BlockStore} + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /******************************************************************//** + * \name Collective constructors + *********************************************************************/ + //@{ + + /** + * \brief Collective constructor using a private static allocation of shared memory as temporary storage. + */ + __device__ __forceinline__ BlockStore() + : + temp_storage(PrivateStorage()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + /** + * \brief Collective constructor using the specified memory allocation as temporary storage. + */ + __device__ __forceinline__ BlockStore( + TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + //@} end member group + /******************************************************************//** + * \name Data movement + *********************************************************************/ + //@{ + + + /** + * \brief Store items into a linear segment of memory. + * + * \par + * - \blocked + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the storing of a "blocked" arrangement + * of 512 integers across 128 threads (where each thread owns 4 consecutive items) + * into a linear segment of memory. The store is specialized for \p BLOCK_STORE_WARP_TRANSPOSE, + * meaning items are locally reordered among threads so that memory references will be + * efficiently coalesced using a warp-striped access pattern. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(int *d_data, ...) 
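+ *     // Note: the BlockStore typedef in the body below appears without its
+ *     // template arguments; given the description above (128 threads, four
+ *     // ints each, BLOCK_STORE_WARP_TRANSPOSE), it would presumably read
+ *     // cub::BlockStore<int, 128, 4, BLOCK_STORE_WARP_TRANSPOSE>.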
+ * { + * // Specialize BlockStore for a 1D block of 128 threads owning 4 integer items each + * typedef cub::BlockStore BlockStore; + * + * // Allocate shared memory for BlockStore + * __shared__ typename BlockStore::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Store items to linear memory + * int thread_data[4]; + * BlockStore(temp_storage).Store(d_data, thread_data); + * + * \endcode + * \par + * Suppose the set of \p thread_data across the block of threads is + * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] }. + * The output \p d_data will be 0, 1, 2, 3, 4, 5, .... + * + */ + template + __device__ __forceinline__ void Store( + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD]) ///< [in] Data to store + { + InternalStore(temp_storage, linear_tid).Store(block_itr, items); + } + + /** + * \brief Store items into a linear segment of memory, guarded by range. + * + * \par + * - \blocked + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the guarded storing of a "blocked" arrangement + * of 512 integers across 128 threads (where each thread owns 4 consecutive items) + * into a linear segment of memory. The store is specialized for \p BLOCK_STORE_WARP_TRANSPOSE, + * meaning items are locally reordered among threads so that memory references will be + * efficiently coalesced using a warp-striped access pattern. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(int *d_data, int valid_items, ...) + * { + * // Specialize BlockStore for a 1D block of 128 threads owning 4 integer items each + * typedef cub::BlockStore BlockStore; + * + * // Allocate shared memory for BlockStore + * __shared__ typename BlockStore::TempStorage temp_storage; + * + * // Obtain a segment of consecutive items that are blocked across threads + * int thread_data[4]; + * ... + * + * // Store items to linear memory + * int thread_data[4]; + * BlockStore(temp_storage).Store(d_data, thread_data, valid_items); + * + * \endcode + * \par + * Suppose the set of \p thread_data across the block of threads is + * { [0,1,2,3], [4,5,6,7], ..., [508,509,510,511] } and \p valid_items is \p 5. + * The output \p d_data will be 0, 1, 2, 3, 4, ?, ?, ?, ..., with + * only the first two threads being unmasked to store portions of valid data. + * + */ + template + __device__ __forceinline__ void Store( + OutputIteratorT block_itr, ///< [in] The thread block's base output iterator for storing to + T (&items)[ITEMS_PER_THREAD], ///< [in] Data to store + int valid_items) ///< [in] Number of valid items to write + { + InternalStore(temp_storage, linear_tid).Store(block_itr, items, valid_items); + } +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_histogram_atomic.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_histogram_atomic.cuh new file mode 100644 index 0000000..29db0df --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_histogram_atomic.cuh @@ -0,0 +1,82 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * The cub::BlockHistogramAtomic class provides atomic-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. + */ + +#pragma once + +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief The BlockHistogramAtomic class provides atomic-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. + */ +template +struct BlockHistogramAtomic +{ + /// Shared memory storage layout type + struct TempStorage {}; + + + /// Constructor + __device__ __forceinline__ BlockHistogramAtomic( + TempStorage &temp_storage) + {} + + + /// Composite data onto an existing histogram + template < + typename T, + typename CounterT, + int ITEMS_PER_THREAD> + __device__ __forceinline__ void Composite( + T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram + CounterT histogram[BINS]) ///< [out] Reference to shared/device-accessible memory histogram + { + // Update histogram + #pragma unroll + for (int i = 0; i < ITEMS_PER_THREAD; ++i) + { + atomicAdd(histogram + items[i], 1); + } + } + +}; + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_histogram_sort.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_histogram_sort.cuh new file mode 100644 index 0000000..9ef417a --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_histogram_sort.cuh @@ -0,0 +1,226 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * The cub::BlockHistogramSort class provides sorting-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. + */ + +#pragma once + +#include "../../block/block_radix_sort.cuh" +#include "../../block/block_discontinuity.cuh" +#include "../../util_ptx.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + + +/** + * \brief The BlockHistogramSort class provides sorting-based methods for constructing block-wide histograms from data samples partitioned across a CUDA thread block. + */ +template < + typename T, ///< Sample type + int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension + int ITEMS_PER_THREAD, ///< The number of samples per thread + int BINS, ///< The number of bins into which histogram samples may fall + int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension + int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension + int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective +struct BlockHistogramSort +{ + /// Constants + enum + { + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + }; + + // Parameterize BlockRadixSort type for our thread block + typedef BlockRadixSort< + T, + BLOCK_DIM_X, + ITEMS_PER_THREAD, + NullType, + 4, + (PTX_ARCH >= 350) ? 
true : false, + BLOCK_SCAN_WARP_SCANS, + cudaSharedMemBankSizeFourByte, + BLOCK_DIM_Y, + BLOCK_DIM_Z, + PTX_ARCH> + BlockRadixSortT; + + // Parameterize BlockDiscontinuity type for our thread block + typedef BlockDiscontinuity< + T, + BLOCK_DIM_X, + BLOCK_DIM_Y, + BLOCK_DIM_Z, + PTX_ARCH> + BlockDiscontinuityT; + + /// Shared memory + union _TempStorage + { + // Storage for sorting bin values + typename BlockRadixSortT::TempStorage sort; + + struct + { + // Storage for detecting discontinuities in the tile of sorted bin values + typename BlockDiscontinuityT::TempStorage flag; + + // Storage for noting begin/end offsets of bin runs in the tile of sorted bin values + unsigned int run_begin[BINS]; + unsigned int run_end[BINS]; + }; + }; + + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + // Thread fields + _TempStorage &temp_storage; + unsigned int linear_tid; + + + /// Constructor + __device__ __forceinline__ BlockHistogramSort( + TempStorage &temp_storage) + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + // Discontinuity functor + struct DiscontinuityOp + { + // Reference to temp_storage + _TempStorage &temp_storage; + + // Constructor + __device__ __forceinline__ DiscontinuityOp(_TempStorage &temp_storage) : + temp_storage(temp_storage) + {} + + // Discontinuity predicate + __device__ __forceinline__ bool operator()(const T &a, const T &b, int b_index) + { + if (a != b) + { + // Note the begin/end offsets in shared storage + temp_storage.run_begin[b] = b_index; + temp_storage.run_end[a] = b_index; + + return true; + } + else + { + return false; + } + } + }; + + + // Composite data onto an existing histogram + template < + typename CounterT > + __device__ __forceinline__ void Composite( + T (&items)[ITEMS_PER_THREAD], ///< [in] Calling thread's input values to histogram + CounterT histogram[BINS]) ///< [out] Reference to shared/device-accessible memory histogram + { + enum { TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD }; + + // Sort bytes in blocked arrangement + BlockRadixSortT(temp_storage.sort).Sort(items); + + CTA_SYNC(); + + // Initialize the shared memory's run_begin and run_end for each bin + int histo_offset = 0; + + #pragma unroll + for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS) + { + temp_storage.run_begin[histo_offset + linear_tid] = TILE_SIZE; + temp_storage.run_end[histo_offset + linear_tid] = TILE_SIZE; + } + // Finish up with guarded initialization if necessary + if ((BINS % BLOCK_THREADS != 0) && (histo_offset + linear_tid < BINS)) + { + temp_storage.run_begin[histo_offset + linear_tid] = TILE_SIZE; + temp_storage.run_end[histo_offset + linear_tid] = TILE_SIZE; + } + + CTA_SYNC(); + + int flags[ITEMS_PER_THREAD]; // unused + + // Compute head flags to demarcate contiguous runs of the same bin in the sorted tile + DiscontinuityOp flag_op(temp_storage); + BlockDiscontinuityT(temp_storage.flag).FlagHeads(flags, items, flag_op); + + // Update begin for first item + if (linear_tid == 0) temp_storage.run_begin[items[0]] = 0; + + CTA_SYNC(); + + // Composite into histogram + histo_offset = 0; + + #pragma unroll + for(; histo_offset + BLOCK_THREADS <= BINS; histo_offset += BLOCK_THREADS) + { + int thread_offset = histo_offset + linear_tid; + CounterT count = temp_storage.run_end[thread_offset] - temp_storage.run_begin[thread_offset]; + histogram[thread_offset] += count; + } + + // Finish up with guarded composition if 
necessary + if ((BINS % BLOCK_THREADS != 0) && (histo_offset + linear_tid < BINS)) + { + int thread_offset = histo_offset + linear_tid; + CounterT count = temp_storage.run_end[thread_offset] - temp_storage.run_begin[thread_offset]; + histogram[thread_offset] += count; + } + } + +}; + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_reduce_raking.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_reduce_raking.cuh new file mode 100644 index 0000000..aff97fc --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_reduce_raking.cuh @@ -0,0 +1,226 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::BlockReduceRaking provides raking-based methods of parallel reduction across a CUDA thread block. Supports non-commutative reduction operators. + */ + +#pragma once + +#include "../../block/block_raking_layout.cuh" +#include "../../warp/warp_reduce.cuh" +#include "../../thread/thread_reduce.cuh" +#include "../../util_ptx.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief BlockReduceRaking provides raking-based methods of parallel reduction across a CUDA thread block. Supports non-commutative reduction operators. + * + * Supports non-commutative binary reduction operators. Unlike commutative + * reduction operators (e.g., addition), the application of a non-commutative + * reduction operator (e.g, string concatenation) across a sequence of inputs must + * honor the relative ordering of items and partial reductions when applying the + * reduction operator. 
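+ *
+ * \par
+ * For example, reducing the four partials a, b, c, d with string concatenation
+ * must produce abcd: the reduction tree may regroup partials, e.g. (ab) then
+ * (cd) then (ab)(cd), because the operator is associative, but it may never
+ * swap their order, because the operator is not commutative.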
+ * + * Compared to the implementation of BlockReduceRaking (which does not support + * non-commutative operators), this implementation requires a few extra + * rounds of inter-thread communication. + */ +template < + typename T, ///< Data type being reduced + int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension + int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension + int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension + int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective +struct BlockReduceRaking +{ + /// Constants + enum + { + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + }; + + /// Layout type for padded thread block raking grid + typedef BlockRakingLayout BlockRakingLayout; + + /// WarpReduce utility type + typedef typename WarpReduce::InternalWarpReduce WarpReduce; + + /// Constants + enum + { + /// Number of raking threads + RAKING_THREADS = BlockRakingLayout::RAKING_THREADS, + + /// Number of raking elements per warp synchronous raking thread + SEGMENT_LENGTH = BlockRakingLayout::SEGMENT_LENGTH, + + /// Cooperative work can be entirely warp synchronous + WARP_SYNCHRONOUS = (RAKING_THREADS == BLOCK_THREADS), + + /// Whether or not warp-synchronous reduction should be unguarded (i.e., the warp-reduction elements is a power of two + WARP_SYNCHRONOUS_UNGUARDED = PowerOfTwo::VALUE, + + /// Whether or not accesses into smem are unguarded + RAKING_UNGUARDED = BlockRakingLayout::UNGUARDED, + + }; + + + /// Shared memory storage layout type + union _TempStorage + { + typename WarpReduce::TempStorage warp_storage; ///< Storage for warp-synchronous reduction + typename BlockRakingLayout::TempStorage raking_grid; ///< Padded thread block raking grid + }; + + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + // Thread fields + _TempStorage &temp_storage; + unsigned int linear_tid; + + + /// Constructor + __device__ __forceinline__ BlockReduceRaking( + TempStorage &temp_storage) + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + template + __device__ __forceinline__ T RakingReduction( + ReductionOp reduction_op, ///< [in] Binary scan operator + T *raking_segment, + T partial, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items + int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) + Int2Type /*iteration*/) + { + // Update partial if addend is in range + if ((IS_FULL_TILE && RAKING_UNGUARDED) || ((linear_tid * SEGMENT_LENGTH) + ITERATION < num_valid)) + { + T addend = raking_segment[ITERATION]; + partial = reduction_op(partial, addend); + } + return RakingReduction(reduction_op, raking_segment, partial, num_valid, Int2Type()); + } + + template + __device__ __forceinline__ T RakingReduction( + ReductionOp /*reduction_op*/, ///< [in] Binary scan operator + T * /*raking_segment*/, + T partial, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items + int /*num_valid*/, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) + Int2Type /*iteration*/) + { + return partial; + } + + + + /// Computes a thread block-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. 
+ template < + bool IS_FULL_TILE, + typename ReductionOp> + __device__ __forceinline__ T Reduce( + T partial, ///< [in] Calling thread's input partial reductions + int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) + ReductionOp reduction_op) ///< [in] Binary reduction operator + { + if (WARP_SYNCHRONOUS) + { + // Short-circuit directly to warp synchronous reduction (unguarded if active threads is a power-of-two) + partial = WarpReduce(temp_storage.warp_storage).template Reduce( + partial, + num_valid, + reduction_op); + } + else + { + // Place partial into shared memory grid. + *BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid) = partial; + + CTA_SYNC(); + + // Reduce parallelism to one warp + if (linear_tid < RAKING_THREADS) + { + // Raking reduction in grid + T *raking_segment = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); + partial = raking_segment[0]; + + partial = RakingReduction(reduction_op, raking_segment, partial, num_valid, Int2Type<1>()); + + int valid_raking_threads = (IS_FULL_TILE) ? + RAKING_THREADS : + (num_valid + SEGMENT_LENGTH - 1) / SEGMENT_LENGTH; + + partial = WarpReduce(temp_storage.warp_storage).template Reduce( + partial, + valid_raking_threads, + reduction_op); + + } + } + + return partial; + } + + + /// Computes a thread block-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. + template + __device__ __forceinline__ T Sum( + T partial, ///< [in] Calling thread's input partial reductions + int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) + { + cub::Sum reduction_op; + + return Reduce(partial, num_valid, reduction_op); + } + + + +}; + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_reduce_raking_commutative_only.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_reduce_raking_commutative_only.cuh new file mode 100644 index 0000000..454fdaf --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_reduce_raking_commutative_only.cuh @@ -0,0 +1,199 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
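// A minimal sketch, assuming the public cub::BlockReduce front-end specialized with
// BLOCK_REDUCE_RAKING, which dispatches to the raking specialization above. The kernel name and
// the d_in/d_out pointers are illustrative only.
#include <cub/cub.cuh>

__global__ void block_max_sketch(const int *d_in, int *d_out)
{
    // One 128-thread block reduces 128 items with a generic reduction operator.
    typedef cub::BlockReduce<int, 128, cub::BLOCK_REDUCE_RAKING> BlockReduce;
    __shared__ typename BlockReduce::TempStorage temp_storage;

    int thread_data = d_in[blockIdx.x * 128 + threadIdx.x];

    // Raking upsweep into shared memory, then a warp-synchronous reduction among the raking threads.
    int aggregate = BlockReduce(temp_storage).Reduce(thread_data, cub::Max());

    // The aggregate is only valid in thread0.
    if (threadIdx.x == 0)
        d_out[blockIdx.x] = aggregate;
}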
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::BlockReduceRakingCommutativeOnly provides raking-based methods of parallel reduction across a CUDA thread block. Does not support non-commutative reduction operators. + */ + +#pragma once + +#include "block_reduce_raking.cuh" +#include "../../warp/warp_reduce.cuh" +#include "../../thread/thread_reduce.cuh" +#include "../../util_ptx.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief BlockReduceRakingCommutativeOnly provides raking-based methods of parallel reduction across a CUDA thread block. Does not support non-commutative reduction operators. Does not support block sizes that are not a multiple of the warp size. + */ +template < + typename T, ///< Data type being reduced + int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension + int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension + int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension + int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective +struct BlockReduceRakingCommutativeOnly +{ + /// Constants + enum + { + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + }; + + // The fall-back implementation to use when BLOCK_THREADS is not a multiple of the warp size or not all threads have valid values + typedef BlockReduceRaking FallBack; + + /// Constants + enum + { + /// Number of warp threads + WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), + + /// Whether or not to use fall-back + USE_FALLBACK = ((BLOCK_THREADS % WARP_THREADS != 0) || (BLOCK_THREADS <= WARP_THREADS)), + + /// Number of raking threads + RAKING_THREADS = WARP_THREADS, + + /// Number of threads actually sharing items with the raking threads + SHARING_THREADS = CUB_MAX(1, BLOCK_THREADS - RAKING_THREADS), + + /// Number of raking elements per warp synchronous raking thread + SEGMENT_LENGTH = SHARING_THREADS / WARP_THREADS, + }; + + /// WarpReduce utility type + typedef WarpReduce WarpReduce; + + /// Layout type for padded thread block raking grid + typedef BlockRakingLayout BlockRakingLayout; + + /// Shared memory storage layout type + union _TempStorage + { + struct + { + typename WarpReduce::TempStorage warp_storage; ///< Storage for warp-synchronous reduction + typename BlockRakingLayout::TempStorage raking_grid; ///< Padded thread block raking grid + }; + typename FallBack::TempStorage fallback_storage; ///< Fall-back storage for non-commutative block scan + }; + + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + // Thread fields + _TempStorage &temp_storage; + unsigned int linear_tid; + + + /// Constructor + __device__ __forceinline__ BlockReduceRakingCommutativeOnly( + TempStorage &temp_storage) + : + temp_storage(temp_storage.Alias()), + 
linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + /// Computes a thread block-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. + template + __device__ __forceinline__ T Sum( + T partial, ///< [in] Calling thread's input partial reductions + int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) + { + if (USE_FALLBACK || !FULL_TILE) + { + return FallBack(temp_storage.fallback_storage).template Sum(partial, num_valid); + } + else + { + // Place partial into shared memory grid + if (linear_tid >= RAKING_THREADS) + *BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid - RAKING_THREADS) = partial; + + CTA_SYNC(); + + // Reduce parallelism to one warp + if (linear_tid < RAKING_THREADS) + { + // Raking reduction in grid + T *raking_segment = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); + partial = internal::ThreadReduce(raking_segment, cub::Sum(), partial); + + // Warpscan + partial = WarpReduce(temp_storage.warp_storage).Sum(partial); + } + } + + return partial; + } + + + /// Computes a thread block-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. + template < + bool FULL_TILE, + typename ReductionOp> + __device__ __forceinline__ T Reduce( + T partial, ///< [in] Calling thread's input partial reductions + int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) + ReductionOp reduction_op) ///< [in] Binary reduction operator + { + if (USE_FALLBACK || !FULL_TILE) + { + return FallBack(temp_storage.fallback_storage).template Reduce(partial, num_valid, reduction_op); + } + else + { + // Place partial into shared memory grid + if (linear_tid >= RAKING_THREADS) + *BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid - RAKING_THREADS) = partial; + + CTA_SYNC(); + + // Reduce parallelism to one warp + if (linear_tid < RAKING_THREADS) + { + // Raking reduction in grid + T *raking_segment = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); + partial = internal::ThreadReduce(raking_segment, reduction_op, partial); + + // Warpscan + partial = WarpReduce(temp_storage.warp_storage).Reduce(partial, reduction_op); + } + } + + return partial; + } + +}; + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_reduce_warp_reductions.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_reduce_warp_reductions.cuh new file mode 100644 index 0000000..10ba303 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_reduce_warp_reductions.cuh @@ -0,0 +1,218 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
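// A minimal sketch, assuming the public cub::BlockReduce front-end specialized with
// BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY, which dispatches to the specialization above. When the
// block size is not a multiple of the warp size, or the tile is partial, that specialization
// falls back to the general BlockReduceRaking path. Kernel name and pointers are illustrative.
#include <cub/cub.cuh>

__global__ void block_sum_sketch(const float *d_in, float *d_out)
{
    typedef cub::BlockReduce<float, 256, cub::BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY> BlockReduce;
    __shared__ typename BlockReduce::TempStorage temp_storage;

    float thread_data = d_in[blockIdx.x * 256 + threadIdx.x];

    // Sum is commutative, so non-raking threads only deposit partials into the raking grid;
    // a single warp rakes its segments and finishes with a warp-synchronous reduction.
    float block_sum = BlockReduce(temp_storage).Sum(thread_data);

    if (threadIdx.x == 0)
        d_out[blockIdx.x] = block_sum;   // valid in thread0 only
}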
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::BlockReduceWarpReductions provides variants of warp-reduction-based parallel reduction across a CUDA thread block. Supports non-commutative reduction operators. + */ + +#pragma once + +#include "../../warp/warp_reduce.cuh" +#include "../../util_ptx.cuh" +#include "../../util_arch.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief BlockReduceWarpReductions provides variants of warp-reduction-based parallel reduction across a CUDA thread block. Supports non-commutative reduction operators. 
+ */ +template < + typename T, ///< Data type being reduced + int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension + int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension + int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension + int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective +struct BlockReduceWarpReductions +{ + /// Constants + enum + { + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + + /// Number of warp threads + WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), + + /// Number of active warps + WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, + + /// The logical warp size for warp reductions + LOGICAL_WARP_SIZE = CUB_MIN(BLOCK_THREADS, WARP_THREADS), + + /// Whether or not the logical warp size evenly divides the thread block size + EVEN_WARP_MULTIPLE = (BLOCK_THREADS % LOGICAL_WARP_SIZE == 0) + }; + + + /// WarpReduce utility type + typedef typename WarpReduce::InternalWarpReduce WarpReduce; + + + /// Shared memory storage layout type + struct _TempStorage + { + typename WarpReduce::TempStorage warp_reduce[WARPS]; ///< Buffer for warp-synchronous scan + T warp_aggregates[WARPS]; ///< Shared totals from each warp-synchronous scan + T block_prefix; ///< Shared prefix for the entire thread block + }; + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + // Thread fields + _TempStorage &temp_storage; + int linear_tid; + int warp_id; + int lane_id; + + + /// Constructor + __device__ __forceinline__ BlockReduceWarpReductions( + TempStorage &temp_storage) + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), + warp_id((WARPS == 1) ? 0 : linear_tid / WARP_THREADS), + lane_id(LaneId()) + {} + + + template + __device__ __forceinline__ T ApplyWarpAggregates( + ReductionOp reduction_op, ///< [in] Binary scan operator + T warp_aggregate, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items + int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) + Int2Type /*successor_warp*/) + { + if (FULL_TILE || (SUCCESSOR_WARP * LOGICAL_WARP_SIZE < num_valid)) + { + T addend = temp_storage.warp_aggregates[SUCCESSOR_WARP]; + warp_aggregate = reduction_op(warp_aggregate, addend); + } + return ApplyWarpAggregates(reduction_op, warp_aggregate, num_valid, Int2Type()); + } + + template + __device__ __forceinline__ T ApplyWarpAggregates( + ReductionOp /*reduction_op*/, ///< [in] Binary scan operator + T warp_aggregate, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items + int /*num_valid*/, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) + Int2Type /*successor_warp*/) + { + return warp_aggregate; + } + + + /// Returns block-wide aggregate in thread0. 
+ template < + bool FULL_TILE, + typename ReductionOp> + __device__ __forceinline__ T ApplyWarpAggregates( + ReductionOp reduction_op, ///< [in] Binary scan operator + T warp_aggregate, ///< [in] [lane0 only] Warp-wide aggregate reduction of input items + int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) + { + // Share lane aggregates + if (lane_id == 0) + { + temp_storage.warp_aggregates[warp_id] = warp_aggregate; + } + + CTA_SYNC(); + + // Update total aggregate in warp 0, lane 0 + if (linear_tid == 0) + { + warp_aggregate = ApplyWarpAggregates(reduction_op, warp_aggregate, num_valid, Int2Type<1>()); + } + + return warp_aggregate; + } + + + /// Computes a thread block-wide reduction using addition (+) as the reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. + template + __device__ __forceinline__ T Sum( + T input, ///< [in] Calling thread's input partial reductions + int num_valid) ///< [in] Number of valid elements (may be less than BLOCK_THREADS) + { + cub::Sum reduction_op; + int warp_offset = (warp_id * LOGICAL_WARP_SIZE); + int warp_num_valid = ((FULL_TILE && EVEN_WARP_MULTIPLE) || (warp_offset + LOGICAL_WARP_SIZE <= num_valid)) ? + LOGICAL_WARP_SIZE : + num_valid - warp_offset; + + // Warp reduction in every warp + T warp_aggregate = WarpReduce(temp_storage.warp_reduce[warp_id]).template Reduce<(FULL_TILE && EVEN_WARP_MULTIPLE)>( + input, + warp_num_valid, + cub::Sum()); + + // Update outputs and block_aggregate with warp-wide aggregates from lane-0s + return ApplyWarpAggregates(reduction_op, warp_aggregate, num_valid); + } + + + /// Computes a thread block-wide reduction using the specified reduction operator. The first num_valid threads each contribute one reduction partial. The return value is only valid for thread0. + template < + bool FULL_TILE, + typename ReductionOp> + __device__ __forceinline__ T Reduce( + T input, ///< [in] Calling thread's input partial reductions + int num_valid, ///< [in] Number of valid elements (may be less than BLOCK_THREADS) + ReductionOp reduction_op) ///< [in] Binary reduction operator + { + int warp_offset = warp_id * LOGICAL_WARP_SIZE; + int warp_num_valid = ((FULL_TILE && EVEN_WARP_MULTIPLE) || (warp_offset + LOGICAL_WARP_SIZE <= num_valid)) ? + LOGICAL_WARP_SIZE : + num_valid - warp_offset; + + // Warp reduction in every warp + T warp_aggregate = WarpReduce(temp_storage.warp_reduce[warp_id]).template Reduce<(FULL_TILE && EVEN_WARP_MULTIPLE)>( + input, + warp_num_valid, + reduction_op); + + // Update outputs and block_aggregate with warp-wide aggregates from lane-0s + return ApplyWarpAggregates(reduction_op, warp_aggregate, num_valid); + } + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_scan_raking.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_scan_raking.cuh new file mode 100644 index 0000000..a855cda --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_scan_raking.cuh @@ -0,0 +1,666 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
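// A minimal sketch, assuming the public cub::BlockReduce front-end specialized with
// BLOCK_REDUCE_WARP_REDUCTIONS, which dispatches to the specialization completed above: every
// warp reduces its own slice, lane0s publish warp aggregates, and thread0 combines them. The
// partial-tile overload below passes num_valid, matching the guarded paths above. Kernel name,
// num_items, and pointers are illustrative.
#include <cub/cub.cuh>

__global__ void block_partial_sum_sketch(const int *d_in, int *d_out, int num_items)
{
    typedef cub::BlockReduce<int, 128, cub::BLOCK_REDUCE_WARP_REDUCTIONS> BlockReduce;
    __shared__ typename BlockReduce::TempStorage temp_storage;

    int tile_base = blockIdx.x * 128;
    int num_valid = min(128, num_items - tile_base);   // last tile may be partial

    int thread_data = (threadIdx.x < num_valid) ? d_in[tile_base + threadIdx.x] : 0;

    // Only the first num_valid threads contribute; the return value is valid in thread0 only.
    int block_sum = BlockReduce(temp_storage).Sum(thread_data, num_valid);

    if (threadIdx.x == 0)
        d_out[blockIdx.x] = block_sum;
}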
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + + +/** + * \file + * cub::BlockScanRaking provides variants of raking-based parallel prefix scan across a CUDA thread block. + */ + +#pragma once + +#include "../../util_ptx.cuh" +#include "../../util_arch.cuh" +#include "../../block/block_raking_layout.cuh" +#include "../../thread/thread_reduce.cuh" +#include "../../thread/thread_scan.cuh" +#include "../../warp/warp_scan.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief BlockScanRaking provides variants of raking-based parallel prefix scan across a CUDA thread block. 
+ */ +template < + typename T, ///< Data type being scanned + int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension + int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension + int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension + bool MEMOIZE, ///< Whether or not to buffer outer raking scan partials to incur fewer shared memory reads at the expense of higher register pressure + int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective +struct BlockScanRaking +{ + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + /// Constants + enum + { + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + }; + + /// Layout type for padded thread block raking grid + typedef BlockRakingLayout BlockRakingLayout; + + /// Constants + enum + { + /// Number of raking threads + RAKING_THREADS = BlockRakingLayout::RAKING_THREADS, + + /// Number of raking elements per warp synchronous raking thread + SEGMENT_LENGTH = BlockRakingLayout::SEGMENT_LENGTH, + + /// Cooperative work can be entirely warp synchronous + WARP_SYNCHRONOUS = (BLOCK_THREADS == RAKING_THREADS), + }; + + /// WarpScan utility type + typedef WarpScan WarpScan; + + /// Shared memory storage layout type + struct _TempStorage + { + typename WarpScan::TempStorage warp_scan; ///< Buffer for warp-synchronous scan + typename BlockRakingLayout::TempStorage raking_grid; ///< Padded thread block raking grid + T block_aggregate; ///< Block aggregate + }; + + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Per-thread fields + //--------------------------------------------------------------------- + + // Thread fields + _TempStorage &temp_storage; + unsigned int linear_tid; + T cached_segment[SEGMENT_LENGTH]; + + + //--------------------------------------------------------------------- + // Utility methods + //--------------------------------------------------------------------- + + /// Templated reduction + template + __device__ __forceinline__ T GuardedReduce( + T* raking_ptr, ///< [in] Input array + ScanOp scan_op, ///< [in] Binary reduction operator + T raking_partial, ///< [in] Prefix to seed reduction with + Int2Type /*iteration*/) + { + if ((BlockRakingLayout::UNGUARDED) || (((linear_tid * SEGMENT_LENGTH) + ITERATION) < BLOCK_THREADS)) + { + T addend = raking_ptr[ITERATION]; + raking_partial = scan_op(raking_partial, addend); + } + + return GuardedReduce(raking_ptr, scan_op, raking_partial, Int2Type()); + } + + + /// Templated reduction (base case) + template + __device__ __forceinline__ T GuardedReduce( + T* /*raking_ptr*/, ///< [in] Input array + ScanOp /*scan_op*/, ///< [in] Binary reduction operator + T raking_partial, ///< [in] Prefix to seed reduction with + Int2Type /*iteration*/) + { + return raking_partial; + } + + + /// Templated copy + template + __device__ __forceinline__ void CopySegment( + T* out, ///< [out] Out array + T* in, ///< [in] Input array + Int2Type /*iteration*/) + { + out[ITERATION] = in[ITERATION]; + CopySegment(out, in, Int2Type()); + } + + + /// Templated copy (base case) + __device__ __forceinline__ void CopySegment( + T* /*out*/, ///< [out] Out array + T* /*in*/, ///< [in] Input array + Int2Type /*iteration*/) + {} 
+ + + /// Performs upsweep raking reduction, returning the aggregate + template + __device__ __forceinline__ T Upsweep( + ScanOp scan_op) + { + T *smem_raking_ptr = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); + + // Read data into registers + CopySegment(cached_segment, smem_raking_ptr, Int2Type<0>()); + + T raking_partial = cached_segment[0]; + + return GuardedReduce(cached_segment, scan_op, raking_partial, Int2Type<1>()); + } + + + /// Performs exclusive downsweep raking scan + template + __device__ __forceinline__ void ExclusiveDownsweep( + ScanOp scan_op, + T raking_partial, + bool apply_prefix = true) + { + T *smem_raking_ptr = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); + + // Read data back into registers + if (!MEMOIZE) + { + CopySegment(cached_segment, smem_raking_ptr, Int2Type<0>()); + } + + internal::ThreadScanExclusive(cached_segment, cached_segment, scan_op, raking_partial, apply_prefix); + + // Write data back to smem + CopySegment(smem_raking_ptr, cached_segment, Int2Type<0>()); + } + + + /// Performs inclusive downsweep raking scan + template + __device__ __forceinline__ void InclusiveDownsweep( + ScanOp scan_op, + T raking_partial, + bool apply_prefix = true) + { + T *smem_raking_ptr = BlockRakingLayout::RakingPtr(temp_storage.raking_grid, linear_tid); + + // Read data back into registers + if (!MEMOIZE) + { + CopySegment(cached_segment, smem_raking_ptr, Int2Type<0>()); + } + + internal::ThreadScanInclusive(cached_segment, cached_segment, scan_op, raking_partial, apply_prefix); + + // Write data back to smem + CopySegment(smem_raking_ptr, cached_segment, Int2Type<0>()); + } + + + //--------------------------------------------------------------------- + // Constructors + //--------------------------------------------------------------------- + + /// Constructor + __device__ __forceinline__ BlockScanRaking( + TempStorage &temp_storage) + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)) + {} + + + //--------------------------------------------------------------------- + // Exclusive scans + //--------------------------------------------------------------------- + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. 
+ template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op) ///< [in] Binary scan operator + { + if (WARP_SYNCHRONOUS) + { + // Short-circuit directly to warp-synchronous scan + WarpScan(temp_storage.warp_scan).ExclusiveScan(input, exclusive_output, scan_op); + } + else + { + // Place thread partial into shared memory raking grid + T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); + *placement_ptr = input; + + CTA_SYNC(); + + // Reduce parallelism down to just raking threads + if (linear_tid < RAKING_THREADS) + { + // Raking upsweep reduction across shared partials + T upsweep_partial = Upsweep(scan_op); + + // Warp-synchronous scan + T exclusive_partial; + WarpScan(temp_storage.warp_scan).ExclusiveScan(upsweep_partial, exclusive_partial, scan_op); + + // Exclusive raking downsweep scan + ExclusiveDownsweep(scan_op, exclusive_partial, (linear_tid != 0)); + } + + CTA_SYNC(); + + // Grab thread prefix from shared memory + exclusive_output = *placement_ptr; + } + } + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input items + T &output, ///< [out] Calling thread's output items (may be aliased to \p input) + const T &initial_value, ///< [in] Initial value to seed the exclusive scan + ScanOp scan_op) ///< [in] Binary scan operator + { + if (WARP_SYNCHRONOUS) + { + // Short-circuit directly to warp-synchronous scan + WarpScan(temp_storage.warp_scan).ExclusiveScan(input, output, initial_value, scan_op); + } + else + { + // Place thread partial into shared memory raking grid + T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); + *placement_ptr = input; + + CTA_SYNC(); + + // Reduce parallelism down to just raking threads + if (linear_tid < RAKING_THREADS) + { + // Raking upsweep reduction across shared partials + T upsweep_partial = Upsweep(scan_op); + + // Exclusive Warp-synchronous scan + T exclusive_partial; + WarpScan(temp_storage.warp_scan).ExclusiveScan(upsweep_partial, exclusive_partial, initial_value, scan_op); + + // Exclusive raking downsweep scan + ExclusiveDownsweep(scan_op, exclusive_partial); + } + + CTA_SYNC(); + + // Grab exclusive partial from shared memory + output = *placement_ptr; + } + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. 
+ template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items + { + if (WARP_SYNCHRONOUS) + { + // Short-circuit directly to warp-synchronous scan + WarpScan(temp_storage.warp_scan).ExclusiveScan(input, output, scan_op, block_aggregate); + } + else + { + // Place thread partial into shared memory raking grid + T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); + *placement_ptr = input; + + CTA_SYNC(); + + // Reduce parallelism down to just raking threads + if (linear_tid < RAKING_THREADS) + { + // Raking upsweep reduction across shared partials + T upsweep_partial= Upsweep(scan_op); + + // Warp-synchronous scan + T inclusive_partial; + T exclusive_partial; + WarpScan(temp_storage.warp_scan).Scan(upsweep_partial, inclusive_partial, exclusive_partial, scan_op); + + // Exclusive raking downsweep scan + ExclusiveDownsweep(scan_op, exclusive_partial, (linear_tid != 0)); + + // Broadcast aggregate to all threads + if (linear_tid == RAKING_THREADS - 1) + temp_storage.block_aggregate = inclusive_partial; + } + + CTA_SYNC(); + + // Grab thread prefix from shared memory + output = *placement_ptr; + + // Retrieve block aggregate + block_aggregate = temp_storage.block_aggregate; + } + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input items + T &output, ///< [out] Calling thread's output items (may be aliased to \p input) + const T &initial_value, ///< [in] Initial value to seed the exclusive scan + ScanOp scan_op, ///< [in] Binary scan operator + T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items + { + if (WARP_SYNCHRONOUS) + { + // Short-circuit directly to warp-synchronous scan + WarpScan(temp_storage.warp_scan).ExclusiveScan(input, output, initial_value, scan_op, block_aggregate); + } + else + { + // Place thread partial into shared memory raking grid + T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); + *placement_ptr = input; + + CTA_SYNC(); + + // Reduce parallelism down to just raking threads + if (linear_tid < RAKING_THREADS) + { + // Raking upsweep reduction across shared partials + T upsweep_partial = Upsweep(scan_op); + + // Warp-synchronous scan + T exclusive_partial; + WarpScan(temp_storage.warp_scan).ExclusiveScan(upsweep_partial, exclusive_partial, initial_value, scan_op, block_aggregate); + + // Exclusive raking downsweep scan + ExclusiveDownsweep(scan_op, exclusive_partial); + + // Broadcast aggregate to other threads + if (linear_tid == 0) + temp_storage.block_aggregate = block_aggregate; + } + + CTA_SYNC(); + + // Grab exclusive partial from shared memory + output = *placement_ptr; + + // Retrieve block aggregate + block_aggregate = temp_storage.block_aggregate; + } + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. 
the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. + template < + typename ScanOp, + typename BlockPrefixCallbackOp> + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. + { + if (WARP_SYNCHRONOUS) + { + // Short-circuit directly to warp-synchronous scan + T block_aggregate; + WarpScan warp_scan(temp_storage.warp_scan); + warp_scan.ExclusiveScan(input, output, scan_op, block_aggregate); + + // Obtain warp-wide prefix in lane0, then broadcast to other lanes + T block_prefix = block_prefix_callback_op(block_aggregate); + block_prefix = warp_scan.Broadcast(block_prefix, 0); + + output = scan_op(block_prefix, output); + if (linear_tid == 0) + output = block_prefix; + } + else + { + // Place thread partial into shared memory raking grid + T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); + *placement_ptr = input; + + CTA_SYNC(); + + // Reduce parallelism down to just raking threads + if (linear_tid < RAKING_THREADS) + { + WarpScan warp_scan(temp_storage.warp_scan); + + // Raking upsweep reduction across shared partials + T upsweep_partial = Upsweep(scan_op); + + // Warp-synchronous scan + T exclusive_partial, block_aggregate; + warp_scan.ExclusiveScan(upsweep_partial, exclusive_partial, scan_op, block_aggregate); + + // Obtain block-wide prefix in lane0, then broadcast to other lanes + T block_prefix = block_prefix_callback_op(block_aggregate); + block_prefix = warp_scan.Broadcast(block_prefix, 0); + + // Update prefix with warpscan exclusive partial + T downsweep_prefix = scan_op(block_prefix, exclusive_partial); + if (linear_tid == 0) + downsweep_prefix = block_prefix; + + // Exclusive raking downsweep scan + ExclusiveDownsweep(scan_op, downsweep_prefix); + } + + CTA_SYNC(); + + // Grab thread prefix from shared memory + output = *placement_ptr; + } + } + + + //--------------------------------------------------------------------- + // Inclusive scans + //--------------------------------------------------------------------- + + /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. 
+ template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op) ///< [in] Binary scan operator + { + if (WARP_SYNCHRONOUS) + { + // Short-circuit directly to warp-synchronous scan + WarpScan(temp_storage.warp_scan).InclusiveScan(input, output, scan_op); + } + else + { + // Place thread partial into shared memory raking grid + T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); + *placement_ptr = input; + + CTA_SYNC(); + + // Reduce parallelism down to just raking threads + if (linear_tid < RAKING_THREADS) + { + // Raking upsweep reduction across shared partials + T upsweep_partial = Upsweep(scan_op); + + // Exclusive Warp-synchronous scan + T exclusive_partial; + WarpScan(temp_storage.warp_scan).ExclusiveScan(upsweep_partial, exclusive_partial, scan_op); + + // Inclusive raking downsweep scan + InclusiveDownsweep(scan_op, exclusive_partial, (linear_tid != 0)); + } + + CTA_SYNC(); + + // Grab thread prefix from shared memory + output = *placement_ptr; + } + } + + + /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. + template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items + { + if (WARP_SYNCHRONOUS) + { + // Short-circuit directly to warp-synchronous scan + WarpScan(temp_storage.warp_scan).InclusiveScan(input, output, scan_op, block_aggregate); + } + else + { + // Place thread partial into shared memory raking grid + T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); + *placement_ptr = input; + + CTA_SYNC(); + + // Reduce parallelism down to just raking threads + if (linear_tid < RAKING_THREADS) + { + // Raking upsweep reduction across shared partials + T upsweep_partial = Upsweep(scan_op); + + // Warp-synchronous scan + T inclusive_partial; + T exclusive_partial; + WarpScan(temp_storage.warp_scan).Scan(upsweep_partial, inclusive_partial, exclusive_partial, scan_op); + + // Inclusive raking downsweep scan + InclusiveDownsweep(scan_op, exclusive_partial, (linear_tid != 0)); + + // Broadcast aggregate to all threads + if (linear_tid == RAKING_THREADS - 1) + temp_storage.block_aggregate = inclusive_partial; + } + + CTA_SYNC(); + + // Grab thread prefix from shared memory + output = *placement_ptr; + + // Retrieve block aggregate + block_aggregate = temp_storage.block_aggregate; + } + } + + + /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
+ template < + typename ScanOp, + typename BlockPrefixCallbackOp> + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. + { + if (WARP_SYNCHRONOUS) + { + // Short-circuit directly to warp-synchronous scan + T block_aggregate; + WarpScan warp_scan(temp_storage.warp_scan); + warp_scan.InclusiveScan(input, output, scan_op, block_aggregate); + + // Obtain warp-wide prefix in lane0, then broadcast to other lanes + T block_prefix = block_prefix_callback_op(block_aggregate); + block_prefix = warp_scan.Broadcast(block_prefix, 0); + + // Update prefix with exclusive warpscan partial + output = scan_op(block_prefix, output); + } + else + { + // Place thread partial into shared memory raking grid + T *placement_ptr = BlockRakingLayout::PlacementPtr(temp_storage.raking_grid, linear_tid); + *placement_ptr = input; + + CTA_SYNC(); + + // Reduce parallelism down to just raking threads + if (linear_tid < RAKING_THREADS) + { + WarpScan warp_scan(temp_storage.warp_scan); + + // Raking upsweep reduction across shared partials + T upsweep_partial = Upsweep(scan_op); + + // Warp-synchronous scan + T exclusive_partial, block_aggregate; + warp_scan.ExclusiveScan(upsweep_partial, exclusive_partial, scan_op, block_aggregate); + + // Obtain block-wide prefix in lane0, then broadcast to other lanes + T block_prefix = block_prefix_callback_op(block_aggregate); + block_prefix = warp_scan.Broadcast(block_prefix, 0); + + // Update prefix with warpscan exclusive partial + T downsweep_prefix = scan_op(block_prefix, exclusive_partial); + if (linear_tid == 0) + downsweep_prefix = block_prefix; + + // Inclusive raking downsweep scan + InclusiveDownsweep(scan_op, downsweep_prefix); + } + + CTA_SYNC(); + + // Grab thread prefix from shared memory + output = *placement_ptr; + } + } + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_scan_warp_scans.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_scan_warp_scans.cuh new file mode 100644 index 0000000..85e4d61 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_scan_warp_scans.cuh @@ -0,0 +1,392 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
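// A minimal sketch, assuming the public cub::BlockScan front-end specialized with
// BLOCK_SCAN_RAKING, which dispatches to the raking specialization completed above
// (BLOCK_SCAN_RAKING_MEMOIZE selects the same specialization with the MEMOIZE flag set, trading
// registers for fewer shared-memory reads). Kernel name and pointers are illustrative.
#include <cub/cub.cuh>

__global__ void block_exclusive_sum_sketch(const int *d_in, int *d_out)
{
    typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_RAKING> BlockScan;
    __shared__ typename BlockScan::TempStorage temp_storage;

    int thread_data = d_in[blockIdx.x * 128 + threadIdx.x];

    // Upsweep raking reduction, warp-synchronous scan of the raking partials, then an exclusive
    // downsweep that seeds each raking segment with its prefix.
    int exclusive_prefix;
    BlockScan(temp_storage).ExclusiveSum(thread_data, exclusive_prefix);

    d_out[blockIdx.x * 128 + threadIdx.x] = exclusive_prefix;
}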
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::BlockScanWarpscans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. + */ + +#pragma once + +#include "../../util_arch.cuh" +#include "../../util_ptx.cuh" +#include "../../warp/warp_scan.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \brief BlockScanWarpScans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. + */ +template < + typename T, + int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension + int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension + int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension + int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective +struct BlockScanWarpScans +{ + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + /// Constants + enum + { + /// Number of warp threads + WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), + + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + + /// Number of active warps + WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, + }; + + /// WarpScan utility type + typedef WarpScan WarpScanT; + + /// WarpScan utility type + typedef WarpScan WarpAggregateScan; + + /// Shared memory storage layout type + + struct __align__(32) _TempStorage + { + T warp_aggregates[WARPS]; + typename WarpScanT::TempStorage warp_scan[WARPS]; ///< Buffer for warp-synchronous scans + T block_prefix; ///< Shared prefix for the entire thread block + }; + + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Per-thread fields + //--------------------------------------------------------------------- + + // Thread fields + _TempStorage &temp_storage; + unsigned int linear_tid; + unsigned int warp_id; + unsigned int lane_id; + + + //--------------------------------------------------------------------- + // Constructors + //--------------------------------------------------------------------- + + /// Constructor + __device__ __forceinline__ BlockScanWarpScans( + TempStorage &temp_storage) + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), + warp_id((WARPS == 1) ? 
0 : linear_tid / WARP_THREADS), + lane_id(LaneId()) + {} + + + //--------------------------------------------------------------------- + // Utility methods + //--------------------------------------------------------------------- + + template + __device__ __forceinline__ void ApplyWarpAggregates( + T &warp_prefix, ///< [out] The calling thread's partial reduction + ScanOp scan_op, ///< [in] Binary scan operator + T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items + Int2Type /*addend_warp*/) + { + if (warp_id == WARP) + warp_prefix = block_aggregate; + + T addend = temp_storage.warp_aggregates[WARP]; + block_aggregate = scan_op(block_aggregate, addend); + + ApplyWarpAggregates(warp_prefix, scan_op, block_aggregate, Int2Type()); + } + + template + __device__ __forceinline__ void ApplyWarpAggregates( + T &/*warp_prefix*/, ///< [out] The calling thread's partial reduction + ScanOp /*scan_op*/, ///< [in] Binary scan operator + T &/*block_aggregate*/, ///< [out] Threadblock-wide aggregate reduction of input items + Int2Type /*addend_warp*/) + {} + + + /// Use the warp-wide aggregates to compute the calling warp's prefix. Also returns block-wide aggregate in all threads. + template + __device__ __forceinline__ T ComputeWarpPrefix( + ScanOp scan_op, ///< [in] Binary scan operator + T warp_aggregate, ///< [in] [laneWARP_THREADS - 1 only] Warp-wide aggregate reduction of input items + T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items + { + // Last lane in each warp shares its warp-aggregate + if (lane_id == WARP_THREADS - 1) + temp_storage.warp_aggregates[warp_id] = warp_aggregate; + + CTA_SYNC(); + + // Accumulate block aggregates and save the one that is our warp's prefix + T warp_prefix; + block_aggregate = temp_storage.warp_aggregates[0]; + + // Use template unrolling (since the PTX backend can't handle unrolling it for SM1x) + ApplyWarpAggregates(warp_prefix, scan_op, block_aggregate, Int2Type<1>()); +/* + #pragma unroll + for (int WARP = 1; WARP < WARPS; ++WARP) + { + if (warp_id == WARP) + warp_prefix = block_aggregate; + + T addend = temp_storage.warp_aggregates[WARP]; + block_aggregate = scan_op(block_aggregate, addend); + } +*/ + + return warp_prefix; + } + + + /// Use the warp-wide aggregates and initial-value to compute the calling warp's prefix. Also returns block-wide aggregate in all threads. + template + __device__ __forceinline__ T ComputeWarpPrefix( + ScanOp scan_op, ///< [in] Binary scan operator + T warp_aggregate, ///< [in] [laneWARP_THREADS - 1 only] Warp-wide aggregate reduction of input items + T &block_aggregate, ///< [out] Threadblock-wide aggregate reduction of input items + const T &initial_value) ///< [in] Initial value to seed the exclusive scan + { + T warp_prefix = ComputeWarpPrefix(scan_op, warp_aggregate, block_aggregate); + + warp_prefix = scan_op(initial_value, warp_prefix); + + if (warp_id == 0) + warp_prefix = initial_value; + + return warp_prefix; + } + + //--------------------------------------------------------------------- + // Exclusive scans + //--------------------------------------------------------------------- + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. 
+ template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op) ///< [in] Binary scan operator + { + // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. + T block_aggregate; + ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input items + T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) + const T &initial_value, ///< [in] Initial value to seed the exclusive scan + ScanOp scan_op) ///< [in] Binary scan operator + { + T block_aggregate; + ExclusiveScan(input, exclusive_output, initial_value, scan_op, block_aggregate); + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items + { + // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. + T inclusive_output; + WarpScanT(temp_storage.warp_scan[warp_id]).Scan(input, inclusive_output, exclusive_output, scan_op); + + // Compute the warp-wide prefix and block-wide aggregate for each warp. Warp prefix for warp0 is invalid. + T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate); + + // Apply warp prefix to our lane's partial + if (warp_id != 0) + { + exclusive_output = scan_op(warp_prefix, exclusive_output); + if (lane_id == 0) + exclusive_output = warp_prefix; + } + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input items + T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) + const T &initial_value, ///< [in] Initial value to seed the exclusive scan + ScanOp scan_op, ///< [in] Binary scan operator + T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items + { + // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. 
+ T inclusive_output; + WarpScanT(temp_storage.warp_scan[warp_id]).Scan(input, inclusive_output, exclusive_output, scan_op); + + // Compute the warp-wide prefix and block-wide aggregate for each warp + T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate, initial_value); + + // Apply warp prefix to our lane's partial + exclusive_output = scan_op(warp_prefix, exclusive_output); + if (lane_id == 0) + exclusive_output = warp_prefix; + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. + template < + typename ScanOp, + typename BlockPrefixCallbackOp> + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. + { + // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. + T block_aggregate; + ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); + + // Use the first warp to determine the thread block prefix, returning the result in lane0 + if (warp_id == 0) + { + T block_prefix = block_prefix_callback_op(block_aggregate); + if (lane_id == 0) + { + // Share the prefix with all threads + temp_storage.block_prefix = block_prefix; + exclusive_output = block_prefix; // The block prefix is the exclusive output for tid0 + } + } + + CTA_SYNC(); + + // Incorporate thread block prefix into outputs + T block_prefix = temp_storage.block_prefix; + if (linear_tid > 0) + { + exclusive_output = scan_op(block_prefix, exclusive_output); + } + } + + + //--------------------------------------------------------------------- + // Inclusive scans + //--------------------------------------------------------------------- + + /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. + template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op) ///< [in] Binary scan operator + { + T block_aggregate; + InclusiveScan(input, inclusive_output, scan_op, block_aggregate); + } + + + /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
+ template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items + { + WarpScanT(temp_storage.warp_scan[warp_id]).InclusiveScan(input, inclusive_output, scan_op); + + // Compute the warp-wide prefix and block-wide aggregate for each warp. Warp prefix for warp0 is invalid. + T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate); + + // Apply warp prefix to our lane's partial + if (warp_id != 0) + { + inclusive_output = scan_op(warp_prefix, inclusive_output); + } + } + + + /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. + template < + typename ScanOp, + typename BlockPrefixCallbackOp> + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. + { + T block_aggregate; + InclusiveScan(input, exclusive_output, scan_op, block_aggregate); + + // Use the first warp to determine the thread block prefix, returning the result in lane0 + if (warp_id == 0) + { + T block_prefix = block_prefix_callback_op(block_aggregate); + if (lane_id == 0) + { + // Share the prefix with all threads + temp_storage.block_prefix = block_prefix; + } + } + + CTA_SYNC(); + + // Incorporate thread block prefix into outputs + T block_prefix = temp_storage.block_prefix; + exclusive_output = scan_op(block_prefix, exclusive_output); + } + + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_scan_warp_scans2.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_scan_warp_scans2.cuh new file mode 100644 index 0000000..4de7c69 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_scan_warp_scans2.cuh @@ -0,0 +1,436 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::BlockScanWarpscans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. + */ + +#pragma once + +#include "../../util_arch.cuh" +#include "../../util_ptx.cuh" +#include "../../warp/warp_scan.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \brief BlockScanWarpScans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. + */ +template < + typename T, + int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension + int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension + int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension + int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective +struct BlockScanWarpScans +{ + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + /// Constants + enum + { + /// Number of warp threads + WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), + + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + + /// Number of active warps + WARPS = (BLOCK_THREADS + WARP_THREADS - 1) / WARP_THREADS, + }; + + /// WarpScan utility type + typedef WarpScan WarpScanT; + + /// WarpScan utility type + typedef WarpScan WarpAggregateScanT; + + /// Shared memory storage layout type + struct _TempStorage + { + typename WarpAggregateScanT::TempStorage inner_scan[WARPS]; ///< Buffer for warp-synchronous scans + typename WarpScanT::TempStorage warp_scan[WARPS]; ///< Buffer for warp-synchronous scans + T warp_aggregates[WARPS]; + T block_prefix; ///< Shared prefix for the entire thread block + }; + + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Per-thread fields + //--------------------------------------------------------------------- + + // Thread fields + _TempStorage &temp_storage; + unsigned int linear_tid; + unsigned int warp_id; + unsigned int lane_id; + + + //--------------------------------------------------------------------- + // Constructors + //--------------------------------------------------------------------- + + /// Constructor + __device__ 
__forceinline__ BlockScanWarpScans(
+        TempStorage &temp_storage)
+    :
+        temp_storage(temp_storage.Alias()),
+        linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)),
+        warp_id((WARPS == 1) ? 0 : linear_tid / WARP_THREADS),
+        lane_id(LaneId())
+    {}
+
+
+    //---------------------------------------------------------------------
+    // Utility methods
+    //---------------------------------------------------------------------
+
+    template <typename ScanOp, int WARP>
+    __device__ __forceinline__ void ApplyWarpAggregates(
+        T               &warp_prefix,       ///< [out] The calling thread's partial reduction
+        ScanOp          scan_op,            ///< [in] Binary scan operator
+        T               &block_aggregate,   ///< [out] Threadblock-wide aggregate reduction of input items
+        Int2Type<WARP>  addend_warp)
+    {
+        if (warp_id == WARP)
+            warp_prefix = block_aggregate;
+
+        T addend = temp_storage.warp_aggregates[WARP];
+        block_aggregate = scan_op(block_aggregate, addend);
+
+        ApplyWarpAggregates(warp_prefix, scan_op, block_aggregate, Int2Type<WARP + 1>());
+    }
+
+    template <typename ScanOp>
+    __device__ __forceinline__ void ApplyWarpAggregates(
+        T               &warp_prefix,       ///< [out] The calling thread's partial reduction
+        ScanOp          scan_op,            ///< [in] Binary scan operator
+        T               &block_aggregate,   ///< [out] Threadblock-wide aggregate reduction of input items
+        Int2Type<WARPS> addend_warp)
+    {}
+
+
+    /// Use the warp-wide aggregates to compute the calling warp's prefix. Also returns block-wide aggregate in all threads.
+    template <typename ScanOp>
+    __device__ __forceinline__ T ComputeWarpPrefix(
+        ScanOp  scan_op,            ///< [in] Binary scan operator
+        T       warp_aggregate,     ///< [in] [lane WARP_THREADS - 1 only] Warp-wide aggregate reduction of input items
+        T       &block_aggregate)   ///< [out] Threadblock-wide aggregate reduction of input items
+    {
+        // Last lane in each warp shares its warp-aggregate
+        if (lane_id == WARP_THREADS - 1)
+            temp_storage.warp_aggregates[warp_id] = warp_aggregate;
+
+        CTA_SYNC();
+
+        // Accumulate block aggregates and save the one that is our warp's prefix
+        T warp_prefix;
+        block_aggregate = temp_storage.warp_aggregates[0];
+
+        // Use template unrolling (since the PTX backend can't handle unrolling it for SM1x)
+        ApplyWarpAggregates(warp_prefix, scan_op, block_aggregate, Int2Type<1>());
+/*
+        #pragma unroll
+        for (int WARP = 1; WARP < WARPS; ++WARP)
+        {
+            if (warp_id == WARP)
+                warp_prefix = block_aggregate;
+
+            T addend = temp_storage.warp_aggregates[WARP];
+            block_aggregate = scan_op(block_aggregate, addend);
+        }
+*/
+
+        return warp_prefix;
+    }
+
+
+    /// Use the warp-wide aggregates and initial-value to compute the calling warp's prefix. Also returns block-wide aggregate in all threads.
+    template <typename ScanOp>
+    __device__ __forceinline__ T ComputeWarpPrefix(
+        ScanOp   scan_op,            ///< [in] Binary scan operator
+        T        warp_aggregate,     ///< [in] [lane WARP_THREADS - 1 only] Warp-wide aggregate reduction of input items
+        T        &block_aggregate,   ///< [out] Threadblock-wide aggregate reduction of input items
+        const T  &initial_value)     ///< [in] Initial value to seed the exclusive scan
+    {
+        T warp_prefix = ComputeWarpPrefix(scan_op, warp_aggregate, block_aggregate);
+
+        warp_prefix = scan_op(initial_value, warp_prefix);
+
+        if (warp_id == 0)
+            warp_prefix = initial_value;
+
+        return warp_prefix;
+    }
+
+    //---------------------------------------------------------------------
+    // Exclusive scans
+    //---------------------------------------------------------------------
+
+    /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element.
With no initial value, the output computed for thread0 is undefined. + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op) ///< [in] Binary scan operator + { + // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. + T block_aggregate; + ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input items + T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) + const T &initial_value, ///< [in] Initial value to seed the exclusive scan + ScanOp scan_op) ///< [in] Binary scan operator + { + T block_aggregate; + ExclusiveScan(input, exclusive_output, initial_value, scan_op, block_aggregate); + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items + { + WarpScanT my_warp_scan(temp_storage.warp_scan[warp_id]); + + // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. + T inclusive_output; + my_warp_scan.Scan(input, inclusive_output, exclusive_output, scan_op); + + // Compute the warp-wide prefix and block-wide aggregate for each warp. Warp prefix for warp0 is invalid. +// T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate); + +//-------------------------------------------------- + // Last lane in each warp shares its warp-aggregate + if (lane_id == WARP_THREADS - 1) + temp_storage.warp_aggregates[warp_id] = inclusive_output; + + CTA_SYNC(); + + // Get the warp scan partial + T warp_inclusive, warp_prefix; + if (lane_id < WARPS) + { + // Scan the warpscan partials + T warp_val = temp_storage.warp_aggregates[lane_id]; + WarpAggregateScanT(temp_storage.inner_scan[warp_id]).Scan(warp_val, warp_inclusive, warp_prefix, scan_op); + } + + warp_prefix = my_warp_scan.Broadcast(warp_prefix, warp_id); + block_aggregate = my_warp_scan.Broadcast(warp_inclusive, WARPS - 1); +//-------------------------------------------------- + + // Apply warp prefix to our lane's partial + if (warp_id != 0) + { + exclusive_output = scan_op(warp_prefix, exclusive_output); + if (lane_id == 0) + exclusive_output = warp_prefix; + } + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. 
+ template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input items + T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) + const T &initial_value, ///< [in] Initial value to seed the exclusive scan + ScanOp scan_op, ///< [in] Binary scan operator + T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items + { + WarpScanT my_warp_scan(temp_storage.warp_scan[warp_id]); + + // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. + T inclusive_output; + my_warp_scan.Scan(input, inclusive_output, exclusive_output, scan_op); + + // Compute the warp-wide prefix and block-wide aggregate for each warp +// T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate, initial_value); + +//-------------------------------------------------- + // Last lane in each warp shares its warp-aggregate + if (lane_id == WARP_THREADS - 1) + temp_storage.warp_aggregates[warp_id] = inclusive_output; + + CTA_SYNC(); + + // Get the warp scan partial + T warp_inclusive, warp_prefix; + if (lane_id < WARPS) + { + // Scan the warpscan partials + T warp_val = temp_storage.warp_aggregates[lane_id]; + WarpAggregateScanT(temp_storage.inner_scan[warp_id]).Scan(warp_val, warp_inclusive, warp_prefix, initial_value, scan_op); + } + + warp_prefix = my_warp_scan.Broadcast(warp_prefix, warp_id); + block_aggregate = my_warp_scan.Broadcast(warp_inclusive, WARPS - 1); +//-------------------------------------------------- + + // Apply warp prefix to our lane's partial + exclusive_output = scan_op(warp_prefix, exclusive_output); + if (lane_id == 0) + exclusive_output = warp_prefix; + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. + template < + typename ScanOp, + typename BlockPrefixCallbackOp> + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. + { + // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. 
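+        // Prefix-callback protocol used below: warp0 passes the freshly computed
+        // block_aggregate to block_prefix_callback_op, the value returned in lane0
+        // becomes the block-wide running prefix, it is shared through temp_storage,
+        // and every other thread then folds it into its exclusive partial (tid0
+        // simply takes the prefix as its output).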
+ T block_aggregate; + ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); + + // Use the first warp to determine the thread block prefix, returning the result in lane0 + if (warp_id == 0) + { + T block_prefix = block_prefix_callback_op(block_aggregate); + if (lane_id == 0) + { + // Share the prefix with all threads + temp_storage.block_prefix = block_prefix; + exclusive_output = block_prefix; // The block prefix is the exclusive output for tid0 + } + } + + CTA_SYNC(); + + // Incorporate thread block prefix into outputs + T block_prefix = temp_storage.block_prefix; + if (linear_tid > 0) + { + exclusive_output = scan_op(block_prefix, exclusive_output); + } + } + + + //--------------------------------------------------------------------- + // Inclusive scans + //--------------------------------------------------------------------- + + /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. + template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op) ///< [in] Binary scan operator + { + T block_aggregate; + InclusiveScan(input, inclusive_output, scan_op, block_aggregate); + } + + + /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. + template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items + { + WarpScanT(temp_storage.warp_scan[warp_id]).InclusiveScan(input, inclusive_output, scan_op); + + // Compute the warp-wide prefix and block-wide aggregate for each warp. Warp prefix for warp0 is invalid. + T warp_prefix = ComputeWarpPrefix(scan_op, inclusive_output, block_aggregate); + + // Apply warp prefix to our lane's partial + if (warp_id != 0) + { + inclusive_output = scan_op(warp_prefix, inclusive_output); + } + } + + + /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. Also provides every thread with the block-wide \p block_aggregate of all inputs. + template < + typename ScanOp, + typename BlockPrefixCallbackOp> + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. 
+ { + T block_aggregate; + InclusiveScan(input, exclusive_output, scan_op, block_aggregate); + + // Use the first warp to determine the thread block prefix, returning the result in lane0 + if (warp_id == 0) + { + T block_prefix = block_prefix_callback_op(block_aggregate); + if (lane_id == 0) + { + // Share the prefix with all threads + temp_storage.block_prefix = block_prefix; + } + } + + CTA_SYNC(); + + // Incorporate thread block prefix into outputs + T block_prefix = temp_storage.block_prefix; + exclusive_output = scan_op(block_prefix, exclusive_output); + } + + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_scan_warp_scans3.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_scan_warp_scans3.cuh new file mode 100644 index 0000000..147ca4c --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/block/specializations/block_scan_warp_scans3.cuh @@ -0,0 +1,418 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::BlockScanWarpscans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. + */ + +#pragma once + +#include "../../util_arch.cuh" +#include "../../util_ptx.cuh" +#include "../../warp/warp_scan.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \brief BlockScanWarpScans provides warpscan-based variants of parallel prefix scan across a CUDA thread block. 
+ */ +template < + typename T, + int BLOCK_DIM_X, ///< The thread block length in threads along the X dimension + int BLOCK_DIM_Y, ///< The thread block length in threads along the Y dimension + int BLOCK_DIM_Z, ///< The thread block length in threads along the Z dimension + int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective +struct BlockScanWarpScans +{ + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + /// Constants + enum + { + /// The thread block size in threads + BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z, + + /// Number of warp threads + INNER_WARP_THREADS = CUB_WARP_THREADS(PTX_ARCH), + OUTER_WARP_THREADS = BLOCK_THREADS / INNER_WARP_THREADS, + + /// Number of outer scan warps + OUTER_WARPS = INNER_WARP_THREADS + }; + + /// Outer WarpScan utility type + typedef WarpScan OuterWarpScanT; + + /// Inner WarpScan utility type + typedef WarpScan InnerWarpScanT; + + typedef typename OuterWarpScanT::TempStorage OuterScanArray[OUTER_WARPS]; + + + /// Shared memory storage layout type + struct _TempStorage + { + union Aliasable + { + Uninitialized outer_warp_scan; ///< Buffer for warp-synchronous outer scans + typename InnerWarpScanT::TempStorage inner_warp_scan; ///< Buffer for warp-synchronous inner scan + + } aliasable; + + T warp_aggregates[OUTER_WARPS]; + + T block_aggregate; ///< Shared prefix for the entire thread block + }; + + + /// Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Per-thread fields + //--------------------------------------------------------------------- + + // Thread fields + _TempStorage &temp_storage; + unsigned int linear_tid; + unsigned int warp_id; + unsigned int lane_id; + + + //--------------------------------------------------------------------- + // Constructors + //--------------------------------------------------------------------- + + /// Constructor + __device__ __forceinline__ BlockScanWarpScans( + TempStorage &temp_storage) + : + temp_storage(temp_storage.Alias()), + linear_tid(RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z)), + warp_id((OUTER_WARPS == 1) ? 0 : linear_tid / OUTER_WARP_THREADS), + lane_id((OUTER_WARPS == 1) ? linear_tid : linear_tid % OUTER_WARP_THREADS) + {} + + + //--------------------------------------------------------------------- + // Exclusive scans + //--------------------------------------------------------------------- + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. With no initial value, the output computed for thread0 is undefined. + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op) ///< [in] Binary scan operator + { + // Compute block-wide exclusive scan. The exclusive output from tid0 is invalid. + T block_aggregate; + ExclusiveScan(input, exclusive_output, scan_op, block_aggregate); + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. 
+ template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input items + T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) + const T &initial_value, ///< [in] Initial value to seed the exclusive scan + ScanOp scan_op) ///< [in] Binary scan operator + { + T block_aggregate; + ExclusiveScan(input, exclusive_output, initial_value, scan_op, block_aggregate); + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for thread0 is undefined. + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items + { + // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. + T inclusive_output; + OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).Scan( + input, inclusive_output, exclusive_output, scan_op); + + // Share outer warp total + if (lane_id == OUTER_WARP_THREADS - 1) + temp_storage.warp_aggregates[warp_id] = inclusive_output; + + CTA_SYNC(); + + if (linear_tid < INNER_WARP_THREADS) + { + T outer_warp_input = temp_storage.warp_aggregates[linear_tid]; + T outer_warp_exclusive; + + InnerWarpScanT(temp_storage.aliasable.inner_warp_scan).ExclusiveScan( + outer_warp_input, outer_warp_exclusive, scan_op, block_aggregate); + + temp_storage.block_aggregate = block_aggregate; + temp_storage.warp_aggregates[linear_tid] = outer_warp_exclusive; + } + + CTA_SYNC(); + + if (warp_id != 0) + { + // Retrieve block aggregate + block_aggregate = temp_storage.block_aggregate; + + // Apply warp prefix to our lane's partial + T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; + exclusive_output = scan_op(outer_warp_exclusive, exclusive_output); + if (lane_id == 0) + exclusive_output = outer_warp_exclusive; + } + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input items + T &exclusive_output, ///< [out] Calling thread's output items (may be aliased to \p input) + const T &initial_value, ///< [in] Initial value to seed the exclusive scan + ScanOp scan_op, ///< [in] Binary scan operator + T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items + { + // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. 
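+        // Two-level organization used below: each outer warp scans its own inputs, the
+        // last lane of every outer warp publishes that warp's total, and the single
+        // inner warp exclusive-scans those totals (seeded with initial_value) to
+        // produce the block-wide aggregate and the running prefix that each outer
+        // warp folds back into its lanes' partials.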
+ T inclusive_output; + OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).Scan( + input, inclusive_output, exclusive_output, scan_op); + + // Share outer warp total + if (lane_id == OUTER_WARP_THREADS - 1) + { + temp_storage.warp_aggregates[warp_id] = inclusive_output; + } + + CTA_SYNC(); + + if (linear_tid < INNER_WARP_THREADS) + { + T outer_warp_input = temp_storage.warp_aggregates[linear_tid]; + T outer_warp_exclusive; + + InnerWarpScanT(temp_storage.aliasable.inner_warp_scan).ExclusiveScan( + outer_warp_input, outer_warp_exclusive, initial_value, scan_op, block_aggregate); + + temp_storage.block_aggregate = block_aggregate; + temp_storage.warp_aggregates[linear_tid] = outer_warp_exclusive; + } + + CTA_SYNC(); + + // Retrieve block aggregate + block_aggregate = temp_storage.block_aggregate; + + // Apply warp prefix to our lane's partial + T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; + exclusive_output = scan_op(outer_warp_exclusive, exclusive_output); + if (lane_id == 0) + exclusive_output = outer_warp_exclusive; + } + + + /// Computes an exclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. The call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. + template < + typename ScanOp, + typename BlockPrefixCallbackOp> + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item + T &exclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. + { + // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. 
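+        // Same outer/inner organization as the overloads above, except that after the
+        // inner warp scans the outer-warp totals it obtains the block-wide prefix from
+        // block_prefix_callback_op, broadcasts it across the inner warp, and uses it to
+        // seed the per-warp downsweep prefixes before they are applied to each lane.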
+ T inclusive_output; + OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).Scan( + input, inclusive_output, exclusive_output, scan_op); + + // Share outer warp total + if (lane_id == OUTER_WARP_THREADS - 1) + temp_storage.warp_aggregates[warp_id] = inclusive_output; + + CTA_SYNC(); + + if (linear_tid < INNER_WARP_THREADS) + { + InnerWarpScanT inner_scan(temp_storage.aliasable.inner_warp_scan); + + T upsweep = temp_storage.warp_aggregates[linear_tid]; + T downsweep_prefix, block_aggregate; + + inner_scan.ExclusiveScan(upsweep, downsweep_prefix, scan_op, block_aggregate); + + // Use callback functor to get block prefix in lane0 and then broadcast to other lanes + T block_prefix = block_prefix_callback_op(block_aggregate); + block_prefix = inner_scan.Broadcast(block_prefix, 0); + + downsweep_prefix = scan_op(block_prefix, downsweep_prefix); + if (linear_tid == 0) + downsweep_prefix = block_prefix; + + temp_storage.warp_aggregates[linear_tid] = downsweep_prefix; + } + + CTA_SYNC(); + + // Apply warp prefix to our lane's partial (or assign it if partial is invalid) + T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; + exclusive_output = scan_op(outer_warp_exclusive, exclusive_output); + if (lane_id == 0) + exclusive_output = outer_warp_exclusive; + } + + + //--------------------------------------------------------------------- + // Inclusive scans + //--------------------------------------------------------------------- + + /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. + template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op) ///< [in] Binary scan operator + { + T block_aggregate; + InclusiveScan(input, inclusive_output, scan_op, block_aggregate); + } + + + /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. Also provides every thread with the block-wide \p block_aggregate of all inputs. + template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + T &block_aggregate) ///< [out] Threadblock-wide aggregate reduction of input items + { + // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. 
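+        // Inclusive variant of the same scheme: the outer warps scan inclusively, the
+        // inner warp exclusive-scans the outer-warp totals, and every warp other than
+        // warp0 adds its exclusive warp prefix to each lane's inclusive partial.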
+ OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).InclusiveScan( + input, inclusive_output, scan_op); + + // Share outer warp total + if (lane_id == OUTER_WARP_THREADS - 1) + temp_storage.warp_aggregates[warp_id] = inclusive_output; + + CTA_SYNC(); + + if (linear_tid < INNER_WARP_THREADS) + { + T outer_warp_input = temp_storage.warp_aggregates[linear_tid]; + T outer_warp_exclusive; + + InnerWarpScanT(temp_storage.aliasable.inner_warp_scan).ExclusiveScan( + outer_warp_input, outer_warp_exclusive, scan_op, block_aggregate); + + temp_storage.block_aggregate = block_aggregate; + temp_storage.warp_aggregates[linear_tid] = outer_warp_exclusive; + } + + CTA_SYNC(); + + if (warp_id != 0) + { + // Retrieve block aggregate + block_aggregate = temp_storage.block_aggregate; + + // Apply warp prefix to our lane's partial + T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; + inclusive_output = scan_op(outer_warp_exclusive, inclusive_output); + } + } + + + /// Computes an inclusive thread block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes one input element. the call-back functor \p block_prefix_callback_op is invoked by the first warp in the block, and the value returned by lane0 in that warp is used as the "seed" value that logically prefixes the thread block's scan inputs. + template < + typename ScanOp, + typename BlockPrefixCallbackOp> + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item + T &inclusive_output, ///< [out] Calling thread's output item (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + BlockPrefixCallbackOp &block_prefix_callback_op) ///< [in-out] [warp0 only] Call-back functor for specifying a thread block-wide prefix to be applied to all inputs. + { + // Compute warp scan in each warp. The exclusive output from each lane0 is invalid. + OuterWarpScanT(temp_storage.aliasable.outer_warp_scan.Alias()[warp_id]).InclusiveScan( + input, inclusive_output, scan_op); + + // Share outer warp total + if (lane_id == OUTER_WARP_THREADS - 1) + temp_storage.warp_aggregates[warp_id] = inclusive_output; + + CTA_SYNC(); + + if (linear_tid < INNER_WARP_THREADS) + { + InnerWarpScanT inner_scan(temp_storage.aliasable.inner_warp_scan); + + T upsweep = temp_storage.warp_aggregates[linear_tid]; + T downsweep_prefix, block_aggregate; + inner_scan.ExclusiveScan(upsweep, downsweep_prefix, scan_op, block_aggregate); + + // Use callback functor to get block prefix in lane0 and then broadcast to other lanes + T block_prefix = block_prefix_callback_op(block_aggregate); + block_prefix = inner_scan.Broadcast(block_prefix, 0); + + downsweep_prefix = scan_op(block_prefix, downsweep_prefix); + if (linear_tid == 0) + downsweep_prefix = block_prefix; + + temp_storage.warp_aggregates[linear_tid] = downsweep_prefix; + } + + CTA_SYNC(); + + // Apply warp prefix to our lane's partial + T outer_warp_exclusive = temp_storage.warp_aggregates[warp_id]; + inclusive_output = scan_op(outer_warp_exclusive, inclusive_output); + } + + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/cub.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/cub.cuh new file mode 100644 index 0000000..3ece0f6 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/cub.cuh @@ -0,0 +1,95 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. 
All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * CUB umbrella include file + */ + +#pragma once + + +// Block +#include "block/block_histogram.cuh" +#include "block/block_discontinuity.cuh" +#include "block/block_exchange.cuh" +#include "block/block_load.cuh" +#include "block/block_radix_rank.cuh" +#include "block/block_radix_sort.cuh" +#include "block/block_reduce.cuh" +#include "block/block_scan.cuh" +#include "block/block_store.cuh" +//#include "block/block_shift.cuh" + +// Device +#include "device/device_histogram.cuh" +#include "device/device_partition.cuh" +#include "device/device_radix_sort.cuh" +#include "device/device_reduce.cuh" +#include "device/device_run_length_encode.cuh" +#include "device/device_scan.cuh" +#include "device/device_segmented_radix_sort.cuh" +#include "device/device_segmented_reduce.cuh" +#include "device/device_select.cuh" +#include "device/device_spmv.cuh" + +// Grid +//#include "grid/grid_barrier.cuh" +#include "grid/grid_even_share.cuh" +#include "grid/grid_mapping.cuh" +#include "grid/grid_queue.cuh" + +// Thread +#include "thread/thread_load.cuh" +#include "thread/thread_operators.cuh" +#include "thread/thread_reduce.cuh" +#include "thread/thread_scan.cuh" +#include "thread/thread_store.cuh" + +// Warp +#include "warp/warp_reduce.cuh" +#include "warp/warp_scan.cuh" + +// Iterator +#include "iterator/arg_index_input_iterator.cuh" +#include "iterator/cache_modified_input_iterator.cuh" +#include "iterator/cache_modified_output_iterator.cuh" +#include "iterator/constant_input_iterator.cuh" +#include "iterator/counting_input_iterator.cuh" +#include "iterator/tex_obj_input_iterator.cuh" +#include "iterator/tex_ref_input_iterator.cuh" +#include "iterator/transform_input_iterator.cuh" + +// Util +#include "util_arch.cuh" +#include "util_debug.cuh" +#include "util_device.cuh" +#include "util_macro.cuh" +#include "util_ptx.cuh" +#include "util_type.cuh" + 
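The warp-scans specializations added above all sit behind the public cub::BlockScan collective; a minimal usage sketch (not part of the vendored sources) is shown below. The 128-thread block size and the explicit cub::BLOCK_SCAN_WARP_SCANS algorithm tag are illustrative assumptions.

// Sketch only: block-wide exclusive prefix sum over 128 threads using the warp-scans path.
#include <cub/cub.cuh>

__global__ void BlockExclusiveSumKernel(const int *d_in, int *d_out)
{
    // Specialize BlockScan for int across 128 threads, selecting the warp-scans variant.
    typedef cub::BlockScan<int, 128, cub::BLOCK_SCAN_WARP_SCANS> BlockScanT;
    __shared__ typename BlockScanT::TempStorage temp_storage;

    int idx = blockIdx.x * 128 + threadIdx.x;
    int thread_data = d_in[idx];

    // Collective exclusive prefix sum across the thread block (thread0 receives 0).
    BlockScanT(temp_storage).ExclusiveSum(thread_data, thread_data);

    d_out[idx] = thread_data;
}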
diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_histogram.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_histogram.cuh new file mode 100644 index 0000000..a2556a6 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_histogram.cuh @@ -0,0 +1,866 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within device-accessible memory. + */ + +#pragma once + +#include +#include +#include + +#include "dispatch/dispatch_histogram.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within device-accessible memory. ![](histogram_logo.png) + * \ingroup SingleModule + * + * \par Overview + * A histogram + * counts the number of observations that fall into each of the disjoint categories (known as bins). + * + * \par Usage Considerations + * \cdp_class{DeviceHistogram} + * + */ +struct DeviceHistogram +{ + /******************************************************************//** + * \name Evenly-segmented bin ranges + *********************************************************************/ + //@{ + + /** + * \brief Computes an intensity histogram from a sequence of data samples using equal-width bins. 
+ * + * \par + * - The number of histogram bins is (\p num_levels - 1) + * - All bins comprise the same width of sample values: (\p upper_level - \p lower_level) / (\p num_levels - 1) + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the computation of a six-bin histogram + * from a sequence of float samples + * + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input samples and + * // output histogram + * int num_samples; // e.g., 10 + * float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, 0.3, 2.9, 2.0, 6.1, 999.5] + * int* d_histogram; // e.g., [ -, -, -, -, -, -, -, -] + * int num_levels; // e.g., 7 (seven level boundaries for six bins) + * float lower_level; // e.g., 0.0 (lower sample value boundary of lowest bin) + * float upper_level; // e.g., 12.0 (upper sample value boundary of upper bin) + * ... + * + * // Determine temporary device storage requirements + * void* d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, lower_level, upper_level, num_samples); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Compute histograms + * cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, lower_level, upper_level, num_samples); + * + * // d_histogram <-- [1, 0, 5, 0, 3, 0, 0, 0]; + * + * \endcode + * + * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator + * \tparam CounterT [inferred] Integer type for histogram bin counters + * \tparam LevelT [inferred] Type for specifying boundaries (levels) + * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 + */ + template < + typename SampleIteratorT, + typename CounterT, + typename LevelT, + typename OffsetT> + CUB_RUNTIME_FUNCTION + static cudaError_t HistogramEven( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of data samples. + CounterT* d_histogram, ///< [out] The pointer to the histogram counter output array of length num_levels - 1. + int num_levels, ///< [in] The number of boundaries (levels) for delineating histogram samples. Implies that the number of bins is num_levels - 1. + LevelT lower_level, ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin. + LevelT upper_level, ///< [in] The upper sample value bound (exclusive) for the highest histogram bin. + OffsetT num_samples, ///< [in] The number of input samples (i.e., the length of \p d_samples) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
+ { + /// The sample value type of the input iterator + typedef typename std::iterator_traits::value_type SampleT; + + CounterT* d_histogram1[1] = {d_histogram}; + int num_levels1[1] = {num_levels}; + LevelT lower_level1[1] = {lower_level}; + LevelT upper_level1[1] = {upper_level}; + + return MultiHistogramEven<1, 1>( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_histogram1, + num_levels1, + lower_level1, + upper_level1, + num_samples, + 1, + sizeof(SampleT) * num_samples, + stream, + debug_synchronous); + } + + + /** + * \brief Computes an intensity histogram from a sequence of data samples using equal-width bins. + * + * \par + * - A two-dimensional region of interest within \p d_samples can be specified + * using the \p num_row_samples, num_rows, and \p row_stride_bytes parameters. + * - The row stride must be a whole multiple of the sample data type + * size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0. + * - The number of histogram bins is (\p num_levels - 1) + * - All bins comprise the same width of sample values: (\p upper_level - \p lower_level) / (\p num_levels - 1) + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the computation of a six-bin histogram + * from a 2x5 region of interest within a flattened 2x7 array of float samples. + * + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input samples and + * // output histogram + * int num_row_samples; // e.g., 5 + * int num_rows; // e.g., 2; + * size_t row_stride_bytes; // e.g., 7 * sizeof(float) + * float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, -, -, + * // 0.3, 2.9, 2.0, 6.1, 999.5, -, -] + * int* d_histogram; // e.g., [ -, -, -, -, -, -, -, -] + * int num_levels; // e.g., 7 (seven level boundaries for six bins) + * float lower_level; // e.g., 0.0 (lower sample value boundary of lowest bin) + * float upper_level; // e.g., 12.0 (upper sample value boundary of upper bin) + * ... + * + * // Determine temporary device storage requirements + * void* d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, lower_level, upper_level, + * num_row_samples, num_rows, row_stride_bytes); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Compute histograms + * cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes, d_samples, d_histogram, + * d_samples, d_histogram, num_levels, lower_level, upper_level, + * num_row_samples, num_rows, row_stride_bytes); + * + * // d_histogram <-- [1, 0, 5, 0, 3, 0, 0, 0]; + * + * \endcode + * + * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator + * \tparam CounterT [inferred] Integer type for histogram bin counters + * \tparam LevelT [inferred] Type for specifying boundaries (levels) + * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 + */ + template < + typename SampleIteratorT, + typename CounterT, + typename LevelT, + typename OffsetT> + CUB_RUNTIME_FUNCTION + static cudaError_t HistogramEven( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
+ size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of data samples. + CounterT* d_histogram, ///< [out] The pointer to the histogram counter output array of length num_levels - 1. + int num_levels, ///< [in] The number of boundaries (levels) for delineating histogram samples. Implies that the number of bins is num_levels - 1. + LevelT lower_level, ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin. + LevelT upper_level, ///< [in] The upper sample value bound (exclusive) for the highest histogram bin. + OffsetT num_row_samples, ///< [in] The number of data samples per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + size_t row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + CounterT* d_histogram1[1] = {d_histogram}; + int num_levels1[1] = {num_levels}; + LevelT lower_level1[1] = {lower_level}; + LevelT upper_level1[1] = {upper_level}; + + return MultiHistogramEven<1, 1>( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_histogram1, + num_levels1, + lower_level1, + upper_level1, + num_row_samples, + num_rows, + row_stride_bytes, + stream, + debug_synchronous); + } + + /** + * \brief Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using equal-width bins. + * + * \par + * - The input is a sequence of pixel structures, where each pixel comprises + * a record of \p NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel). + * - Of the \p NUM_CHANNELS specified, the function will only compute histograms + * for the first \p NUM_ACTIVE_CHANNELS (e.g., only RGB histograms from RGBA + * pixel samples). + * - The number of histogram bins for channeli is num_levels[i] - 1. + * - For channeli, the range of values for all histogram bins + * have the same width: (upper_level[i] - lower_level[i]) / ( num_levels[i] - 1) + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the computation of three 256-bin RGB histograms + * from a quad-channel sequence of RGBA pixels (8 bits per channel per pixel) + * + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input samples + * // and output histograms + * int num_pixels; // e.g., 5 + * unsigned char* d_samples; // e.g., [(2, 6, 7, 5), (3, 0, 2, 1), (7, 0, 6, 2), + * // (0, 6, 7, 5), (3, 0, 2, 6)] + * int* d_histogram[3]; // e.g., three device pointers to three device buffers, + * // each allocated with 256 integer counters + * int num_levels[3]; // e.g., {257, 257, 257}; + * unsigned int lower_level[3]; // e.g., {0, 0, 0}; + * unsigned int upper_level[3]; // e.g., {256, 256, 256}; + * ... 
+ * + * // Determine temporary device storage requirements + * void* d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, lower_level, upper_level, num_pixels); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Compute histograms + * cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, lower_level, upper_level, num_pixels); + * + * // d_histogram <-- [ [1, 0, 1, 2, 0, 0, 0, 1, 0, 0, 0, ..., 0], + * // [0, 3, 0, 0, 0, 0, 2, 0, 0, 0, 0, ..., 0], + * // [0, 0, 2, 0, 0, 0, 1, 2, 0, 0, 0, ..., 0] ] + * + * \endcode + * + * \tparam NUM_CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) + * \tparam NUM_ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed + * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator + * \tparam CounterT [inferred] Integer type for histogram bin counters + * \tparam LevelT [inferred] Type for specifying boundaries (levels) + * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 + */ + template < + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename SampleIteratorT, + typename CounterT, + typename LevelT, + typename OffsetT> + CUB_RUNTIME_FUNCTION + static cudaError_t MultiHistogramEven( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). + CounterT* d_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histogram[i] should be num_levels[i] - 1. + int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. + LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. + OffsetT num_pixels, ///< [in] The number of multi-channel pixels (i.e., the length of \p d_samples / NUM_CHANNELS) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
+ { + /// The sample value type of the input iterator + typedef typename std::iterator_traits::value_type SampleT; + + return MultiHistogramEven( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_histogram, + num_levels, + lower_level, + upper_level, + num_pixels, + 1, + sizeof(SampleT) * NUM_CHANNELS * num_pixels, + stream, + debug_synchronous); + } + + + /** + * \brief Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using equal-width bins. + * + * \par + * - The input is a sequence of pixel structures, where each pixel comprises + * a record of \p NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel). + * - Of the \p NUM_CHANNELS specified, the function will only compute histograms + * for the first \p NUM_ACTIVE_CHANNELS (e.g., only RGB histograms from RGBA + * pixel samples). + * - A two-dimensional region of interest within \p d_samples can be specified + * using the \p num_row_samples, num_rows, and \p row_stride_bytes parameters. + * - The row stride must be a whole multiple of the sample data type + * size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0. + * - The number of histogram bins for channeli is num_levels[i] - 1. + * - For channeli, the range of values for all histogram bins + * have the same width: (upper_level[i] - lower_level[i]) / ( num_levels[i] - 1) + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the computation of three 256-bin RGB histograms from a 2x3 region of + * interest of within a flattened 2x4 array of quad-channel RGBA pixels (8 bits per channel per pixel). + * + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input samples + * // and output histograms + * int num_row_pixels; // e.g., 3 + * int num_rows; // e.g., 2 + * size_t row_stride_bytes; // e.g., 4 * sizeof(unsigned char) * NUM_CHANNELS + * unsigned char* d_samples; // e.g., [(2, 6, 7, 5), (3, 0, 2, 1), (7, 0, 6, 2), (-, -, -, -), + * // (0, 6, 7, 5), (3, 0, 2, 6), (1, 1, 1, 1), (-, -, -, -)] + * int* d_histogram[3]; // e.g., three device pointers to three device buffers, + * // each allocated with 256 integer counters + * int num_levels[3]; // e.g., {257, 257, 257}; + * unsigned int lower_level[3]; // e.g., {0, 0, 0}; + * unsigned int upper_level[3]; // e.g., {256, 256, 256}; + * ... 
+ * + * // Determine temporary device storage requirements + * void* d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, lower_level, upper_level, + * num_row_pixels, num_rows, row_stride_bytes); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Compute histograms + * cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, lower_level, upper_level, + * num_row_pixels, num_rows, row_stride_bytes); + * + * // d_histogram <-- [ [1, 1, 1, 2, 0, 0, 0, 1, 0, 0, 0, ..., 0], + * // [0, 4, 0, 0, 0, 0, 2, 0, 0, 0, 0, ..., 0], + * // [0, 1, 2, 0, 0, 0, 1, 2, 0, 0, 0, ..., 0] ] + * + * \endcode + * + * \tparam NUM_CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) + * \tparam NUM_ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed + * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator + * \tparam CounterT [inferred] Integer type for histogram bin counters + * \tparam LevelT [inferred] Type for specifying boundaries (levels) + * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 + */ + template < + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename SampleIteratorT, + typename CounterT, + typename LevelT, + typename OffsetT> + CUB_RUNTIME_FUNCTION + static cudaError_t MultiHistogramEven( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). + CounterT* d_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histogram[i] should be num_levels[i] - 1. + int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. + LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. + OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + size_t row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
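Editorial sketch of the row-oriented overload above (the helper name rgb_histograms_roi and the 2x4 padded layout are assumptions for illustration, not part of the header). The key point is that row_stride_bytes spans the full padded row, not just the width of the region of interest.

#include <cub/cub.cuh>
#include <cuda_runtime.h>

void rgb_histograms_roi(unsigned char* d_padded_image,   // 2 rows x 4 pixels x 4 channels
                        int* d_histogram[3])             // three 256-counter device buffers
{
    int    num_row_pixels   = 3;                         // pixels per row inside the ROI
    int    num_rows         = 2;
    size_t row_stride_bytes = 4 * 4 * sizeof(unsigned char);  // full padded row of RGBA pixels

    int num_levels[3]  = {257, 257, 257};
    int lower_level[3] = {0, 0, 0};
    int upper_level[3] = {256, 256, 256};

    void*  d_temp_storage     = nullptr;
    size_t temp_storage_bytes = 0;
    cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes,
        d_padded_image, d_histogram, num_levels, lower_level, upper_level,
        num_row_pixels, num_rows, row_stride_bytes);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceHistogram::MultiHistogramEven<4, 3>(d_temp_storage, temp_storage_bytes,
        d_padded_image, d_histogram, num_levels, lower_level, upper_level,
        num_row_pixels, num_rows, row_stride_bytes);
    cudaFree(d_temp_storage);
}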
+ { + /// The sample value type of the input iterator + typedef typename std::iterator_traits::value_type SampleT; + Int2Type is_byte_sample; + + if ((sizeof(OffsetT) > sizeof(int)) && + ((unsigned long long) (num_rows * row_stride_bytes) < (unsigned long long) std::numeric_limits::max())) + { + // Down-convert OffsetT data type + + + return DipatchHistogram::DispatchEven( + d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level, + (int) num_row_pixels, (int) num_rows, (int) (row_stride_bytes / sizeof(SampleT)), + stream, debug_synchronous, is_byte_sample); + } + + return DipatchHistogram::DispatchEven( + d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, lower_level, upper_level, + num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / sizeof(SampleT)), + stream, debug_synchronous, is_byte_sample); + } + + + //@} end member group + /******************************************************************//** + * \name Custom bin ranges + *********************************************************************/ + //@{ + + /** + * \brief Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels. + * + * \par + * - The number of histogram bins is (\p num_levels - 1) + * - The value range for bini is [level[i], level[i+1]) + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the computation of an six-bin histogram + * from a sequence of float samples + * + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input samples and + * // output histogram + * int num_samples; // e.g., 10 + * float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, 0.3, 2.9, 2.0, 6.1, 999.5] + * int* d_histogram; // e.g., [ -, -, -, -, -, -, -, -] + * int num_levels // e.g., 7 (seven level boundaries for six bins) + * float* d_levels; // e.g., [0.0, 2.0, 4.0, 6.0, 8.0, 12.0, 16.0] + * ... + * + * // Determine temporary device storage requirements + * void* d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, d_levels, num_samples); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Compute histograms + * cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, d_levels, num_samples); + * + * // d_histogram <-- [1, 0, 5, 0, 3, 0, 0, 0]; + * + * \endcode + * + * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator + * \tparam CounterT [inferred] Integer type for histogram bin counters + * \tparam LevelT [inferred] Type for specifying boundaries (levels) + * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 + */ + template < + typename SampleIteratorT, + typename CounterT, + typename LevelT, + typename OffsetT> + CUB_RUNTIME_FUNCTION + static cudaError_t HistogramRange( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of data samples. 
+ CounterT* d_histogram, ///< [out] The pointer to the histogram counter output array of length num_levels - 1. + int num_levels, ///< [in] The number of boundaries (levels) for delineating histogram samples. Implies that the number of bins is num_levels - 1. + LevelT* d_levels, ///< [in] The pointer to the array of boundaries (levels). Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. + OffsetT num_samples, ///< [in] The number of data samples per row in the region of interest + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + /// The sample value type of the input iterator + typedef typename std::iterator_traits::value_type SampleT; + + CounterT* d_histogram1[1] = {d_histogram}; + int num_levels1[1] = {num_levels}; + LevelT* d_levels1[1] = {d_levels}; + + return MultiHistogramRange<1, 1>( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_histogram1, + num_levels1, + d_levels1, + num_samples, + 1, + sizeof(SampleT) * num_samples, + stream, + debug_synchronous); + } + + + /** + * \brief Computes an intensity histogram from a sequence of data samples using the specified bin boundary levels. + * + * \par + * - A two-dimensional region of interest within \p d_samples can be specified + * using the \p num_row_samples, num_rows, and \p row_stride_bytes parameters. + * - The row stride must be a whole multiple of the sample data type + * size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0. + * - The number of histogram bins is (\p num_levels - 1) + * - The value range for bini is [level[i], level[i+1]) + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the computation of a six-bin histogram + * from a 2x5 region of interest within a flattened 2x7 array of float samples. + * + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input samples and + * // output histogram + * int num_row_samples; // e.g., 5 + * int num_rows; // e.g., 2; + * int row_stride_bytes; // e.g., 7 * sizeof(float) + * float* d_samples; // e.g., [2.2, 6.0, 7.1, 2.9, 3.5, -, -, + * // 0.3, 2.9, 2.0, 6.1, 999.5, -, -] + * int* d_histogram; // e.g., [ , , , , , , , ] + * int num_levels // e.g., 7 (seven level boundaries for six bins) + * float *d_levels; // e.g., [0.0, 2.0, 4.0, 6.0, 8.0, 12.0, 16.0] + * ... + * + * // Determine temporary device storage requirements + * void* d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, d_levels, + * num_row_samples, num_rows, row_stride_bytes); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Compute histograms + * cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, d_levels, + * num_row_samples, num_rows, row_stride_bytes); + * + * // d_histogram <-- [1, 0, 5, 0, 3, 0, 0, 0]; + * + * \endcode + * + * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. 
\iterator + * \tparam CounterT [inferred] Integer type for histogram bin counters + * \tparam LevelT [inferred] Type for specifying boundaries (levels) + * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 + */ + template < + typename SampleIteratorT, + typename CounterT, + typename LevelT, + typename OffsetT> + CUB_RUNTIME_FUNCTION + static cudaError_t HistogramRange( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of data samples. + CounterT* d_histogram, ///< [out] The pointer to the histogram counter output array of length num_levels - 1. + int num_levels, ///< [in] The number of boundaries (levels) for delineating histogram samples. Implies that the number of bins is num_levels - 1. + LevelT* d_levels, ///< [in] The pointer to the array of boundaries (levels). Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. + OffsetT num_row_samples, ///< [in] The number of data samples per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + size_t row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + CounterT* d_histogram1[1] = {d_histogram}; + int num_levels1[1] = {num_levels}; + LevelT* d_levels1[1] = {d_levels}; + + return MultiHistogramRange<1, 1>( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_histogram1, + num_levels1, + d_levels1, + num_row_samples, + num_rows, + row_stride_bytes, + stream, + debug_synchronous); + } + + /** + * \brief Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using the specified bin boundary levels. + * + * \par + * - The input is a sequence of pixel structures, where each pixel comprises + * a record of \p NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel). + * - Of the \p NUM_CHANNELS specified, the function will only compute histograms + * for the first \p NUM_ACTIVE_CHANNELS (e.g., RGB histograms from RGBA + * pixel samples). + * - The number of histogram bins for channeli is num_levels[i] - 1. 
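Editorial sketch of HistogramRange with explicit bin boundaries, reusing the samples and seven levels from the documentation above (not part of the vendored header; values illustrative).

#include <cub/cub.cuh>
#include <cuda_runtime.h>
#include <vector>

int main()
{
    // Six bins delimited by seven explicit boundaries: [0,2), [2,4), ..., [12,16).
    std::vector<float> h_samples = {2.2f, 6.0f, 7.1f, 2.9f, 3.5f, 0.3f, 2.9f, 2.0f, 6.1f, 999.5f};
    std::vector<float> h_levels  = {0.0f, 2.0f, 4.0f, 6.0f, 8.0f, 12.0f, 16.0f};
    int num_samples = (int)h_samples.size();
    int num_levels  = (int)h_levels.size();

    float *d_samples, *d_levels;
    int   *d_histogram;
    cudaMalloc(&d_samples,   h_samples.size() * sizeof(float));
    cudaMalloc(&d_levels,    h_levels.size()  * sizeof(float));
    cudaMalloc(&d_histogram, (num_levels - 1) * sizeof(int));
    cudaMemcpy(d_samples, h_samples.data(), h_samples.size() * sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(d_levels,  h_levels.data(),  h_levels.size()  * sizeof(float), cudaMemcpyHostToDevice);

    void*  d_temp_storage     = nullptr;
    size_t temp_storage_bytes = 0;
    cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes,
        d_samples, d_histogram, num_levels, d_levels, num_samples);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceHistogram::HistogramRange(d_temp_storage, temp_storage_bytes,
        d_samples, d_histogram, num_levels, d_levels, num_samples);
    cudaDeviceSynchronize();   // d_histogram now holds one count per bin; out-of-range samples are dropped
    return 0;
}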
+ * - For channeli, the range of values for all histogram bins + * have the same width: (upper_level[i] - lower_level[i]) / ( num_levels[i] - 1) + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the computation of three 4-bin RGB histograms + * from a quad-channel sequence of RGBA pixels (8 bits per channel per pixel) + * + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input samples + * // and output histograms + * int num_pixels; // e.g., 5 + * unsigned char *d_samples; // e.g., [(2, 6, 7, 5),(3, 0, 2, 1),(7, 0, 6, 2), + * // (0, 6, 7, 5),(3, 0, 2, 6)] + * unsigned int *d_histogram[3]; // e.g., [[ -, -, -, -],[ -, -, -, -],[ -, -, -, -]]; + * int num_levels[3]; // e.g., {5, 5, 5}; + * unsigned int *d_levels[3]; // e.g., [ [0, 2, 4, 6, 8], + * // [0, 2, 4, 6, 8], + * // [0, 2, 4, 6, 8] ]; + * ... + * + * // Determine temporary device storage requirements + * void* d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, d_levels, num_pixels); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Compute histograms + * cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, d_levels, num_pixels); + * + * // d_histogram <-- [ [1, 3, 0, 1], + * // [3, 0, 0, 2], + * // [0, 2, 0, 3] ] + * + * \endcode + * + * \tparam NUM_CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) + * \tparam NUM_ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed + * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator + * \tparam CounterT [inferred] Integer type for histogram bin counters + * \tparam LevelT [inferred] Type for specifying boundaries (levels) + * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 + */ + template < + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename SampleIteratorT, + typename CounterT, + typename LevelT, + typename OffsetT> + CUB_RUNTIME_FUNCTION + static cudaError_t MultiHistogramRange( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). + CounterT* d_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histogram[i] should be num_levels[i] - 1. + int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT* d_levels[NUM_ACTIVE_CHANNELS], ///< [in] The pointers to the arrays of boundaries (levels), one for each active channel. 
Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. + OffsetT num_pixels, ///< [in] The number of multi-channel pixels (i.e., the length of \p d_samples / NUM_CHANNELS) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + /// The sample value type of the input iterator + typedef typename std::iterator_traits::value_type SampleT; + + return MultiHistogramRange( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_histogram, + num_levels, + d_levels, + num_pixels, + 1, + sizeof(SampleT) * NUM_CHANNELS * num_pixels, + stream, + debug_synchronous); + } + + + /** + * \brief Computes per-channel intensity histograms from a sequence of multi-channel "pixel" data samples using the specified bin boundary levels. + * + * \par + * - The input is a sequence of pixel structures, where each pixel comprises + * a record of \p NUM_CHANNELS consecutive data samples (e.g., an RGBA pixel). + * - Of the \p NUM_CHANNELS specified, the function will only compute histograms + * for the first \p NUM_ACTIVE_CHANNELS (e.g., RGB histograms from RGBA + * pixel samples). + * - A two-dimensional region of interest within \p d_samples can be specified + * using the \p num_row_samples, num_rows, and \p row_stride_bytes parameters. + * - The row stride must be a whole multiple of the sample data type + * size, i.e., (row_stride_bytes % sizeof(SampleT)) == 0. + * - The number of histogram bins for channeli is num_levels[i] - 1. + * - For channeli, the range of values for all histogram bins + * have the same width: (upper_level[i] - lower_level[i]) / ( num_levels[i] - 1) + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the computation of three 4-bin RGB histograms from a 2x3 region of + * interest of within a flattened 2x4 array of quad-channel RGBA pixels (8 bits per channel per pixel). + * + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input samples + * // and output histograms + * int num_row_pixels; // e.g., 3 + * int num_rows; // e.g., 2 + * size_t row_stride_bytes; // e.g., 4 * sizeof(unsigned char) * NUM_CHANNELS + * unsigned char* d_samples; // e.g., [(2, 6, 7, 5),(3, 0, 2, 1),(1, 1, 1, 1),(-, -, -, -), + * // (7, 0, 6, 2),(0, 6, 7, 5),(3, 0, 2, 6),(-, -, -, -)] + * int* d_histogram[3]; // e.g., [[ -, -, -, -],[ -, -, -, -],[ -, -, -, -]]; + * int num_levels[3]; // e.g., {5, 5, 5}; + * unsigned int* d_levels[3]; // e.g., [ [0, 2, 4, 6, 8], + * // [0, 2, 4, 6, 8], + * // [0, 2, 4, 6, 8] ]; + * ... 
+ * + * // Determine temporary device storage requirements + * void* d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, d_levels, num_row_pixels, num_rows, row_stride_bytes); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Compute histograms + * cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes, + * d_samples, d_histogram, num_levels, d_levels, num_row_pixels, num_rows, row_stride_bytes); + * + * // d_histogram <-- [ [2, 3, 0, 1], + * // [3, 0, 0, 2], + * // [1, 2, 0, 3] ] + * + * \endcode + * + * \tparam NUM_CHANNELS Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) + * \tparam NUM_ACTIVE_CHANNELS [inferred] Number of channels actively being histogrammed + * \tparam SampleIteratorT [inferred] Random-access input iterator type for reading input samples. \iterator + * \tparam CounterT [inferred] Integer type for histogram bin counters + * \tparam LevelT [inferred] Type for specifying boundaries (levels) + * \tparam OffsetT [inferred] Signed integer type for sequence offsets, list lengths, pointer differences, etc. \offset_size1 + */ + template < + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename SampleIteratorT, + typename CounterT, + typename LevelT, + typename OffsetT> + CUB_RUNTIME_FUNCTION + static cudaError_t MultiHistogramRange( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). + CounterT* d_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histogram[i] should be num_levels[i] - 1. + int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT* d_levels[NUM_ACTIVE_CHANNELS], ///< [in] The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. + OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + size_t row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
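Editorial sketch of the per-channel range variant (the helper name and the choice of four bins per channel are illustrative assumptions): each active channel gets its own boundary array and its own counter buffer.

#include <cub/cub.cuh>
#include <cuda_runtime.h>

void rgb_histograms_custom_bins(unsigned char* d_rgba_pixels, int num_pixels)
{
    // Four explicit bins per channel: [0,2), [2,4), [4,6), [6,8).
    unsigned int h_levels[5] = {0, 2, 4, 6, 8};
    int num_levels[3] = {5, 5, 5};

    unsigned int* d_levels[3];
    int*          d_histogram[3];
    for (int c = 0; c < 3; ++c)
    {
        cudaMalloc(&d_levels[c], sizeof(h_levels));
        cudaMemcpy(d_levels[c], h_levels, sizeof(h_levels), cudaMemcpyHostToDevice);
        cudaMalloc(&d_histogram[c], 4 * sizeof(int));
    }

    void*  d_temp_storage     = nullptr;
    size_t temp_storage_bytes = 0;
    cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes,
        d_rgba_pixels, d_histogram, num_levels, d_levels, num_pixels);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceHistogram::MultiHistogramRange<4, 3>(d_temp_storage, temp_storage_bytes,
        d_rgba_pixels, d_histogram, num_levels, d_levels, num_pixels);
    cudaFree(d_temp_storage);
}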
+    {
+        /// The sample value type of the input iterator
+        typedef typename std::iterator_traits<SampleIteratorT>::value_type SampleT;
+        Int2Type<sizeof(SampleT) == 1> is_byte_sample;
+
+        if ((sizeof(OffsetT) > sizeof(int)) &&
+            ((unsigned long long) (num_rows * row_stride_bytes) < (unsigned long long) std::numeric_limits<int>::max()))
+        {
+            // Down-convert OffsetT data type
+            return DipatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, int>::DispatchRange(
+                d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels,
+                (int) num_row_pixels, (int) num_rows, (int) (row_stride_bytes / sizeof(SampleT)),
+                stream, debug_synchronous, is_byte_sample);
+        }
+
+        return DipatchHistogram<NUM_CHANNELS, NUM_ACTIVE_CHANNELS, SampleIteratorT, CounterT, LevelT, OffsetT>::DispatchRange(
+            d_temp_storage, temp_storage_bytes, d_samples, d_histogram, num_levels, d_levels,
+            num_row_pixels, num_rows, (OffsetT) (row_stride_bytes / sizeof(SampleT)),
+            stream, debug_synchronous, is_byte_sample);
+    }
+
+
+
+    //@}  end member group
+};
+
+}               // CUB namespace
+CUB_NS_POSTFIX  // Optional outer namespace(s)
+
+
diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_partition.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_partition.cuh
new file mode 100644
index 0000000..5053540
--- /dev/null
+++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_partition.cuh
@@ -0,0 +1,273 @@
+
+/******************************************************************************
+ * Copyright (c) 2011, Duane Merrill.  All rights reserved.
+ * Copyright (c) 2011-2018, NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in the
+ *       documentation and/or other materials provided with the distribution.
+ *     * Neither the name of the NVIDIA CORPORATION nor the
+ *       names of its contributors may be used to endorse or promote products
+ *       derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ******************************************************************************/
+
+/**
+ * \file
+ * cub::DevicePartition provides device-wide, parallel operations for partitioning sequences of data items residing within device-accessible memory.
+ */ + +#pragma once + +#include +#include + +#include "dispatch/dispatch_select_if.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief DevicePartition provides device-wide, parallel operations for partitioning sequences of data items residing within device-accessible memory. ![](partition_logo.png) + * \ingroup SingleModule + * + * \par Overview + * These operations apply a selection criterion to construct a partitioned output sequence from items selected/unselected from + * a specified input sequence. + * + * \par Usage Considerations + * \cdp_class{DevicePartition} + * + * \par Performance + * \linear_performance{partition} + * + * \par + * The following chart illustrates DevicePartition::If + * performance across different CUDA architectures for \p int32 items, + * where 50% of the items are randomly selected for the first partition. + * \plots_below + * + * \image html partition_if_int32_50_percent.png + * + */ +struct DevicePartition +{ + /** + * \brief Uses the \p d_flags sequence to split the corresponding items from \p d_in into a partitioned sequence \p d_out. The total number of items copied into the first partition is written to \p d_num_selected_out. ![](partition_flags_logo.png) + * + * \par + * - The value type of \p d_flags must be castable to \p bool (e.g., \p bool, \p char, \p int, etc.). + * - Copies of the selected items are compacted into \p d_out and maintain their original + * relative ordering, however copies of the unselected items are compacted into the + * rear of \p d_out in reverse order. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the compaction of items selected from an \p int device vector. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input, flags, and output + * int num_items; // e.g., 8 + * int *d_in; // e.g., [1, 2, 3, 4, 5, 6, 7, 8] + * char *d_flags; // e.g., [1, 0, 0, 1, 0, 1, 1, 0] + * int *d_out; // e.g., [ , , , , , , , ] + * int *d_num_selected_out; // e.g., [ ] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DevicePartition::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run selection + * cub::DevicePartition::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items); + * + * // d_out <-- [1, 4, 6, 7, 8, 5, 3, 2] + * // d_num_selected_out <-- [4] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam FlagIterator [inferred] Random-access input iterator type for reading selection flags \iterator + * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing output items \iterator + * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator + */ + template < + typename InputIteratorT, + typename FlagIterator, + typename OutputIteratorT, + typename NumSelectedIteratorT> + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Flagged( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. 
When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + FlagIterator d_flags, ///< [in] Pointer to the input sequence of selection flags + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of partitioned data items + NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., the offset of the unselected partition) + int num_items, ///< [in] Total number of items to select from + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + typedef int OffsetT; // Signed integer type for global offsets + typedef NullType SelectOp; // Selection op (not used) + typedef NullType EqualityOp; // Equality operator (not used) + + return DispatchSelectIf::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_flags, + d_out, + d_num_selected_out, + SelectOp(), + EqualityOp(), + num_items, + stream, + debug_synchronous); + } + + + /** + * \brief Uses the \p select_op functor to split the corresponding items from \p d_in into a partitioned sequence \p d_out. The total number of items copied into the first partition is written to \p d_num_selected_out. ![](partition_logo.png) + * + * \par + * - Copies of the selected items are compacted into \p d_out and maintain their original + * relative ordering, however copies of the unselected items are compacted into the + * rear of \p d_out in reverse order. + * - \devicestorage + * + * \par Performance + * The following charts illustrate saturated partition-if performance across different + * CUDA architectures for \p int32 and \p int64 items, respectively. Items are + * selected for the first partition with 50% probability. + * + * \image html partition_if_int32_50_percent.png + * \image html partition_if_int64_50_percent.png + * + * \par + * The following charts are similar, but 5% selection probability for the first partition: + * + * \image html partition_if_int32_5_percent.png + * \image html partition_if_int64_5_percent.png + * + * \par Snippet + * The code snippet below illustrates the compaction of items selected from an \p int device vector. + * \par + * \code + * #include // or equivalently + * + * // Functor type for selecting values less than some criteria + * struct LessThan + * { + * int compare; + * + * CUB_RUNTIME_FUNCTION __forceinline__ + * LessThan(int compare) : compare(compare) {} + * + * CUB_RUNTIME_FUNCTION __forceinline__ + * bool operator()(const int &a) const { + * return (a < compare); + * } + * }; + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 8 + * int *d_in; // e.g., [0, 2, 3, 9, 5, 2, 81, 8] + * int *d_out; // e.g., [ , , , , , , , ] + * int *d_num_selected_out; // e.g., [ ] + * LessThan select_op(7); + * ... 
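Editorial sketch of the Flagged interface documented above, using the same data as the documented snippet (not part of the vendored header). Selected items land at the front of d_out in order; unselected items fill the back in reverse.

#include <cub/cub.cuh>
#include <cuda_runtime.h>

int main()
{
    const int num_items = 8;
    int  h_in[num_items]    = {1, 2, 3, 4, 5, 6, 7, 8};
    char h_flags[num_items] = {1, 0, 0, 1, 0, 1, 1, 0};

    int  *d_in, *d_out, *d_num_selected_out;
    char *d_flags;
    cudaMalloc(&d_in,    num_items * sizeof(int));
    cudaMalloc(&d_out,   num_items * sizeof(int));
    cudaMalloc(&d_flags, num_items * sizeof(char));
    cudaMalloc(&d_num_selected_out, sizeof(int));
    cudaMemcpy(d_in,    h_in,    sizeof(h_in),    cudaMemcpyHostToDevice);
    cudaMemcpy(d_flags, h_flags, sizeof(h_flags), cudaMemcpyHostToDevice);

    void*  d_temp_storage     = nullptr;
    size_t temp_storage_bytes = 0;
    cub::DevicePartition::Flagged(d_temp_storage, temp_storage_bytes,
        d_in, d_flags, d_out, d_num_selected_out, num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DevicePartition::Flagged(d_temp_storage, temp_storage_bytes,
        d_in, d_flags, d_out, d_num_selected_out, num_items);
    cudaDeviceSynchronize();
    // Per the documentation above: d_out is [1, 4, 6, 7, 8, 5, 3, 2] and *d_num_selected_out is 4.
    return 0;
}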
+ * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run selection + * cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op); + * + * // d_out <-- [0, 2, 3, 5, 2, 8, 81, 9] + * // d_num_selected_out <-- [5] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing output items \iterator + * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator + * \tparam SelectOp [inferred] Selection functor type having member bool operator()(const T &a) + */ + template < + typename InputIteratorT, + typename OutputIteratorT, + typename NumSelectedIteratorT, + typename SelectOp> + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t If( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of partitioned data items + NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., the offset of the unselected partition) + int num_items, ///< [in] Total number of items to select from + SelectOp select_op, ///< [in] Unary selection operator + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + typedef int OffsetT; // Signed integer type for global offsets + typedef NullType* FlagIterator; // FlagT iterator type (not used) + typedef NullType EqualityOp; // Equality operator (not used) + + return DispatchSelectIf::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + NULL, + d_out, + d_num_selected_out, + select_op, + EqualityOp(), + num_items, + stream, + debug_synchronous); + } + +}; + +/** + * \example example_device_partition_flagged.cu + * \example example_device_partition_if.cu + */ + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_radix_sort.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_radix_sort.cuh new file mode 100644 index 0000000..1c0bdbe --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_radix_sort.cuh @@ -0,0 +1,797 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory. + */ + +#pragma once + +#include +#include + +#include "dispatch/dispatch_radix_sort.cuh" +#include "../util_arch.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory. ![](sorting_logo.png) + * \ingroup SingleModule + * + * \par Overview + * The [radix sorting method](http://en.wikipedia.org/wiki/Radix_sort) arranges + * items into ascending (or descending) order. The algorithm relies upon a positional representation for + * keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, + * characters, etc.) specified from least-significant to most-significant. For a + * given input sequence of keys and a set of rules specifying a total ordering + * of the symbolic alphabet, the radix sorting method produces a lexicographic + * ordering of those keys. + * + * \par + * DeviceRadixSort can sort all of the built-in C++ numeric primitive types + * (unsigned char, \p int, \p double, etc.) as well as CUDA's \p __half + * half-precision floating-point type. Although the direct radix sorting + * method can only be applied to unsigned integral types, DeviceRadixSort + * is able to sort signed and floating-point types via simple bit-wise transformations + * that ensure lexicographic key ordering. + * + * \par Usage Considerations + * \cdp_class{DeviceRadixSort} + * + * \par Performance + * \linear_performance{radix sort} The following chart illustrates DeviceRadixSort::SortKeys + * performance across different CUDA architectures for uniform-random \p uint32 keys. 
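Editorial sketch: because signed and floating-point keys are handled through the bit-wise transformations described above, float keys can be sorted directly with no extra work on the caller's side (values illustrative; not part of the vendored header).

#include <cub/cub.cuh>
#include <cuda_runtime.h>
#include <vector>

int main()
{
    const int num_items = 6;
    std::vector<float> h_keys = {3.5f, -1.0f, 7.25f, 0.0f, -8.5f, 2.0f};

    float *d_keys_in, *d_keys_out;
    cudaMalloc(&d_keys_in,  num_items * sizeof(float));
    cudaMalloc(&d_keys_out, num_items * sizeof(float));
    cudaMemcpy(d_keys_in, h_keys.data(), num_items * sizeof(float), cudaMemcpyHostToDevice);

    void*  d_temp_storage     = nullptr;
    size_t temp_storage_bytes = 0;
    cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes,
        d_keys_in, d_keys_out, num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes,
        d_keys_in, d_keys_out, num_items);
    cudaDeviceSynchronize();
    // d_keys_out is ascending: -8.5, -1.0, 0.0, 2.0, 3.5, 7.25
    return 0;
}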
+ * \plots_below + * + * \image html lsb_radix_sort_int32_keys.png + * + */ +struct DeviceRadixSort +{ + + /******************************************************************//** + * \name KeyT-value pairs + *********************************************************************/ + //@{ + + /** + * \brief Sorts key-value pairs into ascending order. (~2N auxiliary storage required) + * + * \par + * - The contents of the input data are not altered by the sorting operation + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. + * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. + * - \devicestorage + * + * \par Performance + * The following charts illustrate saturated sorting performance across different + * CUDA architectures for uniform-random uint32,uint32 and + * uint64,uint64 pairs, respectively. + * + * \image html lsb_radix_sort_int32_pairs.png + * \image html lsb_radix_sort_int64_pairs.png + * + * \par Snippet + * The code snippet below illustrates the sorting of a device vector of \p int keys + * with associated vector of \p int values. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_keys_out; // e.g., [ ... ] + * int *d_values_in; // e.g., [0, 1, 2, 3, 4, 5, 6] + * int *d_values_out; // e.g., [ ... ] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, + * d_keys_in, d_keys_out, d_values_in, d_values_out, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, + * d_keys_in, d_keys_out, d_values_in, d_values_out, num_items); + * + * // d_keys_out <-- [0, 3, 5, 6, 7, 8, 9] + * // d_values_out <-- [5, 4, 3, 1, 2, 0, 6] + * + * \endcode + * + * \tparam KeyT [inferred] KeyT type + * \tparam ValueT [inferred] ValueT type + */ + template < + typename KeyT, + typename ValueT> + CUB_RUNTIME_FUNCTION + static cudaError_t SortPairs( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + const KeyT *d_keys_in, ///< [in] Pointer to the input data of key data to sort + KeyT *d_keys_out, ///< [out] Pointer to the sorted output sequence of key data + const ValueT *d_values_in, ///< [in] Pointer to the corresponding input sequence of associated value items + ValueT *d_values_out, ///< [out] Pointer to the correspondingly-reordered output sequence of associated value items + int num_items, ///< [in] Number of items to sort + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. 
Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); + DoubleBuffer d_values(const_cast(d_values_in), d_values_out); + + return DispatchRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + begin_bit, + end_bit, + false, + stream, + debug_synchronous); + } + + + /** + * \brief Sorts key-value pairs into ascending order. (~N auxiliary storage required) + * + * \par + * - The sorting operation is given a pair of key buffers and a corresponding + * pair of associated value buffers. Each pair is managed by a DoubleBuffer + * structure that indicates which of the two buffers is "current" (and thus + * contains the input data to be sorted). + * - The contents of both buffers within each pair may be altered by the sorting + * operation. + * - Upon completion, the sorting operation will update the "current" indicator + * within each DoubleBuffer wrapper to reference which of the two buffers + * now contains the sorted output sequence (a function of the number of key bits + * specified and the targeted device architecture). + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. + * - \devicestorageP + * - \devicestorage + * + * \par Performance + * The following charts illustrate saturated sorting performance across different + * CUDA architectures for uniform-random uint32,uint32 and + * uint64,uint64 pairs, respectively. + * + * \image html lsb_radix_sort_int32_pairs.png + * \image html lsb_radix_sort_int64_pairs.png + * + * \par Snippet + * The code snippet below illustrates the sorting of a device vector of \p int keys + * with associated vector of \p int values. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_key_alt_buf; // e.g., [ ... ] + * int *d_value_buf; // e.g., [0, 1, 2, 3, 4, 5, 6] + * int *d_value_alt_buf; // e.g., [ ... ] + * ... + * + * // Create a set of DoubleBuffers to wrap pairs of device pointers + * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); + * cub::DoubleBuffer d_values(d_value_buf, d_value_alt_buf); + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items); + * + * // d_keys.Current() <-- [0, 3, 5, 6, 7, 8, 9] + * // d_values.Current() <-- [5, 4, 3, 1, 2, 0, 6] + * + * \endcode + * + * \tparam KeyT [inferred] KeyT type + * \tparam ValueT [inferred] ValueT type + */ + template < + typename KeyT, + typename ValueT> + CUB_RUNTIME_FUNCTION + static cudaError_t SortPairs( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. 
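Editorial sketch of the DoubleBuffer (~N storage) form documented above (the helper name is an assumption). The important point is that the sorted sequence may end up in either buffer of each pair, so it must be read back through Current() rather than through a raw pointer.

#include <cub/cub.cuh>
#include <cuda_runtime.h>

void sort_pairs_low_storage(int* d_key_buf, int* d_key_alt_buf,
                            int* d_value_buf, int* d_value_alt_buf, int num_items)
{
    // Wrap the two key buffers and the two value buffers; CUB ping-pongs between them.
    cub::DoubleBuffer<int> d_keys(d_key_buf, d_key_alt_buf);
    cub::DoubleBuffer<int> d_values(d_value_buf, d_value_alt_buf);

    void*  d_temp_storage     = nullptr;
    size_t temp_storage_bytes = 0;
    cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items);
    cudaDeviceSynchronize();

    // The "current" buffers now hold the sorted keys and their reordered values.
    int* d_sorted_keys   = d_keys.Current();
    int* d_sorted_values = d_values.Current();
    (void)d_sorted_keys; (void)d_sorted_values;

    cudaFree(d_temp_storage);
}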
When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys + DoubleBuffer &d_values, ///< [in,out] Double-buffer of values whose "current" device-accessible buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values + int num_items, ///< [in] Number of items to sort + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + return DispatchRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + begin_bit, + end_bit, + true, + stream, + debug_synchronous); + } + + + /** + * \brief Sorts key-value pairs into descending order. (~2N auxiliary storage required). + * + * \par + * - The contents of the input data are not altered by the sorting operation + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. + * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. + * - \devicestorage + * + * \par Performance + * Performance is similar to DeviceRadixSort::SortPairs. + * + * \par Snippet + * The code snippet below illustrates the sorting of a device vector of \p int keys + * with associated vector of \p int values. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_keys_out; // e.g., [ ... ] + * int *d_values_in; // e.g., [0, 1, 2, 3, 4, 5, 6] + * int *d_values_out; // e.g., [ ... ] + * ... 
+ * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, + * d_keys_in, d_keys_out, d_values_in, d_values_out, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, + * d_keys_in, d_keys_out, d_values_in, d_values_out, num_items); + * + * // d_keys_out <-- [9, 8, 7, 6, 5, 3, 0] + * // d_values_out <-- [6, 0, 2, 1, 3, 4, 5] + * + * \endcode + * + * \tparam KeyT [inferred] KeyT type + * \tparam ValueT [inferred] ValueT type + */ + template < + typename KeyT, + typename ValueT> + CUB_RUNTIME_FUNCTION + static cudaError_t SortPairsDescending( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + const KeyT *d_keys_in, ///< [in] Pointer to the input data of key data to sort + KeyT *d_keys_out, ///< [out] Pointer to the sorted output sequence of key data + const ValueT *d_values_in, ///< [in] Pointer to the corresponding input sequence of associated value items + ValueT *d_values_out, ///< [out] Pointer to the correspondingly-reordered output sequence of associated value items + int num_items, ///< [in] Number of items to sort + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); + DoubleBuffer d_values(const_cast(d_values_in), d_values_out); + + return DispatchRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + begin_bit, + end_bit, + false, + stream, + debug_synchronous); + } + + + /** + * \brief Sorts key-value pairs into descending order. (~N auxiliary storage required). + * + * \par + * - The sorting operation is given a pair of key buffers and a corresponding + * pair of associated value buffers. Each pair is managed by a DoubleBuffer + * structure that indicates which of the two buffers is "current" (and thus + * contains the input data to be sorted). + * - The contents of both buffers within each pair may be altered by the sorting + * operation. + * - Upon completion, the sorting operation will update the "current" indicator + * within each DoubleBuffer wrapper to reference which of the two buffers + * now contains the sorted output sequence (a function of the number of key bits + * specified and the targeted device architecture). + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. 
+ * - \devicestorageP + * - \devicestorage + * + * \par Performance + * Performance is similar to DeviceRadixSort::SortPairs. + * + * \par Snippet + * The code snippet below illustrates the sorting of a device vector of \p int keys + * with associated vector of \p int values. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_key_alt_buf; // e.g., [ ... ] + * int *d_value_buf; // e.g., [0, 1, 2, 3, 4, 5, 6] + * int *d_value_alt_buf; // e.g., [ ... ] + * ... + * + * // Create a set of DoubleBuffers to wrap pairs of device pointers + * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); + * cub::DoubleBuffer d_values(d_value_buf, d_value_alt_buf); + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items); + * + * // d_keys.Current() <-- [9, 8, 7, 6, 5, 3, 0] + * // d_values.Current() <-- [6, 0, 2, 1, 3, 4, 5] + * + * \endcode + * + * \tparam KeyT [inferred] KeyT type + * \tparam ValueT [inferred] ValueT type + */ + template < + typename KeyT, + typename ValueT> + CUB_RUNTIME_FUNCTION + static cudaError_t SortPairsDescending( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys + DoubleBuffer &d_values, ///< [in,out] Double-buffer of values whose "current" device-accessible buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values + int num_items, ///< [in] Number of items to sort + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + return DispatchRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + begin_bit, + end_bit, + true, + stream, + debug_synchronous); + } + + + //@} end member group + /******************************************************************//** + * \name Keys-only + *********************************************************************/ + //@{ + + + /** + * \brief Sorts keys into ascending order. 
(~2N auxiliary storage required) + * + * \par + * - The contents of the input data are not altered by the sorting operation + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. + * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. + * - \devicestorage + * + * \par Performance + * The following charts illustrate saturated sorting performance across different + * CUDA architectures for uniform-random \p uint32 and \p uint64 keys, respectively. + * + * \image html lsb_radix_sort_int32_keys.png + * \image html lsb_radix_sort_int64_keys.png + * + * \par Snippet + * The code snippet below illustrates the sorting of a device vector of \p int keys. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_keys_out; // e.g., [ ... ] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, num_items); + * + * // d_keys_out <-- [0, 3, 5, 6, 7, 8, 9] + * + * \endcode + * + * \tparam KeyT [inferred] KeyT type + */ + template + CUB_RUNTIME_FUNCTION + static cudaError_t SortKeys( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + const KeyT *d_keys_in, ///< [in] Pointer to the input data of key data to sort + KeyT *d_keys_out, ///< [out] Pointer to the sorted output sequence of key data + int num_items, ///< [in] Number of items to sort + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + // Null value type + DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); + DoubleBuffer d_values; + + return DispatchRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + begin_bit, + end_bit, + false, + stream, + debug_synchronous); + } + + + /** + * \brief Sorts keys into ascending order. (~N auxiliary storage required). + * + * \par + * - The sorting operation is given a pair of key buffers managed by a + * DoubleBuffer structure that indicates which of the two buffers is + * "current" (and thus contains the input data to be sorted). 
+ * - The contents of both buffers may be altered by the sorting operation. + * - Upon completion, the sorting operation will update the "current" indicator + * within the DoubleBuffer wrapper to reference which of the two buffers + * now contains the sorted output sequence (a function of the number of key bits + * specified and the targeted device architecture). + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. + * - \devicestorageP + * - \devicestorage + * + * \par Performance + * The following charts illustrate saturated sorting performance across different + * CUDA architectures for uniform-random \p uint32 and \p uint64 keys, respectively. + * + * \image html lsb_radix_sort_int32_keys.png + * \image html lsb_radix_sort_int64_keys.png + * + * \par Snippet + * The code snippet below illustrates the sorting of a device vector of \p int keys. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_key_alt_buf; // e.g., [ ... ] + * ... + * + * // Create a DoubleBuffer to wrap the pair of device pointers + * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, num_items); + * + * // d_keys.Current() <-- [0, 3, 5, 6, 7, 8, 9] + * + * \endcode + * + * \tparam KeyT [inferred] KeyT type + */ + template + CUB_RUNTIME_FUNCTION + static cudaError_t SortKeys( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys + int num_items, ///< [in] Number of items to sort + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + // Null value type + DoubleBuffer d_values; + + return DispatchRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + begin_bit, + end_bit, + true, + stream, + debug_synchronous); + } + + /** + * \brief Sorts keys into descending order. (~2N auxiliary storage required). 
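+ *
+ * \par
+ * A minimal, self-contained sketch of the two-phase call pattern for this
+ * overload (a sketch only: it assumes the <cub/cub.cuh> umbrella header and
+ * \p int keys, and the \p SortDescending wrapper name is purely illustrative;
+ * the snippet further below shows the same pattern with example data):
+ * \code
+ * #include <cub/cub.cuh>
+ *
+ * void SortDescending(const int *d_keys_in, int *d_keys_out, int num_items)
+ * {
+ *     void   *d_temp_storage     = NULL;
+ *     size_t  temp_storage_bytes = 0;
+ *
+ *     // First call: d_temp_storage is NULL, so only the required size is computed
+ *     cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes,
+ *                                              d_keys_in, d_keys_out, num_items);
+ *
+ *     cudaMalloc(&d_temp_storage, temp_storage_bytes);
+ *
+ *     // Second call: performs the descending sort
+ *     cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes,
+ *                                              d_keys_in, d_keys_out, num_items);
+ *
+ *     cudaFree(d_temp_storage);
+ * }
+ * \endcode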
+ * + * \par + * - The contents of the input data are not altered by the sorting operation + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. + * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. + * - \devicestorage + * + * \par Performance + * Performance is similar to DeviceRadixSort::SortKeys. + * + * \par Snippet + * The code snippet below illustrates the sorting of a device vector of \p int keys. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_keys_out; // e.g., [ ... ] + * ... + * + * // Create a DoubleBuffer to wrap the pair of device pointers + * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, num_items); + * + * // d_keys_out <-- [9, 8, 7, 6, 5, 3, 0]s + * + * \endcode + * + * \tparam KeyT [inferred] KeyT type + */ + template + CUB_RUNTIME_FUNCTION + static cudaError_t SortKeysDescending( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + const KeyT *d_keys_in, ///< [in] Pointer to the input data of key data to sort + KeyT *d_keys_out, ///< [out] Pointer to the sorted output sequence of key data + int num_items, ///< [in] Number of items to sort + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); + DoubleBuffer d_values; + + return DispatchRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + begin_bit, + end_bit, + false, + stream, + debug_synchronous); + } + + + /** + * \brief Sorts keys into descending order. (~N auxiliary storage required). + * + * \par + * - The sorting operation is given a pair of key buffers managed by a + * DoubleBuffer structure that indicates which of the two buffers is + * "current" (and thus contains the input data to be sorted). + * - The contents of both buffers may be altered by the sorting operation. 
+ * - Upon completion, the sorting operation will update the "current" indicator + * within the DoubleBuffer wrapper to reference which of the two buffers + * now contains the sorted output sequence (a function of the number of key bits + * specified and the targeted device architecture). + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. + * - \devicestorageP + * - \devicestorage + * + * \par Performance + * Performance is similar to DeviceRadixSort::SortKeys. + * + * \par Snippet + * The code snippet below illustrates the sorting of a device vector of \p int keys. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_key_alt_buf; // e.g., [ ... ] + * ... + * + * // Create a DoubleBuffer to wrap the pair of device pointers + * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys, num_items); + * + * // d_keys.Current() <-- [9, 8, 7, 6, 5, 3, 0] + * + * \endcode + * + * \tparam KeyT [inferred] KeyT type + */ + template + CUB_RUNTIME_FUNCTION + static cudaError_t SortKeysDescending( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys + int num_items, ///< [in] Number of items to sort + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
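+        // Minimal host-side sketch of this double-buffer overload (illustrative only;
+        // it assumes <cub/cub.cuh> and int keys, with d_key_buf / d_key_alt_buf as the
+        // caller's two device allocations). The sorted result must be read through
+        // d_keys.Current(), since either buffer may end up holding it:
+        //
+        //     cub::DoubleBuffer<int> d_keys(d_key_buf, d_key_alt_buf);
+        //
+        //     void   *d_temp_storage     = NULL;
+        //     size_t  temp_storage_bytes = 0;
+        //     cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes,
+        //                                              d_keys, num_items);
+        //     cudaMalloc(&d_temp_storage, temp_storage_bytes);
+        //     cub::DeviceRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes,
+        //                                              d_keys, num_items);
+        //     int *d_sorted_keys = d_keys.Current();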
+ { + // Signed integer type for global offsets + typedef int OffsetT; + + // Null value type + DoubleBuffer d_values; + + return DispatchRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + begin_bit, + end_bit, + true, + stream, + debug_synchronous); + } + + + //@} end member group + + +}; + +/** + * \example example_device_radix_sort.cu + */ + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_reduce.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_reduce.cuh new file mode 100644 index 0000000..13c7a72 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_reduce.cuh @@ -0,0 +1,734 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within device-accessible memory. + */ + +#pragma once + +#include +#include +#include + +#include "../iterator/arg_index_input_iterator.cuh" +#include "dispatch/dispatch_reduce.cuh" +#include "dispatch/dispatch_reduce_by_key.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within device-accessible memory. ![](reduce_logo.png) + * \ingroup SingleModule + * + * \par Overview + * A reduction (or fold) + * uses a binary combining operator to compute a single aggregate from a sequence of input elements. 
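+ *
+ * \par
+ * For example, reducing the sequence [8, 6, 7, 5, 3, 0, 9] with the binary
+ * \p + operator and an initial value of 0 yields the single aggregate
+ * ((((((0 + 8) + 6) + 7) + 5) + 3) + 0) + 9 = 38 (the same input and result
+ * used in the Sum snippet below).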
+ * + * \par Usage Considerations + * \cdp_class{DeviceReduce} + * + * \par Performance + * \linear_performance{reduction, reduce-by-key, and run-length encode} + * + * \par + * The following chart illustrates DeviceReduce::Sum + * performance across different CUDA architectures for \p int32 keys. + * + * \image html reduce_int32.png + * + * \par + * The following chart illustrates DeviceReduce::ReduceByKey (summation) + * performance across different CUDA architectures for \p fp32 + * values. Segments are identified by \p int32 keys, and have lengths uniformly sampled from [1,1000]. + * + * \image html reduce_by_key_fp32_len_500.png + * + * \par + * \plots_below + * + */ +struct DeviceReduce +{ + /** + * \brief Computes a device-wide reduction using the specified binary \p reduction_op functor and initial value \p init. + * + * \par + * - Does not support binary reduction operators that are non-commutative. + * - Provides "run-to-run" determinism for pseudo-associative reduction + * (e.g., addition of floating point types) on the same GPU device. + * However, results for pseudo-associative reduction may be inconsistent + * from one device to a another device of a different compute-capability + * because CUB can employ different tile-sizing for different architectures. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates a user-defined min-reduction of a device vector of \p int data elements. + * \par + * \code + * #include // or equivalently + * + * // CustomMin functor + * struct CustomMin + * { + * template + * __device__ __forceinline__ + * T operator()(const T &a, const T &b) const { + * return (b < a) ? b : a; + * } + * }; + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 7 + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_out; // e.g., [-] + * CustomMin min_op; + * int init; // e.g., INT_MAX + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, min_op, init); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run reduction + * cub::DeviceReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, min_op, init); + * + * // d_out <-- [0] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator + * \tparam ReductionOpT [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) + * \tparam T [inferred] Data element type that is convertible to the \p value type of \p InputIteratorT + */ + template < + typename InputIteratorT, + typename OutputIteratorT, + typename ReductionOpT, + typename T> + CUB_RUNTIME_FUNCTION + static cudaError_t Reduce( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
+ size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + ReductionOpT reduction_op, ///< [in] Binary reduction functor + T init, ///< [in] Initial value of the reduction + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + return DispatchReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + num_items, + reduction_op, + init, + stream, + debug_synchronous); + } + + + /** + * \brief Computes a device-wide sum using the addition (\p +) operator. + * + * \par + * - Uses \p 0 as the initial value of the reduction. + * - Does not support \p + operators that are non-commutative.. + * - Provides "run-to-run" determinism for pseudo-associative reduction + * (e.g., addition of floating point types) on the same GPU device. + * However, results for pseudo-associative reduction may be inconsistent + * from one device to a another device of a different compute-capability + * because CUB can employ different tile-sizing for different architectures. + * - \devicestorage + * + * \par Performance + * The following charts illustrate saturated sum-reduction performance across different + * CUDA architectures for \p int32 and \p int64 items, respectively. + * + * \image html reduce_int32.png + * \image html reduce_int64.png + * + * \par Snippet + * The code snippet below illustrates the sum-reduction of a device vector of \p int data elements. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 7 + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_out; // e.g., [-] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sum-reduction + * cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); + * + * // d_out <-- [38] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator + */ + template < + typename InputIteratorT, + typename OutputIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t Sum( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
+ size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type + + return DispatchReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + num_items, + cub::Sum(), + OutputT(), // zero-initialize + stream, + debug_synchronous); + } + + + /** + * \brief Computes a device-wide minimum using the less-than ('<') operator. + * + * \par + * - Uses std::numeric_limits::max() as the initial value of the reduction. + * - Does not support \p < operators that are non-commutative. + * - Provides "run-to-run" determinism for pseudo-associative reduction + * (e.g., addition of floating point types) on the same GPU device. + * However, results for pseudo-associative reduction may be inconsistent + * from one device to a another device of a different compute-capability + * because CUB can employ different tile-sizing for different architectures. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the min-reduction of a device vector of \p int data elements. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 7 + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_out; // e.g., [-] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceReduce::Min(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run min-reduction + * cub::DeviceReduce::Min(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); + * + * // d_out <-- [0] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator + */ + template < + typename InputIteratorT, + typename OutputIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t Min( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
+ size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + return DispatchReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + num_items, + cub::Min(), + Traits::Max(), // replace with std::numeric_limits::max() when C++11 support is more prevalent + stream, + debug_synchronous); + } + + + /** + * \brief Finds the first device-wide minimum using the less-than ('<') operator, also returning the index of that item. + * + * \par + * - The output value type of \p d_out is cub::KeyValuePair (assuming the value type of \p d_in is \p T) + * - The minimum is written to d_out.value and its offset in the input array is written to d_out.key. + * - The {1, std::numeric_limits::max()} tuple is produced for zero-length inputs + * - Does not support \p < operators that are non-commutative. + * - Provides "run-to-run" determinism for pseudo-associative reduction + * (e.g., addition of floating point types) on the same GPU device. + * However, results for pseudo-associative reduction may be inconsistent + * from one device to a another device of a different compute-capability + * because CUB can employ different tile-sizing for different architectures. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the argmin-reduction of a device vector of \p int data elements. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 7 + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * KeyValuePair *d_out; // e.g., [{-,-}] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_argmin, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run argmin-reduction + * cub::DeviceReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_argmin, num_items); + * + * // d_out <-- [{5, 0}] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items (of some type \p T) \iterator + * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate (having value type cub::KeyValuePair) \iterator + */ + template < + typename InputIteratorT, + typename OutputIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t ArgMin( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
+ size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + // The input type + typedef typename std::iterator_traits::value_type InputValueT; + + // The output tuple type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + KeyValuePair, // ... then the key value pair OffsetT + InputValueT + typename std::iterator_traits::value_type>::Type OutputTupleT; // ... else the output iterator's value type + + // The output value type + typedef typename OutputTupleT::Value OutputValueT; + + // Wrapped input iterator to produce index-value tuples + typedef ArgIndexInputIterator ArgIndexInputIteratorT; + ArgIndexInputIteratorT d_indexed_in(d_in); + + // Initial value + OutputTupleT initial_value(1, Traits::Max()); // replace with std::numeric_limits::max() when C++11 support is more prevalent + + return DispatchReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_indexed_in, + d_out, + num_items, + cub::ArgMin(), + initial_value, + stream, + debug_synchronous); + } + + + /** + * \brief Computes a device-wide maximum using the greater-than ('>') operator. + * + * \par + * - Uses std::numeric_limits::lowest() as the initial value of the reduction. + * - Does not support \p > operators that are non-commutative. + * - Provides "run-to-run" determinism for pseudo-associative reduction + * (e.g., addition of floating point types) on the same GPU device. + * However, results for pseudo-associative reduction may be inconsistent + * from one device to a another device of a different compute-capability + * because CUB can employ different tile-sizing for different architectures. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the max-reduction of a device vector of \p int data elements. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 7 + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_out; // e.g., [-] + * ... 
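+ * // NOTE: the first Max call below passes d_temp_storage == NULL, so it only
+ * // writes the required size into temp_storage_bytes and launches no work;
+ * // the reduction itself runs in the second call.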
+ * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes, d_in, d_max, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run max-reduction + * cub::DeviceReduce::Max(d_temp_storage, temp_storage_bytes, d_in, d_max, num_items); + * + * // d_out <-- [9] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator + */ + template < + typename InputIteratorT, + typename OutputIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t Max( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + return DispatchReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + num_items, + cub::Max(), + Traits::Lowest(), // replace with std::numeric_limits::lowest() when C++11 support is more prevalent + stream, + debug_synchronous); + } + + + /** + * \brief Finds the first device-wide maximum using the greater-than ('>') operator, also returning the index of that item + * + * \par + * - The output value type of \p d_out is cub::KeyValuePair (assuming the value type of \p d_in is \p T) + * - The maximum is written to d_out.value and its offset in the input array is written to d_out.key. + * - The {1, std::numeric_limits::lowest()} tuple is produced for zero-length inputs + * - Does not support \p > operators that are non-commutative. + * - Provides "run-to-run" determinism for pseudo-associative reduction + * (e.g., addition of floating point types) on the same GPU device. + * However, results for pseudo-associative reduction may be inconsistent + * from one device to a another device of a different compute-capability + * because CUB can employ different tile-sizing for different architectures. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the argmax-reduction of a device vector of \p int data elements. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 7 + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * KeyValuePair *d_out; // e.g., [{-,-}] + * ... 
+ * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_argmax, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run argmax-reduction + * cub::DeviceReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_argmax, num_items); + * + * // d_out <-- [{6, 9}] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items (of some type \p T) \iterator + * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate (having value type cub::KeyValuePair) \iterator + */ + template < + typename InputIteratorT, + typename OutputIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t ArgMax( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + // The input type + typedef typename std::iterator_traits::value_type InputValueT; + + // The output tuple type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + KeyValuePair, // ... then the key value pair OffsetT + InputValueT + typename std::iterator_traits::value_type>::Type OutputTupleT; // ... else the output iterator's value type + + // The output value type + typedef typename OutputTupleT::Value OutputValueT; + + // Wrapped input iterator to produce index-value tuples + typedef ArgIndexInputIterator ArgIndexInputIteratorT; + ArgIndexInputIteratorT d_indexed_in(d_in); + + // Initial value + OutputTupleT initial_value(1, Traits::Lowest()); // replace with std::numeric_limits::lowest() when C++11 support is more prevalent + + return DispatchReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_indexed_in, + d_out, + num_items, + cub::ArgMax(), + initial_value, + stream, + debug_synchronous); + } + + + /** + * \brief Reduces segments of values, where segments are demarcated by corresponding runs of identical keys. + * + * \par + * This operation computes segmented reductions within \p d_values_in using + * the specified binary \p reduction_op functor. The segments are identified by + * "runs" of corresponding keys in \p d_keys_in, where runs are maximal ranges of + * consecutive, identical keys. For the ith run encountered, + * the first key of the run and the corresponding value aggregate of that run are + * written to d_unique_out[i] and d_aggregates_out[i], + * respectively. The total number of runs encountered is written to \p d_num_runs_out. 
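+ *
+ * \par
+ * As a small worked illustration (using the same data as the snippet below):
+ * for keys [0, 2, 2, 9, 5, 5, 5, 8] the maximal runs of identical keys are
+ * {0}, {2, 2}, {9}, {5, 5, 5}, and {8}. With values [0, 7, 1, 6, 2, 5, 3, 4]
+ * and a min reduction, this produces d_unique_out = [0, 2, 9, 5, 8],
+ * d_aggregates_out = [0, 1, 6, 2, 4], and d_num_runs_out = [5].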
+ * + * \par + * - The == equality operator is used to determine whether keys are equivalent + * - Provides "run-to-run" determinism for pseudo-associative reduction + * (e.g., addition of floating point types) on the same GPU device. + * However, results for pseudo-associative reduction may be inconsistent + * from one device to a another device of a different compute-capability + * because CUB can employ different tile-sizing for different architectures. + * - \devicestorage + * + * \par Performance + * The following chart illustrates reduction-by-key (sum) performance across + * different CUDA architectures for \p fp32 and \p fp64 values, respectively. Segments + * are identified by \p int32 keys, and have lengths uniformly sampled from [1,1000]. + * + * \image html reduce_by_key_fp32_len_500.png + * \image html reduce_by_key_fp64_len_500.png + * + * \par + * The following charts are similar, but with segment lengths uniformly sampled from [1,10]: + * + * \image html reduce_by_key_fp32_len_5.png + * \image html reduce_by_key_fp64_len_5.png + * + * \par Snippet + * The code snippet below illustrates the segmented reduction of \p int values grouped + * by runs of associated \p int keys. + * \par + * \code + * #include // or equivalently + * + * // CustomMin functor + * struct CustomMin + * { + * template + * CUB_RUNTIME_FUNCTION __forceinline__ + * T operator()(const T &a, const T &b) const { + * return (b < a) ? b : a; + * } + * }; + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 8 + * int *d_keys_in; // e.g., [0, 2, 2, 9, 5, 5, 5, 8] + * int *d_values_in; // e.g., [0, 7, 1, 6, 2, 5, 3, 4] + * int *d_unique_out; // e.g., [-, -, -, -, -, -, -, -] + * int *d_aggregates_out; // e.g., [-, -, -, -, -, -, -, -] + * int *d_num_runs_out; // e.g., [-] + * CustomMin reduction_op; + * ... 
+ * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, reduction_op, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run reduce-by-key + * cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, reduction_op, num_items); + * + * // d_unique_out <-- [0, 2, 9, 5, 8] + * // d_aggregates_out <-- [0, 1, 6, 2, 4] + * // d_num_runs_out <-- [5] + * + * \endcode + * + * \tparam KeysInputIteratorT [inferred] Random-access input iterator type for reading input keys \iterator + * \tparam UniqueOutputIteratorT [inferred] Random-access output iterator type for writing unique output keys \iterator + * \tparam ValuesInputIteratorT [inferred] Random-access input iterator type for reading input values \iterator + * \tparam AggregatesOutputIterator [inferred] Random-access output iterator type for writing output value aggregates \iterator + * \tparam NumRunsOutputIteratorT [inferred] Output iterator type for recording the number of runs encountered \iterator + * \tparam ReductionOpT [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) + */ + template < + typename KeysInputIteratorT, + typename UniqueOutputIteratorT, + typename ValuesInputIteratorT, + typename AggregatesOutputIteratorT, + typename NumRunsOutputIteratorT, + typename ReductionOpT> + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t ReduceByKey( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + KeysInputIteratorT d_keys_in, ///< [in] Pointer to the input sequence of keys + UniqueOutputIteratorT d_unique_out, ///< [out] Pointer to the output sequence of unique keys (one key per run) + ValuesInputIteratorT d_values_in, ///< [in] Pointer to the input sequence of corresponding values + AggregatesOutputIteratorT d_aggregates_out, ///< [out] Pointer to the output sequence of value aggregates (one aggregate per run) + NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs encountered (i.e., the length of d_unique_out) + ReductionOpT reduction_op, ///< [in] Binary reduction functor + int num_items, ///< [in] Total number of associated key+value pairs (i.e., the length of \p d_in_keys and \p d_in_values) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
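+        // Minimal host-side sketch of the two-phase call pattern for ReduceByKey
+        // (illustrative only; it assumes <cub/cub.cuh>, int keys/values, and cub::Sum()
+        // as the reduction functor, whereas the snippet above uses a user-defined
+        // CustomMin functor):
+        //
+        //     void   *d_temp_storage     = NULL;
+        //     size_t  temp_storage_bytes = 0;
+        //     cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes,
+        //                                    d_keys_in, d_unique_out, d_values_in,
+        //                                    d_aggregates_out, d_num_runs_out,
+        //                                    cub::Sum(), num_items);
+        //     cudaMalloc(&d_temp_storage, temp_storage_bytes);
+        //     cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes,
+        //                                    d_keys_in, d_unique_out, d_values_in,
+        //                                    d_aggregates_out, d_num_runs_out,
+        //                                    cub::Sum(), num_items);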
+ { + // Signed integer type for global offsets + typedef int OffsetT; + + // FlagT iterator type (not used) + + // Selection op (not used) + + // Default == operator + typedef Equality EqualityOp; + + return DispatchReduceByKey::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys_in, + d_unique_out, + d_values_in, + d_aggregates_out, + d_num_runs_out, + EqualityOp(), + reduction_op, + num_items, + stream, + debug_synchronous); + } + +}; + +/** + * \example example_device_reduce.cu + */ + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_run_length_encode.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_run_length_encode.cuh new file mode 100644 index 0000000..7a2e82d --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_run_length_encode.cuh @@ -0,0 +1,278 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceRunLengthEncode provides device-wide, parallel operations for computing a run-length encoding across a sequence of data items residing within device-accessible memory. + */ + +#pragma once + +#include +#include + +#include "dispatch/dispatch_rle.cuh" +#include "dispatch/dispatch_reduce_by_key.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief DeviceRunLengthEncode provides device-wide, parallel operations for demarcating "runs" of same-valued items within a sequence residing within device-accessible memory. 
![](run_length_encode_logo.png) + * \ingroup SingleModule + * + * \par Overview + * A run-length encoding + * computes a simple compressed representation of a sequence of input elements such that each + * maximal "run" of consecutive same-valued data items is encoded as a single data value along with a + * count of the elements in that run. + * + * \par Usage Considerations + * \cdp_class{DeviceRunLengthEncode} + * + * \par Performance + * \linear_performance{run-length encode} + * + * \par + * The following chart illustrates DeviceRunLengthEncode::RunLengthEncode performance across + * different CUDA architectures for \p int32 items. + * Segments have lengths uniformly sampled from [1,1000]. + * + * \image html rle_int32_len_500.png + * + * \par + * \plots_below + * + */ +struct DeviceRunLengthEncode +{ + + /** + * \brief Computes a run-length encoding of the sequence \p d_in. + * + * \par + * - For the ith run encountered, the first key of the run and its length are written to + * d_unique_out[i] and d_counts_out[i], + * respectively. + * - The total number of runs encountered is written to \p d_num_runs_out. + * - The == equality operator is used to determine whether values are equivalent + * - \devicestorage + * + * \par Performance + * The following charts illustrate saturated encode performance across different + * CUDA architectures for \p int32 and \p int64 items, respectively. Segments have + * lengths uniformly sampled from [1,1000]. + * + * \image html rle_int32_len_500.png + * \image html rle_int64_len_500.png + * + * \par + * The following charts are similar, but with segment lengths uniformly sampled from [1,10]: + * + * \image html rle_int32_len_5.png + * \image html rle_int64_len_5.png + * + * \par Snippet + * The code snippet below illustrates the run-length encoding of a sequence of \p int values. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 8 + * int *d_in; // e.g., [0, 2, 2, 9, 5, 5, 5, 8] + * int *d_unique_out; // e.g., [ , , , , , , , ] + * int *d_counts_out; // e.g., [ , , , , , , , ] + * int *d_num_runs_out; // e.g., [ ] + * ... 
+ * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceRunLengthEncode::Encode(d_temp_storage, temp_storage_bytes, d_in, d_unique_out, d_counts_out, d_num_runs_out, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run encoding + * cub::DeviceRunLengthEncode::Encode(d_temp_storage, temp_storage_bytes, d_in, d_unique_out, d_counts_out, d_num_runs_out, num_items); + * + * // d_unique_out <-- [0, 2, 9, 5, 8] + * // d_counts_out <-- [1, 2, 1, 3, 1] + * // d_num_runs_out <-- [5] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam UniqueOutputIteratorT [inferred] Random-access output iterator type for writing unique output items \iterator + * \tparam LengthsOutputIteratorT [inferred] Random-access output iterator type for writing output counts \iterator + * \tparam NumRunsOutputIteratorT [inferred] Output iterator type for recording the number of runs encountered \iterator + */ + template < + typename InputIteratorT, + typename UniqueOutputIteratorT, + typename LengthsOutputIteratorT, + typename NumRunsOutputIteratorT> + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Encode( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of keys + UniqueOutputIteratorT d_unique_out, ///< [out] Pointer to the output sequence of unique keys (one key per run) + LengthsOutputIteratorT d_counts_out, ///< [out] Pointer to the output sequence of run-lengths (one count per run) + NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs + int num_items, ///< [in] Total number of associated key+value pairs (i.e., the length of \p d_in_keys and \p d_in_values) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + typedef int OffsetT; // Signed integer type for global offsets + typedef NullType* FlagIterator; // FlagT iterator type (not used) + typedef NullType SelectOp; // Selection op (not used) + typedef Equality EqualityOp; // Default == operator + typedef cub::Sum ReductionOp; // Value reduction operator + + // The lengths output value type + typedef typename If<(Equals::value_type, void>::VALUE), // LengthT = (if output iterator's value type is void) ? + OffsetT, // ... then the OffsetT type, + typename std::iterator_traits::value_type>::Type LengthT; // ... 
else the output iterator's value type + + // Generator type for providing 1s values for run-length reduction + typedef ConstantInputIterator LengthsInputIteratorT; + + return DispatchReduceByKey::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_unique_out, + LengthsInputIteratorT((LengthT) 1), + d_counts_out, + d_num_runs_out, + EqualityOp(), + ReductionOp(), + num_items, + stream, + debug_synchronous); + } + + + /** + * \brief Enumerates the starting offsets and lengths of all non-trivial runs (of length > 1) of same-valued keys in the sequence \p d_in. + * + * \par + * - For the ith non-trivial run, the run's starting offset + * and its length are written to d_offsets_out[i] and + * d_lengths_out[i], respectively. + * - The total number of runs encountered is written to \p d_num_runs_out. + * - The == equality operator is used to determine whether values are equivalent + * - \devicestorage + * + * \par Performance + * + * \par Snippet + * The code snippet below illustrates the identification of non-trivial runs within a sequence of \p int values. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 8 + * int *d_in; // e.g., [0, 2, 2, 9, 5, 5, 5, 8] + * int *d_offsets_out; // e.g., [ , , , , , , , ] + * int *d_lengths_out; // e.g., [ , , , , , , , ] + * int *d_num_runs_out; // e.g., [ ] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceRunLengthEncode::NonTrivialRuns(d_temp_storage, temp_storage_bytes, d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run encoding + * cub::DeviceRunLengthEncode::NonTrivialRuns(d_temp_storage, temp_storage_bytes, d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items); + * + * // d_offsets_out <-- [1, 4] + * // d_lengths_out <-- [2, 3] + * // d_num_runs_out <-- [2] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam OffsetsOutputIteratorT [inferred] Random-access output iterator type for writing run-offset values \iterator + * \tparam LengthsOutputIteratorT [inferred] Random-access output iterator type for writing run-length values \iterator + * \tparam NumRunsOutputIteratorT [inferred] Output iterator type for recording the number of runs encountered \iterator + */ + template < + typename InputIteratorT, + typename OffsetsOutputIteratorT, + typename LengthsOutputIteratorT, + typename NumRunsOutputIteratorT> + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t NonTrivialRuns( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
+ size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to input sequence of data items + OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to output sequence of run-offsets (one offset per non-trivial run) + LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to output sequence of run-lengths (one count per non-trivial run) + NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs (i.e., length of \p d_offsets_out) + int num_items, ///< [in] Total number of associated key+value pairs (i.e., the length of \p d_in_keys and \p d_in_values) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + typedef int OffsetT; // Signed integer type for global offsets + typedef Equality EqualityOp; // Default == operator + + return DeviceRleDispatch::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_offsets_out, + d_lengths_out, + d_num_runs_out, + EqualityOp(), + num_items, + stream, + debug_synchronous); + } + + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_scan.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_scan.cuh new file mode 100644 index 0000000..e86fefe --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_scan.cuh @@ -0,0 +1,443 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/** + * \file + * cub::DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within device-accessible memory. + */ + +#pragma once + +#include +#include + +#include "dispatch/dispatch_scan.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within device-accessible memory. ![](device_scan.png) + * \ingroup SingleModule + * + * \par Overview + * Given a sequence of input elements and a binary reduction operator, a [prefix scan](http://en.wikipedia.org/wiki/Prefix_sum) + * produces an output sequence where each element is computed to be the reduction + * of the elements occurring earlier in the input sequence. Prefix sum + * connotes a prefix scan with the addition operator. The term \em inclusive indicates + * that the ith output reduction incorporates the ith input. + * The term \em exclusive indicates the ith input is not incorporated into + * the ith output reduction. + * + * \par + * As of CUB 1.0.1 (2013), CUB's device-wide scan APIs have implemented our "decoupled look-back" algorithm + * for performing global prefix scan with only a single pass through the + * input data, as described in our 2016 technical report [1]. The central + * idea is to leverage a small, constant factor of redundant work in order to overlap the latencies + * of global prefix propagation with local computation. As such, our algorithm requires only + * ~2n data movement (n inputs are read, n outputs are written), and typically + * proceeds at "memcpy" speeds. + * + * \par + * [1] [Duane Merrill and Michael Garland. "Single-pass Parallel Prefix Scan with Decoupled Look-back", NVIDIA Technical Report NVR-2016-002, 2016.](https://research.nvidia.com/publication/single-pass-parallel-prefix-scan-decoupled-look-back) + * + * \par Usage Considerations + * \cdp_class{DeviceScan} + * + * \par Performance + * \linear_performance{prefix scan} + * + * \par + * The following chart illustrates DeviceScan::ExclusiveSum + * performance across different CUDA architectures for \p int32 keys. + * \plots_below + * + * \image html scan_int32.png + * + */ +struct DeviceScan +{ + /******************************************************************//** + * \name Exclusive scans + *********************************************************************/ + //@{ + + /** + * \brief Computes a device-wide exclusive prefix sum. The value of 0 is applied as the initial value, and is assigned to *d_out. + * + * \par + * - Supports non-commutative sum operators. + * - Provides "run-to-run" determinism for pseudo-associative reduction + * (e.g., addition of floating point types) on the same GPU device. + * However, results for pseudo-associative reduction may be inconsistent + * from one device to a another device of a different compute-capability + * because CUB can employ different tile-sizing for different architectures. + * - \devicestorage + * + * \par Performance + * The following charts illustrate saturated exclusive sum performance across different + * CUDA architectures for \p int32 and \p int64 items, respectively. 
+ * + * \image html scan_int32.png + * \image html scan_int64.png + * + * \par Snippet + * The code snippet below illustrates the exclusive prefix sum of an \p int device vector. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 7 + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_out; // e.g., [ , , , , , , ] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run exclusive prefix sum + * cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); + * + * // d_out s<-- [0, 8, 14, 21, 26, 29, 29] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading scan inputs \iterator + * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing scan outputs \iterator + */ + template < + typename InputIteratorT, + typename OutputIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t ExclusiveSum( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items + int num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type + + // Initial value + OutputT init_value = 0; + + return DispatchScan::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + Sum(), + init_value, + num_items, + stream, + debug_synchronous); + } + + + /** + * \brief Computes a device-wide exclusive prefix scan using the specified binary \p scan_op functor. The \p init_value value is applied as the initial value, and is assigned to *d_out. + * + * \par + * - Supports non-commutative scan operators. + * - Provides "run-to-run" determinism for pseudo-associative reduction + * (e.g., addition of floating point types) on the same GPU device. + * However, results for pseudo-associative reduction may be inconsistent + * from one device to a another device of a different compute-capability + * because CUB can employ different tile-sizing for different architectures. 
+ * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the exclusive prefix min-scan of an \p int device vector + * \par + * \code + * #include // or equivalently + * + * // CustomMin functor + * struct CustomMin + * { + * template + * CUB_RUNTIME_FUNCTION __forceinline__ + * T operator()(const T &a, const T &b) const { + * return (b < a) ? b : a; + * } + * }; + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 7 + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_out; // e.g., [ , , , , , , ] + * CustomMin min_op + * ... + * + * // Determine temporary device storage requirements for exclusive prefix scan + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceScan::ExclusiveScan(d_temp_storage, temp_storage_bytes, d_in, d_out, min_op, (int) MAX_INT, num_items); + * + * // Allocate temporary storage for exclusive prefix scan + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run exclusive prefix min-scan + * cub::DeviceScan::ExclusiveScan(d_temp_storage, temp_storage_bytes, d_in, d_out, min_op, (int) MAX_INT, num_items); + * + * // d_out <-- [2147483647, 8, 6, 6, 5, 3, 0] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading scan inputs \iterator + * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing scan outputs \iterator + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + * \tparam Identity [inferred] Type of the \p identity value used Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template < + typename InputIteratorT, + typename OutputIteratorT, + typename ScanOpT, + typename InitValueT> + CUB_RUNTIME_FUNCTION + static cudaError_t ExclusiveScan( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items + ScanOpT scan_op, ///< [in] Binary scan functor + InitValueT init_value, ///< [in] Initial value to seed the exclusive scan (and is assigned to *d_out) + int num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + return DispatchScan::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + scan_op, + init_value, + num_items, + stream, + debug_synchronous); + } + + + //@} end member group + /******************************************************************//** + * \name Inclusive scans + *********************************************************************/ + //@{ + + + /** + * \brief Computes a device-wide inclusive prefix sum. + * + * \par + * - Supports non-commutative sum operators. 
+ * - Provides "run-to-run" determinism for pseudo-associative reduction + * (e.g., addition of floating point types) on the same GPU device. + * However, results for pseudo-associative reduction may be inconsistent + * from one device to a another device of a different compute-capability + * because CUB can employ different tile-sizing for different architectures. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the inclusive prefix sum of an \p int device vector. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 7 + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_out; // e.g., [ , , , , , , ] + * ... + * + * // Determine temporary device storage requirements for inclusive prefix sum + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); + * + * // Allocate temporary storage for inclusive prefix sum + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run inclusive prefix sum + * cub::DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items); + * + * // d_out <-- [8, 14, 21, 26, 29, 29, 38] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading scan inputs \iterator + * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing scan outputs \iterator + */ + template < + typename InputIteratorT, + typename OutputIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t InclusiveSum( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items + int num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + return DispatchScan::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + Sum(), + NullType(), + num_items, + stream, + debug_synchronous); + } + + + /** + * \brief Computes a device-wide inclusive prefix scan using the specified binary \p scan_op functor. + * + * \par + * - Supports non-commutative scan operators. + * - Provides "run-to-run" determinism for pseudo-associative reduction + * (e.g., addition of floating point types) on the same GPU device. + * However, results for pseudo-associative reduction may be inconsistent + * from one device to a another device of a different compute-capability + * because CUB can employ different tile-sizing for different architectures. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the inclusive prefix min-scan of an \p int device vector. 
+ * \par + * \code + * #include // or equivalently + * + * // CustomMin functor + * struct CustomMin + * { + * template + * CUB_RUNTIME_FUNCTION __forceinline__ + * T operator()(const T &a, const T &b) const { + * return (b < a) ? b : a; + * } + * }; + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 7 + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_out; // e.g., [ , , , , , , ] + * CustomMin min_op; + * ... + * + * // Determine temporary device storage requirements for inclusive prefix scan + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceScan::InclusiveScan(d_temp_storage, temp_storage_bytes, d_in, d_out, min_op, num_items); + * + * // Allocate temporary storage for inclusive prefix scan + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run inclusive prefix min-scan + * cub::DeviceScan::InclusiveScan(d_temp_storage, temp_storage_bytes, d_in, d_out, min_op, num_items); + * + * // d_out <-- [8, 6, 6, 5, 3, 0, 0] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading scan inputs \iterator + * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing scan outputs \iterator + * \tparam ScanOp [inferred] Binary scan functor type having member T operator()(const T &a, const T &b) + */ + template < + typename InputIteratorT, + typename OutputIteratorT, + typename ScanOpT> + CUB_RUNTIME_FUNCTION + static cudaError_t InclusiveScan( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items + ScanOpT scan_op, ///< [in] Binary scan functor + int num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + return DispatchScan::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + scan_op, + NullType(), + num_items, + stream, + debug_synchronous); + } + + //@} end member group + +}; + +/** + * \example example_device_scan.cu + */ + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_segmented_radix_sort.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_segmented_radix_sort.cuh new file mode 100644 index 0000000..0d36076 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_segmented_radix_sort.cuh @@ -0,0 +1,876 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceSegmentedRadixSort provides device-wide, parallel operations for computing a batched radix sort across multiple, non-overlapping sequences of data items residing within device-accessible memory. + */ + +#pragma once + +#include +#include + +#include "dispatch/dispatch_radix_sort.cuh" +#include "../util_arch.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief DeviceSegmentedRadixSort provides device-wide, parallel operations for computing a batched radix sort across multiple, non-overlapping sequences of data items residing within device-accessible memory. ![](segmented_sorting_logo.png) + * \ingroup SegmentedModule + * + * \par Overview + * The [radix sorting method](http://en.wikipedia.org/wiki/Radix_sort) arranges + * items into ascending (or descending) order. The algorithm relies upon a positional representation for + * keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, + * characters, etc.) specified from least-significant to most-significant. For a + * given input sequence of keys and a set of rules specifying a total ordering + * of the symbolic alphabet, the radix sorting method produces a lexicographic + * ordering of those keys. + * + * \par + * DeviceSegmentedRadixSort can sort all of the built-in C++ numeric primitive types + * (unsigned char, \p int, \p double, etc.) as well as CUDA's \p __half + * half-precision floating-point type. Although the direct radix sorting + * method can only be applied to unsigned integral types, DeviceSegmentedRadixSort + * is able to sort signed and floating-point types via simple bit-wise transformations + * that ensure lexicographic key ordering. 
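+ *
+ * \par
+ * (Editor's note; not part of upstream CUB 1.8.0.) The segmented entry points below all
+ * follow the same two-phase calling convention: a first call with a NULL \p d_temp_storage
+ * pointer only reports the required temporary-storage size, and a second call with the
+ * allocated buffer performs the sort. The sketch below shows a minimal keys-only ascending
+ * sort; \p d_keys_in, \p d_keys_out, and \p d_offsets are assumed to be device allocations
+ * already populated by the caller.
+ * \par
+ * \code
+ * #include <cub/device/device_segmented_radix_sort.cuh>
+ *
+ * int num_items    = 7;      // total keys across all segments
+ * int num_segments = 3;      // segment boundaries given by d_offsets[0..3]
+ *
+ * void   *d_temp_storage     = NULL;
+ * size_t  temp_storage_bytes = 0;
+ *
+ * // Phase 1: query the temporary-storage requirement (no sorting work is done)
+ * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes,
+ *     d_keys_in, d_keys_out, num_items, num_segments, d_offsets, d_offsets + 1);
+ *
+ * cudaMalloc(&d_temp_storage, temp_storage_bytes);
+ *
+ * // Phase 2: perform the segmented ascending radix sort
+ * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes,
+ *     d_keys_in, d_keys_out, num_items, num_segments, d_offsets, d_offsets + 1);
+ *
+ * cudaFree(d_temp_storage);
+ * \endcode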
+ * + * \par Usage Considerations + * \cdp_class{DeviceSegmentedRadixSort} + * + */ +struct DeviceSegmentedRadixSort +{ + + /******************************************************************//** + * \name Key-value pairs + *********************************************************************/ + //@{ + + /** + * \brief Sorts segments of key-value pairs into ascending order. (~2N auxiliary storage required) + * + * \par + * - The contents of the input data are not altered by the sorting operation + * - When input a contiguous sequence of segments, a single sequence + * \p segment_offsets (of length num_segments+1) can be aliased + * for both the \p d_begin_offsets and \p d_end_offsets parameters (where + * the latter is specified as segment_offsets+1). + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. + * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys + * with associated vector of \p int values. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int num_segments; // e.g., 3 + * int *d_offsets; // e.g., [0, 3, 3, 7] + * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_keys_out; // e.g., [-, -, -, -, -, -, -] + * int *d_values_in; // e.g., [0, 1, 2, 3, 4, 5, 6] + * int *d_values_out; // e.g., [-, -, -, -, -, -, -] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, + * d_keys_in, d_keys_out, d_values_in, d_values_out, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, + * d_keys_in, d_keys_out, d_values_in, d_values_out, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // d_keys_out <-- [6, 7, 8, 0, 3, 5, 9] + * // d_values_out <-- [1, 2, 0, 5, 4, 3, 6] + * + * \endcode + * + * \tparam KeyT [inferred] Key type + * \tparam ValueT [inferred] Value type + * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator + */ + template < + typename KeyT, + typename ValueT, + typename OffsetIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t SortPairs( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
+ size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + const KeyT *d_keys_in, ///< [in] %Device-accessible pointer to the input data of key data to sort + KeyT *d_keys_out, ///< [out] %Device-accessible pointer to the sorted output sequence of key data + const ValueT *d_values_in, ///< [in] %Device-accessible pointer to the corresponding input sequence of associated value items + ValueT *d_values_out, ///< [out] %Device-accessible pointer to the correspondingly-reordered output sequence of associated value items + int num_items, ///< [in] The total number of items to sort (across all segments) + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); + DoubleBuffer d_values(const_cast(d_values_in), d_values_out); + + return DispatchSegmentedRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + num_segments, + d_begin_offsets, + d_end_offsets, + begin_bit, + end_bit, + false, + stream, + debug_synchronous); + } + + + /** + * \brief Sorts segments of key-value pairs into ascending order. (~N auxiliary storage required) + * + * \par + * - The sorting operation is given a pair of key buffers and a corresponding + * pair of associated value buffers. Each pair is managed by a DoubleBuffer + * structure that indicates which of the two buffers is "current" (and thus + * contains the input data to be sorted). + * - The contents of both buffers within each pair may be altered by the sorting + * operation. + * - Upon completion, the sorting operation will update the "current" indicator + * within each DoubleBuffer wrapper to reference which of the two buffers + * now contains the sorted output sequence (a function of the number of key bits + * specified and the targeted device architecture). + * - When input a contiguous sequence of segments, a single sequence + * \p segment_offsets (of length num_segments+1) can be aliased + * for both the \p d_begin_offsets and \p d_end_offsets parameters (where + * the latter is specified as segment_offsets+1). + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. 
+ * - \devicestorageP + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys + * with associated vector of \p int values. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int num_segments; // e.g., 3 + * int *d_offsets; // e.g., [0, 3, 3, 7] + * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_key_alt_buf; // e.g., [-, -, -, -, -, -, -] + * int *d_value_buf; // e.g., [0, 1, 2, 3, 4, 5, 6] + * int *d_value_alt_buf; // e.g., [-, -, -, -, -, -, -] + * ... + * + * // Create a set of DoubleBuffers to wrap pairs of device pointers + * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); + * cub::DoubleBuffer d_values(d_value_buf, d_value_alt_buf); + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceSegmentedRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // d_keys.Current() <-- [6, 7, 8, 0, 3, 5, 9] + * // d_values.Current() <-- [5, 4, 3, 1, 2, 0, 6] + * + * \endcode + * + * \tparam KeyT [inferred] Key type + * \tparam ValueT [inferred] Value type + * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator + */ + template < + typename KeyT, + typename ValueT, + typename OffsetIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t SortPairs( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys + DoubleBuffer &d_values, ///< [in,out] Double-buffer of values whose "current" device-accessible buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values + int num_items, ///< [in] The total number of items to sort (across all segments) + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. 
+ int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + return DispatchSegmentedRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + num_segments, + d_begin_offsets, + d_end_offsets, + begin_bit, + end_bit, + true, + stream, + debug_synchronous); + } + + + /** + * \brief Sorts segments of key-value pairs into descending order. (~2N auxiliary storage required). + * + * \par + * - The contents of the input data are not altered by the sorting operation + * - When input a contiguous sequence of segments, a single sequence + * \p segment_offsets (of length num_segments+1) can be aliased + * for both the \p d_begin_offsets and \p d_end_offsets parameters (where + * the latter is specified as segment_offsets+1). + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. + * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys + * with associated vector of \p int values. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int num_segments; // e.g., 3 + * int *d_offsets; // e.g., [0, 3, 3, 7] + * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_keys_out; // e.g., [-, -, -, -, -, -, -] + * int *d_values_in; // e.g., [0, 1, 2, 3, 4, 5, 6] + * int *d_values_out; // e.g., [-, -, -, -, -, -, -] + * ... 
+ * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, + * d_keys_in, d_keys_out, d_values_in, d_values_out, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, + * d_keys_in, d_keys_out, d_values_in, d_values_out, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // d_keys_out <-- [8, 7, 6, 9, 5, 3, 0] + * // d_values_out <-- [0, 2, 1, 6, 3, 4, 5] + * + * \endcode + * + * \tparam KeyT [inferred] Key type + * \tparam ValueT [inferred] Value type + * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator + */ + template < + typename KeyT, + typename ValueT, + typename OffsetIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t SortPairsDescending( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + const KeyT *d_keys_in, ///< [in] %Device-accessible pointer to the input data of key data to sort + KeyT *d_keys_out, ///< [out] %Device-accessible pointer to the sorted output sequence of key data + const ValueT *d_values_in, ///< [in] %Device-accessible pointer to the corresponding input sequence of associated value items + ValueT *d_values_out, ///< [out] %Device-accessible pointer to the correspondingly-reordered output sequence of associated value items + int num_items, ///< [in] The total number of items to sort (across all segments) + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
+ { + // Signed integer type for global offsets + typedef int OffsetT; + + DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); + DoubleBuffer d_values(const_cast(d_values_in), d_values_out); + + return DispatchSegmentedRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + num_segments, + d_begin_offsets, + d_end_offsets, + begin_bit, + end_bit, + false, + stream, + debug_synchronous); + } + + + /** + * \brief Sorts segments of key-value pairs into descending order. (~N auxiliary storage required). + * + * \par + * - The sorting operation is given a pair of key buffers and a corresponding + * pair of associated value buffers. Each pair is managed by a DoubleBuffer + * structure that indicates which of the two buffers is "current" (and thus + * contains the input data to be sorted). + * - The contents of both buffers within each pair may be altered by the sorting + * operation. + * - Upon completion, the sorting operation will update the "current" indicator + * within each DoubleBuffer wrapper to reference which of the two buffers + * now contains the sorted output sequence (a function of the number of key bits + * specified and the targeted device architecture). + * - When input a contiguous sequence of segments, a single sequence + * \p segment_offsets (of length num_segments+1) can be aliased + * for both the \p d_begin_offsets and \p d_end_offsets parameters (where + * the latter is specified as segment_offsets+1). + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. + * - \devicestorageP + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys + * with associated vector of \p int values. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int num_segments; // e.g., 3 + * int *d_offsets; // e.g., [0, 3, 3, 7] + * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_key_alt_buf; // e.g., [-, -, -, -, -, -, -] + * int *d_value_buf; // e.g., [0, 1, 2, 3, 4, 5, 6] + * int *d_value_alt_buf; // e.g., [-, -, -, -, -, -, -] + * ... 
+ * + * // Create a set of DoubleBuffers to wrap pairs of device pointers + * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); + * cub::DoubleBuffer d_values(d_value_buf, d_value_alt_buf); + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, d_keys, d_values, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceSegmentedRadixSort::SortPairsDescending(d_temp_storage, temp_storage_bytes, d_keys, d_values, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // d_keys.Current() <-- [8, 7, 6, 9, 5, 3, 0] + * // d_values.Current() <-- [0, 2, 1, 6, 3, 4, 5] + * + * \endcode + * + * \tparam KeyT [inferred] Key type + * \tparam ValueT [inferred] Value type + * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator + */ + template < + typename KeyT, + typename ValueT, + typename OffsetIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t SortPairsDescending( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys + DoubleBuffer &d_values, ///< [in,out] Double-buffer of values whose "current" device-accessible buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values + int num_items, ///< [in] The total number of items to sort (across all segments) + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
+ { + // Signed integer type for global offsets + typedef int OffsetT; + + return DispatchSegmentedRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + num_segments, + d_begin_offsets, + d_end_offsets, + begin_bit, + end_bit, + true, + stream, + debug_synchronous); + } + + + //@} end member group + /******************************************************************//** + * \name Keys-only + *********************************************************************/ + //@{ + + + /** + * \brief Sorts segments of keys into ascending order. (~2N auxiliary storage required) + * + * \par + * - The contents of the input data are not altered by the sorting operation + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. + * - When input a contiguous sequence of segments, a single sequence + * \p segment_offsets (of length num_segments+1) can be aliased + * for both the \p d_begin_offsets and \p d_end_offsets parameters (where + * the latter is specified as segment_offsets+1). + * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int num_segments; // e.g., 3 + * int *d_offsets; // e.g., [0, 3, 3, 7] + * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_keys_out; // e.g., [-, -, -, -, -, -, -] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // d_keys_out <-- [6, 7, 8, 0, 3, 5, 9] + * + * \endcode + * + * \tparam KeyT [inferred] Key type + * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator + */ + template < + typename KeyT, + typename OffsetIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t SortKeys( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
+ size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + const KeyT *d_keys_in, ///< [in] %Device-accessible pointer to the input data of key data to sort + KeyT *d_keys_out, ///< [out] %Device-accessible pointer to the sorted output sequence of key data + int num_items, ///< [in] The total number of items to sort (across all segments) + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + // Null value type + DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); + DoubleBuffer d_values; + + return DispatchSegmentedRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + num_segments, + d_begin_offsets, + d_end_offsets, + begin_bit, + end_bit, + false, + stream, + debug_synchronous); + } + + + /** + * \brief Sorts segments of keys into ascending order. (~N auxiliary storage required). + * + * \par + * - The sorting operation is given a pair of key buffers managed by a + * DoubleBuffer structure that indicates which of the two buffers is + * "current" (and thus contains the input data to be sorted). + * - The contents of both buffers may be altered by the sorting operation. + * - Upon completion, the sorting operation will update the "current" indicator + * within the DoubleBuffer wrapper to reference which of the two buffers + * now contains the sorted output sequence (a function of the number of key bits + * specified and the targeted device architecture). + * - When input a contiguous sequence of segments, a single sequence + * \p segment_offsets (of length num_segments+1) can be aliased + * for both the \p d_begin_offsets and \p d_end_offsets parameters (where + * the latter is specified as segment_offsets+1). + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. + * - \devicestorageP + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys. 
+ * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int num_segments; // e.g., 3 + * int *d_offsets; // e.g., [0, 3, 3, 7] + * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_key_alt_buf; // e.g., [-, -, -, -, -, -, -] + * ... + * + * // Create a DoubleBuffer to wrap the pair of device pointers + * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // d_keys.Current() <-- [6, 7, 8, 0, 3, 5, 9] + * + * \endcode + * + * \tparam KeyT [inferred] Key type + * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator + */ + template < + typename KeyT, + typename OffsetIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t SortKeys( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys + int num_items, ///< [in] The total number of items to sort (across all segments) + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
+ { + // Signed integer type for global offsets + typedef int OffsetT; + + // Null value type + DoubleBuffer d_values; + + return DispatchSegmentedRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + num_segments, + d_begin_offsets, + d_end_offsets, + begin_bit, + end_bit, + true, + stream, + debug_synchronous); + } + + /** + * \brief Sorts segments of keys into descending order. (~2N auxiliary storage required). + * + * \par + * - The contents of the input data are not altered by the sorting operation + * - When input a contiguous sequence of segments, a single sequence + * \p segment_offsets (of length num_segments+1) can be aliased + * for both the \p d_begin_offsets and \p d_end_offsets parameters (where + * the latter is specified as segment_offsets+1). + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. + * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int num_segments; // e.g., 3 + * int *d_offsets; // e.g., [0, 3, 3, 7] + * int *d_keys_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_keys_out; // e.g., [-, -, -, -, -, -, -] + * ... + * + * // Create a DoubleBuffer to wrap the pair of device pointers + * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSegmentedRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceSegmentedRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // d_keys_out <-- [8, 7, 6, 9, 5, 3, 0] + * + * \endcode + * + * \tparam KeyT [inferred] Key type + * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator + */ + template < + typename KeyT, + typename OffsetIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t SortKeysDescending( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
+ size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + const KeyT *d_keys_in, ///< [in] %Device-accessible pointer to the input data of key data to sort + KeyT *d_keys_out, ///< [out] %Device-accessible pointer to the sorted output sequence of key data + int num_items, ///< [in] The total number of items to sort (across all segments) + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); + DoubleBuffer d_values; + + return DispatchSegmentedRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + num_segments, + d_begin_offsets, + d_end_offsets, + begin_bit, + end_bit, + false, + stream, + debug_synchronous); + } + + + /** + * \brief Sorts segments of keys into descending order. (~N auxiliary storage required). + * + * \par + * - The sorting operation is given a pair of key buffers managed by a + * DoubleBuffer structure that indicates which of the two buffers is + * "current" (and thus contains the input data to be sorted). + * - The contents of both buffers may be altered by the sorting operation. + * - Upon completion, the sorting operation will update the "current" indicator + * within the DoubleBuffer wrapper to reference which of the two buffers + * now contains the sorted output sequence (a function of the number of key bits + * specified and the targeted device architecture). + * - When input a contiguous sequence of segments, a single sequence + * \p segment_offsets (of length num_segments+1) can be aliased + * for both the \p d_begin_offsets and \p d_end_offsets parameters (where + * the latter is specified as segment_offsets+1). + * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. + * - \devicestorageP + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the batched sorting of three segments (with one zero-length segment) of \p int keys. 
+ * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for sorting data + * int num_items; // e.g., 7 + * int num_segments; // e.g., 3 + * int *d_offsets; // e.g., [0, 3, 3, 7] + * int *d_key_buf; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_key_alt_buf; // e.g., [-, -, -, -, -, -, -] + * ... + * + * // Create a DoubleBuffer to wrap the pair of device pointers + * cub::DoubleBuffer d_keys(d_key_buf, d_key_alt_buf); + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSegmentedRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sorting operation + * cub::DeviceSegmentedRadixSort::SortKeysDescending(d_temp_storage, temp_storage_bytes, d_keys, + * num_items, num_segments, d_offsets, d_offsets + 1); + * + * // d_keys.Current() <-- [8, 7, 6, 9, 5, 3, 0] + * + * \endcode + * + * \tparam KeyT [inferred] Key type + * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator + */ + template < + typename KeyT, + typename OffsetIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t SortKeysDescending( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + DoubleBuffer &d_keys, ///< [in,out] Reference to the double-buffer of keys whose "current" device-accessible buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys + int num_items, ///< [in] The total number of items to sort (across all segments) + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + int begin_bit = 0, ///< [in] [optional] The least-significant bit index (inclusive) needed for key comparison + int end_bit = sizeof(KeyT) * 8, ///< [in] [optional] The most-significant bit index (exclusive) needed for key comparison (e.g., sizeof(unsigned int) * 8) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
+ { + // Signed integer type for global offsets + typedef int OffsetT; + + // Null value type + DoubleBuffer d_values; + + return DispatchSegmentedRadixSort::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys, + d_values, + num_items, + num_segments, + d_begin_offsets, + d_end_offsets, + begin_bit, + end_bit, + true, + stream, + debug_synchronous); + } + + + //@} end member group + + +}; + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_segmented_reduce.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_segmented_reduce.cuh new file mode 100644 index 0000000..6c3b54a --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_segmented_reduce.cuh @@ -0,0 +1,619 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceSegmentedReduce provides device-wide, parallel operations for computing a batched reduction across multiple sequences of data items residing within device-accessible memory. + */ + +#pragma once + +#include +#include + +#include "../iterator/arg_index_input_iterator.cuh" +#include "dispatch/dispatch_reduce.cuh" +#include "dispatch/dispatch_reduce_by_key.cuh" +#include "../util_type.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief DeviceSegmentedReduce provides device-wide, parallel operations for computing a reduction across multiple sequences of data items residing within device-accessible memory. ![](reduce_logo.png) + * \ingroup SegmentedModule + * + * \par Overview + * A reduction (or fold) + * uses a binary combining operator to compute a single aggregate from a sequence of input elements. 
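[Editorial note: the Doxygen snippets in this dump have lost their `#include <...>` targets and template angle brackets. The sketch below is a self-contained restoration of the custom-min segmented reduction that the documentation following this overview illustrates; the input data and expected result come from that snippet, while the include path, host-side allocation, and printout are assumptions based on the standard CUB 1.8.0 layout, not part of the original file.]

// Self-contained sketch of the custom-min segmented reduction documented below;
// assumes the cub-1.8.0 directory is on the include path.
#include <cstdio>
#include <climits>
#include <cuda_runtime.h>
#include <cub/device/device_segmented_reduce.cuh>   // or equivalently <cub/cub.cuh>

// CustomMin functor, as in the snippet below
struct CustomMin
{
    template <typename T>
    CUB_RUNTIME_FUNCTION __forceinline__
    T operator()(const T &a, const T &b) const { return (b < a) ? b : a; }
};

int main()
{
    // Three segments over seven items: offsets [0, 3, 3, 7], keys [8, 6, 7, 5, 3, 0, 9]
    const int num_segments = 3;
    int h_offsets[] = {0, 3, 3, 7};
    int h_in[]      = {8, 6, 7, 5, 3, 0, 9};

    int *d_offsets, *d_in, *d_out;
    cudaMalloc(&d_offsets, sizeof(h_offsets));
    cudaMalloc(&d_in,      sizeof(h_in));
    cudaMalloc(&d_out,     num_segments * sizeof(int));
    cudaMemcpy(d_offsets, h_offsets, sizeof(h_offsets), cudaMemcpyHostToDevice);
    cudaMemcpy(d_in,      h_in,      sizeof(h_in),      cudaMemcpyHostToDevice);

    CustomMin min_op;
    int initial_value = INT_MAX;

    // Two-phase pattern: the first call sizes the temporary storage, the second runs the reduction
    void   *d_temp_storage     = NULL;
    size_t  temp_storage_bytes = 0;
    cub::DeviceSegmentedReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out,
                                       num_segments, d_offsets, d_offsets + 1, min_op, initial_value);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceSegmentedReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out,
                                       num_segments, d_offsets, d_offsets + 1, min_op, initial_value);

    int h_out[num_segments];
    cudaMemcpy(h_out, d_out, sizeof(h_out), cudaMemcpyDeviceToHost);
    printf("%d %d %d\n", h_out[0], h_out[1], h_out[2]);   // expected per the docs: 6, INT_MAX, 0
    return 0;
}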
+ * + * \par Usage Considerations + * \cdp_class{DeviceSegmentedReduce} + * + */ +struct DeviceSegmentedReduce +{ + /** + * \brief Computes a device-wide segmented reduction using the specified binary \p reduction_op functor. + * + * \par + * - Does not support binary reduction operators that are non-commutative. + * - When input a contiguous sequence of segments, a single sequence + * \p segment_offsets (of length num_segments+1) can be aliased + * for both the \p d_begin_offsets and \p d_end_offsets parameters (where + * the latter is specified as segment_offsets+1). + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates a custom min-reduction of a device vector of \p int data elements. + * \par + * \code + * #include // or equivalently + * + * // CustomMin functor + * struct CustomMin + * { + * template + * CUB_RUNTIME_FUNCTION __forceinline__ + * T operator()(const T &a, const T &b) const { + * return (b < a) ? b : a; + * } + * }; + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_segments; // e.g., 3 + * int *d_offsets; // e.g., [0, 3, 3, 7] + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_out; // e.g., [-, -, -] + * CustomMin min_op; + * int initial_value; // e.g., INT_MAX + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSegmentedReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out, + * num_segments, d_offsets, d_offsets + 1, min_op, initial_value); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run reduction + * cub::DeviceSegmentedReduce::Reduce(d_temp_storage, temp_storage_bytes, d_in, d_out, + * num_segments, d_offsets, d_offsets + 1, min_op, initial_value); + * + * // d_out <-- [6, INT_MAX, 0] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator + * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator + * \tparam ReductionOp [inferred] Binary reduction functor type having member T operator()(const T &a, const T &b) + * \tparam T [inferred] Data element type that is convertible to the \p value type of \p InputIteratorT + */ + template < + typename InputIteratorT, + typename OutputIteratorT, + typename OffsetIteratorT, + typename ReductionOp, + typename T> + CUB_RUNTIME_FUNCTION + static cudaError_t Reduce( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
+ size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + ReductionOp reduction_op, ///< [in] Binary reduction functor + T initial_value, ///< [in] Initial value of the reduction for each segment + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + return DispatchSegmentedReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + num_segments, + d_begin_offsets, + d_end_offsets, + reduction_op, + initial_value, + stream, + debug_synchronous); + } + + + /** + * \brief Computes a device-wide segmented sum using the addition ('+') operator. + * + * \par + * - Uses \p 0 as the initial value of the reduction for each segment. + * - When input a contiguous sequence of segments, a single sequence + * \p segment_offsets (of length num_segments+1) can be aliased + * for both the \p d_begin_offsets and \p d_end_offsets parameters (where + * the latter is specified as segment_offsets+1). + * - Does not support \p + operators that are non-commutative.. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the sum reduction of a device vector of \p int data elements. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_segments; // e.g., 3 + * int *d_offsets; // e.g., [0, 3, 3, 7] + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_out; // e.g., [-, -, -] + * ... 
+ * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSegmentedReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, + * num_segments, d_offsets, d_offsets + 1); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run sum-reduction + * cub::DeviceSegmentedReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, + * num_segments, d_offsets, d_offsets + 1); + * + * // d_out <-- [21, 0, 17] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator + * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator + */ + template < + typename InputIteratorT, + typename OutputIteratorT, + typename OffsetIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t Sum( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type + + return DispatchSegmentedReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + num_segments, + d_begin_offsets, + d_end_offsets, + cub::Sum(), + OutputT(), // zero-initialize + stream, + debug_synchronous); + } + + + /** + * \brief Computes a device-wide segmented minimum using the less-than ('<') operator. + * + * \par + * - Uses std::numeric_limits::max() as the initial value of the reduction for each segment. + * - When input a contiguous sequence of segments, a single sequence + * \p segment_offsets (of length num_segments+1) can be aliased + * for both the \p d_begin_offsets and \p d_end_offsets parameters (where + * the latter is specified as segment_offsets+1). 
+ * - Does not support \p < operators that are non-commutative. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the min-reduction of a device vector of \p int data elements. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_segments; // e.g., 3 + * int *d_offsets; // e.g., [0, 3, 3, 7] + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_out; // e.g., [-, -, -] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSegmentedReduce::Min(d_temp_storage, temp_storage_bytes, d_in, d_out, + * num_segments, d_offsets, d_offsets + 1); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run min-reduction + * cub::DeviceSegmentedReduce::Min(d_temp_storage, temp_storage_bytes, d_in, d_out, + * num_segments, d_offsets, d_offsets + 1); + * + * // d_out <-- [6, INT_MAX, 0] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator + * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator + */ + template < + typename InputIteratorT, + typename OutputIteratorT, + typename OffsetIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t Min( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + return DispatchSegmentedReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + num_segments, + d_begin_offsets, + d_end_offsets, + cub::Min(), + Traits::Max(), // replace with std::numeric_limits::max() when C++11 support is more prevalent + stream, + debug_synchronous); + } + + + /** + * \brief Finds the first device-wide minimum in each segment using the less-than ('<') operator, also returning the in-segment index of that item. 
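[Editorial note: in the ArgMin snippet further below, the template arguments of the output type were stripped by the dump (it reads `KeyValuePair *d_out`). The sketch here shows the same call with the `cub::KeyValuePair<int, int>` type spelled out; input data and expected tuples are taken from that snippet, while the host-side setup is an added assumption based on standard CUB 1.8.0 usage.]

// Minimal ArgMin sketch with the KeyValuePair template arguments restored;
// assumes the cub-1.8.0 directory is on the include path.
#include <cstdio>
#include <cuda_runtime.h>
#include <cub/device/device_segmented_reduce.cuh>   // or equivalently <cub/cub.cuh>

int main()
{
    const int num_segments = 3;
    int h_offsets[] = {0, 3, 3, 7};
    int h_in[]      = {8, 6, 7, 5, 3, 0, 9};

    int *d_offsets, *d_in;
    cub::KeyValuePair<int, int> *d_out;   // .key = in-segment offset of the minimum, .value = the minimum
    cudaMalloc(&d_offsets, sizeof(h_offsets));
    cudaMalloc(&d_in,      sizeof(h_in));
    cudaMalloc(&d_out,     num_segments * sizeof(cub::KeyValuePair<int, int>));
    cudaMemcpy(d_offsets, h_offsets, sizeof(h_offsets), cudaMemcpyHostToDevice);
    cudaMemcpy(d_in,      h_in,      sizeof(h_in),      cudaMemcpyHostToDevice);

    void *d_temp_storage = NULL;  size_t temp_storage_bytes = 0;
    cub::DeviceSegmentedReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out,
                                       num_segments, d_offsets, d_offsets + 1);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceSegmentedReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out,
                                       num_segments, d_offsets, d_offsets + 1);

    cub::KeyValuePair<int, int> h_out[num_segments];
    cudaMemcpy(h_out, d_out, sizeof(h_out), cudaMemcpyDeviceToHost);
    for (int i = 0; i < num_segments; ++i)
        printf("segment %d: min %d at offset %d\n", i, h_out[i].value, h_out[i].key);
    // expected per the docs below: {1,6}, {1,INT_MAX}, {2,0}
    return 0;
}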
+ * + * \par + * - The output value type of \p d_out is cub::KeyValuePair (assuming the value type of \p d_in is \p T) + * - The minimum of the ith segment is written to d_out[i].value and its offset in that segment is written to d_out[i].key. + * - The {1, std::numeric_limits::max()} tuple is produced for zero-length inputs + * - When input a contiguous sequence of segments, a single sequence + * \p segment_offsets (of length num_segments+1) can be aliased + * for both the \p d_begin_offsets and \p d_end_offsets parameters (where + * the latter is specified as segment_offsets+1). + * - Does not support \p < operators that are non-commutative. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the argmin-reduction of a device vector of \p int data elements. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_segments; // e.g., 3 + * int *d_offsets; // e.g., [0, 3, 3, 7] + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * KeyValuePair *d_out; // e.g., [{-,-}, {-,-}, {-,-}] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSegmentedReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out, + * num_segments, d_offsets, d_offsets + 1); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run argmin-reduction + * cub::DeviceSegmentedReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out, + * num_segments, d_offsets, d_offsets + 1); + * + * // d_out <-- [{1,6}, {1,INT_MAX}, {2,0}] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items (of some type \p T) \iterator + * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate (having value type KeyValuePair) \iterator + * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator + */ + template < + typename InputIteratorT, + typename OutputIteratorT, + typename OffsetIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t ArgMin( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. 
Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + // The input type + typedef typename std::iterator_traits::value_type InputValueT; + + // The output tuple type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + KeyValuePair, // ... then the key value pair OffsetT + InputValueT + typename std::iterator_traits::value_type>::Type OutputTupleT; // ... else the output iterator's value type + + // The output value type + typedef typename OutputTupleT::Value OutputValueT; + + // Wrapped input iterator to produce index-value tuples + typedef ArgIndexInputIterator ArgIndexInputIteratorT; + ArgIndexInputIteratorT d_indexed_in(d_in); + + // Initial value + OutputTupleT initial_value(1, Traits::Max()); // replace with std::numeric_limits::max() when C++11 support is more prevalent + + return DispatchSegmentedReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_indexed_in, + d_out, + num_segments, + d_begin_offsets, + d_end_offsets, + cub::ArgMin(), + initial_value, + stream, + debug_synchronous); + } + + + /** + * \brief Computes a device-wide segmented maximum using the greater-than ('>') operator. + * + * \par + * - Uses std::numeric_limits::lowest() as the initial value of the reduction. + * - When input a contiguous sequence of segments, a single sequence + * \p segment_offsets (of length num_segments+1) can be aliased + * for both the \p d_begin_offsets and \p d_end_offsets parameters (where + * the latter is specified as segment_offsets+1). + * - Does not support \p > operators that are non-commutative. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the max-reduction of a device vector of \p int data elements. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_segments; // e.g., 3 + * int *d_offsets; // e.g., [0, 3, 3, 7] + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * int *d_out; // e.g., [-, -, -] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSegmentedReduce::Max(d_temp_storage, temp_storage_bytes, d_in, d_out, + * num_segments, d_offsets, d_offsets + 1); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run max-reduction + * cub::DeviceSegmentedReduce::Max(d_temp_storage, temp_storage_bytes, d_in, d_out, + * num_segments, d_offsets, d_offsets + 1); + * + * // d_out <-- [8, INT_MIN, 9] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate \iterator + * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator + */ + template < + typename InputIteratorT, + typename OutputIteratorT, + typename OffsetIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t Max( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
+ size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + return DispatchSegmentedReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + num_segments, + d_begin_offsets, + d_end_offsets, + cub::Max(), + Traits::Lowest(), // replace with std::numeric_limits::lowest() when C++11 support is more prevalent + stream, + debug_synchronous); + } + + + /** + * \brief Finds the first device-wide maximum in each segment using the greater-than ('>') operator, also returning the in-segment index of that item + * + * \par + * - The output value type of \p d_out is cub::KeyValuePair (assuming the value type of \p d_in is \p T) + * - The maximum of the ith segment is written to d_out[i].value and its offset in that segment is written to d_out[i].key. + * - The {1, std::numeric_limits::lowest()} tuple is produced for zero-length inputs + * - When input a contiguous sequence of segments, a single sequence + * \p segment_offsets (of length num_segments+1) can be aliased + * for both the \p d_begin_offsets and \p d_end_offsets parameters (where + * the latter is specified as segment_offsets+1). + * - Does not support \p > operators that are non-commutative. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the argmax-reduction of a device vector of \p int data elements. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_segments; // e.g., 3 + * int *d_offsets; // e.g., [0, 3, 3, 7] + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * KeyValuePair *d_out; // e.g., [{-,-}, {-,-}, {-,-}] + * ... 
+ * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSegmentedReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_out, + * num_segments, d_offsets, d_offsets + 1); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run argmax-reduction + * cub::DeviceSegmentedReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_out, + * num_segments, d_offsets, d_offsets + 1); + * + * // d_out <-- [{0,8}, {1,INT_MIN}, {3,9}] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items (of some type \p T) \iterator + * \tparam OutputIteratorT [inferred] Output iterator type for recording the reduced aggregate (having value type KeyValuePair) \iterator + * \tparam OffsetIteratorT [inferred] Random-access input iterator type for reading segment offsets \iterator + */ + template < + typename InputIteratorT, + typename OutputIteratorT, + typename OffsetIteratorT> + CUB_RUNTIME_FUNCTION + static cudaError_t ArgMax( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + // The input type + typedef typename std::iterator_traits::value_type InputValueT; + + // The output tuple type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + KeyValuePair, // ... then the key value pair OffsetT + InputValueT + typename std::iterator_traits::value_type>::Type OutputTupleT; // ... 
else the output iterator's value type + + // The output value type + typedef typename OutputTupleT::Value OutputValueT; + + // Wrapped input iterator to produce index-value tuples + typedef ArgIndexInputIterator ArgIndexInputIteratorT; + ArgIndexInputIteratorT d_indexed_in(d_in); + + // Initial value + OutputTupleT initial_value(1, Traits::Lowest()); // replace with std::numeric_limits::lowest() when C++11 support is more prevalent + + return DispatchSegmentedReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_indexed_in, + d_out, + num_segments, + d_begin_offsets, + d_end_offsets, + cub::ArgMax(), + initial_value, + stream, + debug_synchronous); + } + +}; + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_select.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_select.cuh new file mode 100644 index 0000000..52a3e12 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_select.cuh @@ -0,0 +1,369 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences of data items residing within device-accessible memory. + */ + +#pragma once + +#include +#include + +#include "dispatch/dispatch_select_if.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief DeviceSelect provides device-wide, parallel operations for compacting selected items from sequences of data items residing within device-accessible memory. 
![](select_logo.png) + * \ingroup SingleModule + * + * \par Overview + * These operations apply a selection criterion to selectively copy + * items from a specified input sequence to a compact output sequence. + * + * \par Usage Considerations + * \cdp_class{DeviceSelect} + * + * \par Performance + * \linear_performance{select-flagged, select-if, and select-unique} + * + * \par + * The following chart illustrates DeviceSelect::If + * performance across different CUDA architectures for \p int32 items, + * where 50% of the items are randomly selected. + * + * \image html select_if_int32_50_percent.png + * + * \par + * The following chart illustrates DeviceSelect::Unique + * performance across different CUDA architectures for \p int32 items + * where segments have lengths uniformly sampled from [1,1000]. + * + * \image html select_unique_int32_len_500.png + * + * \par + * \plots_below + * + */ +struct DeviceSelect +{ + /** + * \brief Uses the \p d_flags sequence to selectively copy the corresponding items from \p d_in into \p d_out. The total number of items selected is written to \p d_num_selected_out. ![](select_flags_logo.png) + * + * \par + * - The value type of \p d_flags must be castable to \p bool (e.g., \p bool, \p char, \p int, etc.). + * - Copies of the selected items are compacted into \p d_out and maintain their original relative ordering. + * - \devicestorage + * + * \par Snippet + * The code snippet below illustrates the compaction of items selected from an \p int device vector. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input, flags, and output + * int num_items; // e.g., 8 + * int *d_in; // e.g., [1, 2, 3, 4, 5, 6, 7, 8] + * char *d_flags; // e.g., [1, 0, 0, 1, 0, 1, 1, 0] + * int *d_out; // e.g., [ , , , , , , , ] + * int *d_num_selected_out; // e.g., [ ] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run selection + * cub::DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items); + * + * // d_out <-- [1, 4, 6, 7] + * // d_num_selected_out <-- [4] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam FlagIterator [inferred] Random-access input iterator type for reading selection flags \iterator + * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing selected items \iterator + * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator + */ + template < + typename InputIteratorT, + typename FlagIterator, + typename OutputIteratorT, + typename NumSelectedIteratorT> + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Flagged( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
+ size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + FlagIterator d_flags, ///< [in] Pointer to the input sequence of selection flags + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of selected data items + NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., length of \p d_out) + int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + typedef int OffsetT; // Signed integer type for global offsets + typedef NullType SelectOp; // Selection op (not used) + typedef NullType EqualityOp; // Equality operator (not used) + + return DispatchSelectIf::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_flags, + d_out, + d_num_selected_out, + SelectOp(), + EqualityOp(), + num_items, + stream, + debug_synchronous); + } + + + /** + * \brief Uses the \p select_op functor to selectively copy items from \p d_in into \p d_out. The total number of items selected is written to \p d_num_selected_out. ![](select_logo.png) + * + * \par + * - Copies of the selected items are compacted into \p d_out and maintain their original relative ordering. + * - \devicestorage + * + * \par Performance + * The following charts illustrate saturated select-if performance across different + * CUDA architectures for \p int32 and \p int64 items, respectively. Items are + * selected with 50% probability. + * + * \image html select_if_int32_50_percent.png + * \image html select_if_int64_50_percent.png + * + * \par + * The following charts are similar, but 5% selection probability: + * + * \image html select_if_int32_5_percent.png + * \image html select_if_int64_5_percent.png + * + * \par Snippet + * The code snippet below illustrates the compaction of items selected from an \p int device vector. + * \par + * \code + * #include // or equivalently + * + * // Functor type for selecting values less than some criteria + * struct LessThan + * { + * int compare; + * + * CUB_RUNTIME_FUNCTION __forceinline__ + * LessThan(int compare) : compare(compare) {} + * + * CUB_RUNTIME_FUNCTION __forceinline__ + * bool operator()(const int &a) const { + * return (a < compare); + * } + * }; + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 8 + * int *d_in; // e.g., [0, 2, 3, 9, 5, 2, 81, 8] + * int *d_out; // e.g., [ , , , , , , , ] + * int *d_num_selected_out; // e.g., [ ] + * LessThan select_op(7); + * ... 
+ * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run selection + * cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op); + * + * // d_out <-- [0, 2, 3, 5, 2] + * // d_num_selected_out <-- [5] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing selected items \iterator + * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator + * \tparam SelectOp [inferred] Selection operator type having member bool operator()(const T &a) + */ + template < + typename InputIteratorT, + typename OutputIteratorT, + typename NumSelectedIteratorT, + typename SelectOp> + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t If( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of selected data items + NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., length of \p d_out) + int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + SelectOp select_op, ///< [in] Unary selection operator + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + typedef int OffsetT; // Signed integer type for global offsets + typedef NullType* FlagIterator; // FlagT iterator type (not used) + typedef NullType EqualityOp; // Equality operator (not used) + + return DispatchSelectIf::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + NULL, + d_out, + d_num_selected_out, + select_op, + EqualityOp(), + num_items, + stream, + debug_synchronous); + } + + + /** + * \brief Given an input sequence \p d_in having runs of consecutive equal-valued keys, only the first key from each run is selectively copied to \p d_out. The total number of items selected is written to \p d_num_selected_out. ![](unique_logo.png) + * + * \par + * - The == equality operator is used to determine whether keys are equivalent + * - Copies of the selected items are compacted into \p d_out and maintain their original relative ordering. + * - \devicestorage + * + * \par Performance + * The following charts illustrate saturated select-unique performance across different + * CUDA architectures for \p int32 and \p int64 items, respectively. Segments have + * lengths uniformly sampled from [1,1000]. 
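[Editorial note: the DeviceSelect::If snippet above lost its `#include <...>` target in this dump. Below is a self-contained version of that example with the include restored; the LessThan functor, input data, and expected output are taken directly from the snippet, and the host-side allocation and printout are added assumptions following standard CUB 1.8.0 usage.]

// Self-contained restoration of the select-if example documented above;
// assumes the cub-1.8.0 directory is on the include path.
#include <cstdio>
#include <cuda_runtime.h>
#include <cub/device/device_select.cuh>   // or equivalently <cub/cub.cuh>

// Functor type for selecting values less than some criterion (as in the snippet above)
struct LessThan
{
    int compare;
    CUB_RUNTIME_FUNCTION __forceinline__ LessThan(int compare) : compare(compare) {}
    CUB_RUNTIME_FUNCTION __forceinline__ bool operator()(const int &a) const { return a < compare; }
};

int main()
{
    const int num_items = 8;
    int h_in[num_items] = {0, 2, 3, 9, 5, 2, 81, 8};

    int *d_in, *d_out, *d_num_selected_out;
    cudaMalloc(&d_in,  num_items * sizeof(int));
    cudaMalloc(&d_out, num_items * sizeof(int));
    cudaMalloc(&d_num_selected_out, sizeof(int));
    cudaMemcpy(d_in, h_in, num_items * sizeof(int), cudaMemcpyHostToDevice);

    LessThan select_op(7);

    // Size the temporary storage, then run the compaction
    void *d_temp_storage = NULL;  size_t temp_storage_bytes = 0;
    cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out,
                          d_num_selected_out, num_items, select_op);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out,
                          d_num_selected_out, num_items, select_op);

    int h_num = 0;
    cudaMemcpy(&h_num, d_num_selected_out, sizeof(int), cudaMemcpyDeviceToHost);
    printf("selected %d items\n", h_num);   // expected: 5, with d_out <-- [0, 2, 3, 5, 2]
    return 0;
}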
+ * + * \image html select_unique_int32_len_500.png + * \image html select_unique_int64_len_500.png + * + * \par + * The following charts are similar, but with segment lengths uniformly sampled from [1,10]: + * + * \image html select_unique_int32_len_5.png + * \image html select_unique_int64_len_5.png + * + * \par Snippet + * The code snippet below illustrates the compaction of items selected from an \p int device vector. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input and output + * int num_items; // e.g., 8 + * int *d_in; // e.g., [0, 2, 2, 9, 5, 5, 5, 8] + * int *d_out; // e.g., [ , , , , , , , ] + * int *d_num_selected_out; // e.g., [ ] + * ... + * + * // Determine temporary device storage requirements + * void *d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run selection + * cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items); + * + * // d_out <-- [0, 2, 9, 5, 8] + * // d_num_selected_out <-- [5] + * + * \endcode + * + * \tparam InputIteratorT [inferred] Random-access input iterator type for reading input items \iterator + * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing selected items \iterator + * \tparam NumSelectedIteratorT [inferred] Output iterator type for recording the number of items selected \iterator + */ + template < + typename InputIteratorT, + typename OutputIteratorT, + typename NumSelectedIteratorT> + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Unique( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of selected data items + NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the output total number of items selected (i.e., length of \p d_out) + int num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
+ { + typedef int OffsetT; // Signed integer type for global offsets + typedef NullType* FlagIterator; // FlagT iterator type (not used) + typedef NullType SelectOp; // Selection op (not used) + typedef Equality EqualityOp; // Default == operator + + return DispatchSelectIf::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + NULL, + d_out, + d_num_selected_out, + SelectOp(), + EqualityOp(), + num_items, + stream, + debug_synchronous); + } + +}; + +/** + * \example example_device_select_flagged.cu + * \example example_device_select_if.cu + * \example example_device_select_unique.cu + */ + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_spmv.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_spmv.cuh new file mode 100644 index 0000000..63b6a7e --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/device_spmv.cuh @@ -0,0 +1,174 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * vector multiplication (SpMV). + */ + +#pragma once + +#include +#include +#include + +#include "dispatch/dispatch_spmv_orig.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * dense-vector multiplication (SpMV). 
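[Editorial note: the DeviceSelect::Unique snippet documented above also lost its `#include <...>` target. The sketch below restores it as a self-contained program; the input sequence and expected compaction come from that snippet, while the host-side setup is an added assumption following standard CUB 1.8.0 usage.]

// Self-contained version of the run-head compaction (DeviceSelect::Unique) documented above;
// assumes the cub-1.8.0 directory is on the include path.
#include <cstdio>
#include <cuda_runtime.h>
#include <cub/device/device_select.cuh>   // or equivalently <cub/cub.cuh>

int main()
{
    const int num_items = 8;
    int h_in[num_items] = {0, 2, 2, 9, 5, 5, 5, 8};   // runs of consecutive equal keys

    int *d_in, *d_out, *d_num_selected_out;
    cudaMalloc(&d_in,  num_items * sizeof(int));
    cudaMalloc(&d_out, num_items * sizeof(int));
    cudaMalloc(&d_num_selected_out, sizeof(int));
    cudaMemcpy(d_in, h_in, num_items * sizeof(int), cudaMemcpyHostToDevice);

    // Two-phase pattern: size the temporary storage, then run the selection
    void *d_temp_storage = NULL;  size_t temp_storage_bytes = 0;
    cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, d_in, d_out,
                              d_num_selected_out, num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, d_in, d_out,
                              d_num_selected_out, num_items);

    int h_num = 0;
    cudaMemcpy(&h_num, d_num_selected_out, sizeof(int), cudaMemcpyDeviceToHost);
    printf("kept %d run heads\n", h_num);   // expected: 5, with d_out <-- [0, 2, 9, 5, 8]
    return 0;
}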
+ * \ingroup SingleModule + * + * \par Overview + * The [SpMV computation](http://en.wikipedia.org/wiki/Sparse_matrix-vector_multiplication) + * performs the matrix-vector operation + * y = alpha*A*x + beta*y, + * where: + * - A is an mxn sparse matrix whose non-zero structure is specified in + * [compressed-storage-row (CSR) format](http://en.wikipedia.org/wiki/Sparse_matrix#Compressed_row_Storage_.28CRS_or_CSR.29) + * (i.e., three arrays: values, row_offsets, and column_indices) + * - x and y are dense vectors + * - alpha and beta are scalar multiplicands + * + * \par Usage Considerations + * \cdp_class{DeviceSpmv} + * + */ +struct DeviceSpmv +{ + /******************************************************************//** + * \name CSR matrix operations + *********************************************************************/ + //@{ + + /** + * \brief This function performs the matrix-vector operation y = A*x. + * + * \par Snippet + * The code snippet below illustrates SpMV upon a 9x9 CSR matrix A + * representing a 3x3 lattice (24 non-zeros). + * + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize device-accessible pointers for input matrix A, input vector x, + * // and output vector y + * int num_rows = 9; + * int num_cols = 9; + * int num_nonzeros = 24; + * + * float* d_values; // e.g., [1, 1, 1, 1, 1, 1, 1, 1, + * // 1, 1, 1, 1, 1, 1, 1, 1, + * // 1, 1, 1, 1, 1, 1, 1, 1] + * + * int* d_column_indices; // e.g., [1, 3, 0, 2, 4, 1, 5, 0, + * // 4, 6, 1, 3, 5, 7, 2, 4, + * // 8, 3, 7, 4, 6, 8, 5, 7] + * + * int* d_row_offsets; // e.g., [0, 2, 5, 7, 10, 14, 17, 19, 22, 24] + * + * float* d_vector_x; // e.g., [1, 1, 1, 1, 1, 1, 1, 1, 1] + * float* d_vector_y; // e.g., [ , , , , , , , , ] + * ... + * + * // Determine temporary device storage requirements + * void* d_temp_storage = NULL; + * size_t temp_storage_bytes = 0; + * cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes, d_values, + * d_row_offsets, d_column_indices, d_vector_x, d_vector_y, + * num_rows, num_cols, num_nonzeros, alpha, beta); + * + * // Allocate temporary storage + * cudaMalloc(&d_temp_storage, temp_storage_bytes); + * + * // Run SpMV + * cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes, d_values, + * d_row_offsets, d_column_indices, d_vector_x, d_vector_y, + * num_rows, num_cols, num_nonzeros, alpha, beta); + * + * // d_vector_y <-- [2, 3, 2, 3, 4, 3, 2, 3, 2] + * + * \endcode + * + * \tparam ValueT [inferred] Matrix and vector value type (e.g., /p float, /p double, etc.) + */ + template < + typename ValueT> + CUB_RUNTIME_FUNCTION + static cudaError_t CsrMV( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + ValueT* d_values, ///< [in] Pointer to the array of \p num_nonzeros values of the corresponding nonzero elements of matrix A. + int* d_row_offsets, ///< [in] Pointer to the array of \p m + 1 offsets demarcating the start of every row in \p d_column_indices and \p d_values (with the final entry being equal to \p num_nonzeros) + int* d_column_indices, ///< [in] Pointer to the array of \p num_nonzeros column-indices of the corresponding nonzero elements of matrix A. (Indices are zero-valued.) 
+ ValueT* d_vector_x, ///< [in] Pointer to the array of \p num_cols values corresponding to the dense input vector x + ValueT* d_vector_y, ///< [out] Pointer to the array of \p num_rows values corresponding to the dense output vector y + int num_rows, ///< [in] number of rows of matrix A. + int num_cols, ///< [in] number of columns of matrix A. + int num_nonzeros, ///< [in] number of nonzero elements of matrix A. + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + SpmvParams spmv_params; + spmv_params.d_values = d_values; + spmv_params.d_row_end_offsets = d_row_offsets + 1; + spmv_params.d_column_indices = d_column_indices; + spmv_params.d_vector_x = d_vector_x; + spmv_params.d_vector_y = d_vector_y; + spmv_params.num_rows = num_rows; + spmv_params.num_cols = num_cols; + spmv_params.num_nonzeros = num_nonzeros; + spmv_params.alpha = 1.0; + spmv_params.beta = 0.0; + + return DispatchSpmv::Dispatch( + d_temp_storage, + temp_storage_bytes, + spmv_params, + stream, + debug_synchronous); + } + + //@} end member group +}; + + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_histogram.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_histogram.cuh new file mode 100644 index 0000000..ab08e8e --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_histogram.cuh @@ -0,0 +1,1096 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/** + * \file + * cub::DeviceHistogram provides device-wide parallel operations for constructing histogram(s) from a sequence of samples data residing within device-accessible memory. + */ + +#pragma once + +#include +#include +#include + +#include "../../agent/agent_histogram.cuh" +#include "../../util_debug.cuh" +#include "../../util_device.cuh" +#include "../../thread/thread_search.cuh" +#include "../../grid/grid_queue.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + + +/****************************************************************************** + * Histogram kernel entry points + *****************************************************************************/ + +/** + * Histogram initialization kernel entry point + */ +template < + int NUM_ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed + typename CounterT, ///< Integer type for counting sample occurrences per histogram bin + typename OffsetT> ///< Signed integer type for global offsets +__global__ void DeviceHistogramInitKernel( + ArrayWrapper num_output_bins_wrapper, ///< Number of output histogram bins per channel + ArrayWrapper d_output_histograms_wrapper, ///< Histogram counter data having logical dimensions CounterT[NUM_ACTIVE_CHANNELS][num_bins.array[CHANNEL]] + GridQueue tile_queue) ///< Drain queue descriptor for dynamically mapping tile data onto thread blocks +{ + if ((threadIdx.x == 0) && (blockIdx.x == 0)) + tile_queue.ResetDrain(); + + int output_bin = (blockIdx.x * blockDim.x) + threadIdx.x; + + #pragma unroll + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + { + if (output_bin < num_output_bins_wrapper.array[CHANNEL]) + d_output_histograms_wrapper.array[CHANNEL][output_bin] = 0; + } +} + + +/** + * Histogram privatized sweep kernel entry point (multi-block). Computes privatized histograms, one per thread block. + */ +template < + typename AgentHistogramPolicyT, ///< Parameterized AgentHistogramPolicy tuning policy type + int PRIVATIZED_SMEM_BINS, ///< Maximum number of histogram bins per channel (e.g., up to 256) + int NUM_CHANNELS, ///< Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) + int NUM_ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed + typename SampleIteratorT, ///< The input iterator type. \iterator. 
+ typename CounterT, ///< Integer type for counting sample occurrences per histogram bin + typename PrivatizedDecodeOpT, ///< The transform operator type for determining privatized counter indices from samples, one for each channel + typename OutputDecodeOpT, ///< The transform operator type for determining output bin-ids from privatized counter indices, one for each channel + typename OffsetT> ///< Signed integer type for global offsets +__launch_bounds__ (int(AgentHistogramPolicyT::BLOCK_THREADS)) +__global__ void DeviceHistogramSweepKernel( + SampleIteratorT d_samples, ///< Input data to reduce + ArrayWrapper num_output_bins_wrapper, ///< The number bins per final output histogram + ArrayWrapper num_privatized_bins_wrapper, ///< The number bins per privatized histogram + ArrayWrapper d_output_histograms_wrapper, ///< Reference to final output histograms + ArrayWrapper d_privatized_histograms_wrapper, ///< Reference to privatized histograms + ArrayWrapper output_decode_op_wrapper, ///< The transform operator for determining output bin-ids from privatized counter indices, one for each channel + ArrayWrapper privatized_decode_op_wrapper, ///< The transform operator for determining privatized counter indices from samples, one for each channel + OffsetT num_row_pixels, ///< The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< The number of rows in the region of interest + OffsetT row_stride_samples, ///< The number of samples between starts of consecutive rows in the region of interest + int tiles_per_row, ///< Number of image tiles per row + GridQueue tile_queue) ///< Drain queue descriptor for dynamically mapping tile data onto thread blocks +{ + // Thread block type for compositing input tiles + typedef AgentHistogram< + AgentHistogramPolicyT, + PRIVATIZED_SMEM_BINS, + NUM_CHANNELS, + NUM_ACTIVE_CHANNELS, + SampleIteratorT, + CounterT, + PrivatizedDecodeOpT, + OutputDecodeOpT, + OffsetT> + AgentHistogramT; + + // Shared memory for AgentHistogram + __shared__ typename AgentHistogramT::TempStorage temp_storage; + + AgentHistogramT agent( + temp_storage, + d_samples, + num_output_bins_wrapper.array, + num_privatized_bins_wrapper.array, + d_output_histograms_wrapper.array, + d_privatized_histograms_wrapper.array, + output_decode_op_wrapper.array, + privatized_decode_op_wrapper.array); + + // Initialize counters + agent.InitBinCounters(); + + // Consume input tiles + agent.ConsumeTiles( + num_row_pixels, + num_rows, + row_stride_samples, + tiles_per_row, + tile_queue); + + // Store output to global (if necessary) + agent.StoreOutput(); + +} + + + + + + +/****************************************************************************** + * Dispatch + ******************************************************************************/ + +/** + * Utility class for dispatching the appropriately-tuned kernels for DeviceHistogram + */ +template < + int NUM_CHANNELS, ///< Number of channels interleaved in the input data (may be greater than the number of channels being actively histogrammed) + int NUM_ACTIVE_CHANNELS, ///< Number of channels actively being histogrammed + typename SampleIteratorT, ///< Random-access input iterator type for reading input items \iterator + typename CounterT, ///< Integer type for counting sample occurrences per histogram bin + typename LevelT, ///< Type for specifying bin level boundaries + typename OffsetT> ///< Signed integer type for global offsets +struct DipatchHistogram +{ + 
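+    /*
+     * Outline of the privatization scheme used by the routines below: every thread block first
+     * accumulates counts into a privatized histogram (in shared or global memory), and those
+     * privatized counts are then folded into the final output histograms. Two per-channel
+     * functors drive this, roughly:
+     *
+     *   privatized_decode_op[c].BinSelect(sample, privatized_bin, is_valid);   // sample -> privatized bin
+     *   output_decode_op[c].BinSelect(privatized_bin, output_bin, is_valid);   // privatized bin -> output bin
+     *
+     * For samples wider than 8 bits the first functor does the real binning (search or scale) and
+     * the second is a pass-through; for 8-bit samples the roles are reversed: 256 privatized bins
+     * (one per possible sample value) are always used, and the search/scale happens on output.
+     */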
//--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + /// The sample value type of the input iterator + typedef typename std::iterator_traits::value_type SampleT; + + enum + { + // Maximum number of bins per channel for which we will use a privatized smem strategy + MAX_PRIVATIZED_SMEM_BINS = 256 + }; + + + //--------------------------------------------------------------------- + // Transform functors for converting samples to bin-ids + //--------------------------------------------------------------------- + + // Searches for bin given a list of bin-boundary levels + template + struct SearchTransform + { + LevelIteratorT d_levels; // Pointer to levels array + int num_output_levels; // Number of levels in array + + // Initializer + __host__ __device__ __forceinline__ void Init( + LevelIteratorT d_levels, // Pointer to levels array + int num_output_levels) // Number of levels in array + { + this->d_levels = d_levels; + this->num_output_levels = num_output_levels; + } + + // Method for converting samples to bin-ids + template + __host__ __device__ __forceinline__ void BinSelect(_SampleT sample, int &bin, bool valid) + { + /// Level iterator wrapper type + typedef typename If::VALUE, + CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedInputIterator + LevelIteratorT>::Type // Directly use the supplied input iterator type + WrappedLevelIteratorT; + + WrappedLevelIteratorT wrapped_levels(d_levels); + + int num_bins = num_output_levels - 1; + if (valid) + { + bin = UpperBound(wrapped_levels, num_output_levels, (LevelT) sample) - 1; + if (bin >= num_bins) + bin = -1; + } + } + }; + + + // Scales samples to evenly-spaced bins + struct ScaleTransform + { + int num_bins; // Number of levels in array + LevelT max; // Max sample level (exclusive) + LevelT min; // Min sample level (inclusive) + LevelT scale; // Bin scaling factor + + // Initializer + template + __host__ __device__ __forceinline__ void Init( + int num_output_levels, // Number of levels in array + _LevelT max, // Max sample level (exclusive) + _LevelT min, // Min sample level (inclusive) + _LevelT scale) // Bin scaling factor + { + this->num_bins = num_output_levels - 1; + this->max = max; + this->min = min; + this->scale = scale; + } + + // Initializer (float specialization) + __host__ __device__ __forceinline__ void Init( + int num_output_levels, // Number of levels in array + float max, // Max sample level (exclusive) + float min, // Min sample level (inclusive) + float scale) // Bin scaling factor + { + this->num_bins = num_output_levels - 1; + this->max = max; + this->min = min; + this->scale = float(1.0) / scale; + } + + // Initializer (double specialization) + __host__ __device__ __forceinline__ void Init( + int num_output_levels, // Number of levels in array + double max, // Max sample level (exclusive) + double min, // Min sample level (inclusive) + double scale) // Bin scaling factor + { + this->num_bins = num_output_levels - 1; + this->max = max; + this->min = min; + this->scale = double(1.0) / scale; + } + + // Method for converting samples to bin-ids + template + __host__ __device__ __forceinline__ void BinSelect(_SampleT sample, int &bin, bool valid) + { + LevelT level_sample = (LevelT) sample; + + if (valid && (level_sample >= min) && (level_sample < max)) + bin = (int) ((level_sample - min) / scale); + } + + // Method for converting samples to bin-ids (float specialization) + 
template + __host__ __device__ __forceinline__ void BinSelect(float sample, int &bin, bool valid) + { + LevelT level_sample = (LevelT) sample; + + if (valid && (level_sample >= min) && (level_sample < max)) + bin = (int) ((level_sample - min) * scale); + } + + // Method for converting samples to bin-ids (double specialization) + template + __host__ __device__ __forceinline__ void BinSelect(double sample, int &bin, bool valid) + { + LevelT level_sample = (LevelT) sample; + + if (valid && (level_sample >= min) && (level_sample < max)) + bin = (int) ((level_sample - min) * scale); + } + }; + + + // Pass-through bin transform operator + struct PassThruTransform + { + // Method for converting samples to bin-ids + template + __host__ __device__ __forceinline__ void BinSelect(_SampleT sample, int &bin, bool valid) + { + if (valid) + bin = (int) sample; + } + }; + + + + //--------------------------------------------------------------------- + // Tuning policies + //--------------------------------------------------------------------- + + template + struct TScale + { + enum + { + V_SCALE = (sizeof(SampleT) + sizeof(int) - 1) / sizeof(int), + VALUE = CUB_MAX((NOMINAL_ITEMS_PER_THREAD / NUM_ACTIVE_CHANNELS / V_SCALE), 1) + }; + }; + + + /// SM11 + struct Policy110 + { + // HistogramSweepPolicy + typedef AgentHistogramPolicy< + 512, + (NUM_CHANNELS == 1) ? 8 : 2, + BLOCK_LOAD_DIRECT, + LOAD_DEFAULT, + true, + GMEM, + false> + HistogramSweepPolicy; + }; + + /// SM20 + struct Policy200 + { + // HistogramSweepPolicy + typedef AgentHistogramPolicy< + (NUM_CHANNELS == 1) ? 256 : 128, + (NUM_CHANNELS == 1) ? 8 : 3, + (NUM_CHANNELS == 1) ? BLOCK_LOAD_DIRECT : BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + true, + SMEM, + false> + HistogramSweepPolicy; + }; + + /// SM30 + struct Policy300 + { + // HistogramSweepPolicy + typedef AgentHistogramPolicy< + 512, + (NUM_CHANNELS == 1) ? 
8 : 2, + BLOCK_LOAD_DIRECT, + LOAD_DEFAULT, + true, + GMEM, + false> + HistogramSweepPolicy; + }; + + /// SM35 + struct Policy350 + { + // HistogramSweepPolicy + typedef AgentHistogramPolicy< + 128, + TScale<8>::VALUE, + BLOCK_LOAD_DIRECT, + LOAD_LDG, + true, + BLEND, + true> + HistogramSweepPolicy; + }; + + /// SM50 + struct Policy500 + { + // HistogramSweepPolicy + typedef AgentHistogramPolicy< + 384, + TScale<16>::VALUE, + BLOCK_LOAD_DIRECT, + LOAD_LDG, + true, + SMEM, + false> + HistogramSweepPolicy; + }; + + + + //--------------------------------------------------------------------- + // Tuning policies of current PTX compiler pass + //--------------------------------------------------------------------- + +#if (CUB_PTX_ARCH >= 500) + typedef Policy500 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 350) + typedef Policy350 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 300) + typedef Policy300 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 200) + typedef Policy200 PtxPolicy; + +#else + typedef Policy110 PtxPolicy; + +#endif + + // "Opaque" policies (whose parameterizations aren't reflected in the type signature) + struct PtxHistogramSweepPolicy : PtxPolicy::HistogramSweepPolicy {}; + + + //--------------------------------------------------------------------- + // Utilities + //--------------------------------------------------------------------- + + /** + * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use + */ + template + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t InitConfigs( + int ptx_version, + KernelConfig &histogram_sweep_config) + { + #if (CUB_PTX_ARCH > 0) + + // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy + return histogram_sweep_config.template Init(); + + #else + + // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version + if (ptx_version >= 500) + { + return histogram_sweep_config.template Init(); + } + else if (ptx_version >= 350) + { + return histogram_sweep_config.template Init(); + } + else if (ptx_version >= 300) + { + return histogram_sweep_config.template Init(); + } + else if (ptx_version >= 200) + { + return histogram_sweep_config.template Init(); + } + else if (ptx_version >= 110) + { + return histogram_sweep_config.template Init(); + } + else + { + // No global atomic support + return cudaErrorNotSupported; + } + + #endif + } + + + /** + * Kernel kernel dispatch configuration + */ + struct KernelConfig + { + int block_threads; + int pixels_per_thread; + + template + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t Init() + { + block_threads = BlockPolicy::BLOCK_THREADS; + pixels_per_thread = BlockPolicy::PIXELS_PER_THREAD; + + return cudaSuccess; + } + }; + + + //--------------------------------------------------------------------- + // Dispatch entrypoints + //--------------------------------------------------------------------- + + /** + * Privatization-based dispatch routine + */ + template < + typename PrivatizedDecodeOpT, ///< The transform operator type for determining privatized counter indices from samples, one for each channel + typename OutputDecodeOpT, ///< The transform operator type for determining output bin-ids from privatized counter indices, one for each channel + typename DeviceHistogramInitKernelT, ///< Function type of cub::DeviceHistogramInitKernel + typename DeviceHistogramSweepKernelT> ///< Function type of cub::DeviceHistogramSweepKernel + CUB_RUNTIME_FUNCTION 
__forceinline__ + static cudaError_t PrivatizedDispatch( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of sample items. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). + CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. + int num_privatized_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. + PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS], ///< [in] Transform operators for determining bin-ids from samples, one for each channel + int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. + OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS], ///< [in] Transform operators for determining bin-ids from samples, one for each channel + int max_num_output_bins, ///< [in] Maximum number of output bins in any channel + OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest + DeviceHistogramInitKernelT histogram_init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceHistogramInitKernel + DeviceHistogramSweepKernelT histogram_sweep_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceHistogramSweepKernel + KernelConfig histogram_sweep_config, ///< [in] Dispatch parameters that match the policy that \p histogram_sweep_kernel was compiled for + cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous) ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
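+    /*
+     * Like the public DeviceHistogram entry points, this routine is meant to be called twice
+     * (a rough usage sketch; the remaining argument list is elided here):
+     *
+     *   void*  d_temp_storage     = NULL;
+     *   size_t temp_storage_bytes = 0;
+     *   PrivatizedDispatch(d_temp_storage, temp_storage_bytes, ...);   // NULL storage: only computes the size
+     *   cudaMalloc(&d_temp_storage, temp_storage_bytes);
+     *   PrivatizedDispatch(d_temp_storage, temp_storage_bytes, ...);   // second call does the actual work
+     */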
+ { + #ifndef CUB_RUNTIME_ENABLED + + // Kernel launch not supported from this device + return CubDebug(cudaErrorNotSupported); + + #else + + cudaError error = cudaSuccess; + do + { + // Get device ordinal + int device_ordinal; + if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; + + // Get SM count + int sm_count; + if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; + + // Get SM occupancy for histogram_sweep_kernel + int histogram_sweep_sm_occupancy; + if (CubDebug(error = MaxSmOccupancy( + histogram_sweep_sm_occupancy, + histogram_sweep_kernel, + histogram_sweep_config.block_threads))) break; + + // Get device occupancy for histogram_sweep_kernel + int histogram_sweep_occupancy = histogram_sweep_sm_occupancy * sm_count; + + if (num_row_pixels * NUM_CHANNELS == row_stride_samples) + { + // Treat as a single linear array of samples + num_row_pixels *= num_rows; + num_rows = 1; + row_stride_samples = num_row_pixels * NUM_CHANNELS; + } + + // Get grid dimensions, trying to keep total blocks ~histogram_sweep_occupancy + int pixels_per_tile = histogram_sweep_config.block_threads * histogram_sweep_config.pixels_per_thread; + int tiles_per_row = int(num_row_pixels + pixels_per_tile - 1) / pixels_per_tile; + int blocks_per_row = CUB_MIN(histogram_sweep_occupancy, tiles_per_row); + int blocks_per_col = (blocks_per_row > 0) ? + int(CUB_MIN(histogram_sweep_occupancy / blocks_per_row, num_rows)) : + 0; + int num_thread_blocks = blocks_per_row * blocks_per_col; + + dim3 sweep_grid_dims; + sweep_grid_dims.x = (unsigned int) blocks_per_row; + sweep_grid_dims.y = (unsigned int) blocks_per_col; + sweep_grid_dims.z = 1; + + // Temporary storage allocation requirements + const int NUM_ALLOCATIONS = NUM_ACTIVE_CHANNELS + 1; + void* allocations[NUM_ALLOCATIONS]; + size_t allocation_sizes[NUM_ALLOCATIONS]; + + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + allocation_sizes[CHANNEL] = size_t(num_thread_blocks) * (num_privatized_levels[CHANNEL] - 1) * sizeof(CounterT); + + allocation_sizes[NUM_ALLOCATIONS - 1] = GridQueue::AllocationSize(); + + // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) + if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; + if (d_temp_storage == NULL) + { + // Return if the caller is simply requesting the size of the storage allocation + break; + } + + // Construct the grid queue descriptor + GridQueue tile_queue(allocations[NUM_ALLOCATIONS - 1]); + + // Setup array wrapper for histogram channel output (because we can't pass static arrays as kernel parameters) + ArrayWrapper d_output_histograms_wrapper; + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + d_output_histograms_wrapper.array[CHANNEL] = d_output_histograms[CHANNEL]; + + // Setup array wrapper for privatized per-block histogram channel output (because we can't pass static arrays as kernel parameters) + ArrayWrapper d_privatized_histograms_wrapper; + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + d_privatized_histograms_wrapper.array[CHANNEL] = (CounterT*) allocations[CHANNEL]; + + // Setup array wrapper for sweep bin transforms (because we can't pass static arrays as kernel parameters) + ArrayWrapper privatized_decode_op_wrapper; + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + privatized_decode_op_wrapper.array[CHANNEL] = privatized_decode_op[CHANNEL]; + + // Setup 
array wrapper for aggregation bin transforms (because we can't pass static arrays as kernel parameters) + ArrayWrapper output_decode_op_wrapper; + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + output_decode_op_wrapper.array[CHANNEL] = output_decode_op[CHANNEL]; + + // Setup array wrapper for num privatized bins (because we can't pass static arrays as kernel parameters) + ArrayWrapper num_privatized_bins_wrapper; + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + num_privatized_bins_wrapper.array[CHANNEL] = num_privatized_levels[CHANNEL] - 1; + + // Setup array wrapper for num output bins (because we can't pass static arrays as kernel parameters) + ArrayWrapper num_output_bins_wrapper; + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + num_output_bins_wrapper.array[CHANNEL] = num_output_levels[CHANNEL] - 1; + + int histogram_init_block_threads = 256; + int histogram_init_grid_dims = (max_num_output_bins + histogram_init_block_threads - 1) / histogram_init_block_threads; + + // Log DeviceHistogramInitKernel configuration + if (debug_synchronous) _CubLog("Invoking DeviceHistogramInitKernel<<<%d, %d, 0, %lld>>>()\n", + histogram_init_grid_dims, histogram_init_block_threads, (long long) stream); + + // Invoke histogram_init_kernel + histogram_init_kernel<<>>( + num_output_bins_wrapper, + d_output_histograms_wrapper, + tile_queue); + + // Return if empty problem + if ((blocks_per_row == 0) || (blocks_per_col == 0)) + break; + + // Log histogram_sweep_kernel configuration + if (debug_synchronous) _CubLog("Invoking histogram_sweep_kernel<<<{%d, %d, %d}, %d, 0, %lld>>>(), %d pixels per thread, %d SM occupancy\n", + sweep_grid_dims.x, sweep_grid_dims.y, sweep_grid_dims.z, + histogram_sweep_config.block_threads, (long long) stream, histogram_sweep_config.pixels_per_thread, histogram_sweep_sm_occupancy); + + // Invoke histogram_sweep_kernel + histogram_sweep_kernel<<>>( + d_samples, + num_output_bins_wrapper, + num_privatized_bins_wrapper, + d_output_histograms_wrapper, + d_privatized_histograms_wrapper, + output_decode_op_wrapper, + privatized_decode_op_wrapper, + num_row_pixels, + num_rows, + row_stride_samples, + tiles_per_row, + tile_queue); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + + } + while (0); + + return error; + + #endif // CUB_RUNTIME_ENABLED + } + + + + /** + * Dispatch routine for HistogramRange, specialized for sample types larger than 8bit + */ + CUB_RUNTIME_FUNCTION + static cudaError_t DispatchRange( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). + CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. 
+ int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. + LevelT *d_levels[NUM_ACTIVE_CHANNELS], ///< [in] The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. + OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest + cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + Int2Type is_byte_sample) ///< [in] Marker type indicating whether or not SampleT is a 8b type + { + cudaError error = cudaSuccess; + do + { + // Get PTX version + int ptx_version; + #if (CUB_PTX_ARCH == 0) + if (CubDebug(error = PtxVersion(ptx_version))) break; + #else + ptx_version = CUB_PTX_ARCH; + #endif + + // Get kernel dispatch configurations + KernelConfig histogram_sweep_config; + if (CubDebug(error = InitConfigs(ptx_version, histogram_sweep_config))) + break; + + // Use the search transform op for converting samples to privatized bins + typedef SearchTransform PrivatizedDecodeOpT; + + // Use the pass-thru transform op for converting privatized bins to output bins + typedef PassThruTransform OutputDecodeOpT; + + PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS]; + OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS]; + int max_levels = num_output_levels[0]; + + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + privatized_decode_op[channel].Init(d_levels[channel], num_output_levels[channel]); + if (num_output_levels[channel] > max_levels) + max_levels = num_output_levels[channel]; + } + int max_num_output_bins = max_levels - 1; + + // Dispatch + if (max_num_output_bins > MAX_PRIVATIZED_SMEM_BINS) + { + // Too many bins to keep in shared memory. 
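+                // With PRIVATIZED_SMEM_BINS set to 0, the sweep kernel keeps its per-block
+                // privatized histograms in the global-memory allocations carved out by
+                // PrivatizedDispatch instead of in shared memory.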
+ const int PRIVATIZED_SMEM_BINS = 0; + + if (CubDebug(error = PrivatizedDispatch( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_output_histograms, + num_output_levels, + privatized_decode_op, + num_output_levels, + output_decode_op, + max_num_output_bins, + num_row_pixels, + num_rows, + row_stride_samples, + DeviceHistogramInitKernel, + DeviceHistogramSweepKernel, + histogram_sweep_config, + stream, + debug_synchronous))) break; + } + else + { + // Dispatch shared-privatized approach + const int PRIVATIZED_SMEM_BINS = MAX_PRIVATIZED_SMEM_BINS; + + if (CubDebug(error = PrivatizedDispatch( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_output_histograms, + num_output_levels, + privatized_decode_op, + num_output_levels, + output_decode_op, + max_num_output_bins, + num_row_pixels, + num_rows, + row_stride_samples, + DeviceHistogramInitKernel, + DeviceHistogramSweepKernel, + histogram_sweep_config, + stream, + debug_synchronous))) break; + } + + } while (0); + + return error; + } + + + /** + * Dispatch routine for HistogramRange, specialized for 8-bit sample types (computes 256-bin privatized histograms and then reduces to user-specified levels) + */ + CUB_RUNTIME_FUNCTION + static cudaError_t DispatchRange( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). + CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. + int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. + LevelT *d_levels[NUM_ACTIVE_CHANNELS], ///< [in] The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. + OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest + cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. 
+ Int2Type is_byte_sample) ///< [in] Marker type indicating whether or not SampleT is a 8b type + { + cudaError error = cudaSuccess; + do + { + // Get PTX version + int ptx_version; + #if (CUB_PTX_ARCH == 0) + if (CubDebug(error = PtxVersion(ptx_version))) break; + #else + ptx_version = CUB_PTX_ARCH; + #endif + + // Get kernel dispatch configurations + KernelConfig histogram_sweep_config; + if (CubDebug(error = InitConfigs(ptx_version, histogram_sweep_config))) + break; + + // Use the pass-thru transform op for converting samples to privatized bins + typedef PassThruTransform PrivatizedDecodeOpT; + + // Use the search transform op for converting privatized bins to output bins + typedef SearchTransform OutputDecodeOpT; + + int num_privatized_levels[NUM_ACTIVE_CHANNELS]; + PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS]; + OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS]; + int max_levels = num_output_levels[0]; // Maximum number of levels in any channel + + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + num_privatized_levels[channel] = 257; + output_decode_op[channel].Init(d_levels[channel], num_output_levels[channel]); + + if (num_output_levels[channel] > max_levels) + max_levels = num_output_levels[channel]; + } + int max_num_output_bins = max_levels - 1; + + const int PRIVATIZED_SMEM_BINS = 256; + + if (CubDebug(error = PrivatizedDispatch( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_output_histograms, + num_privatized_levels, + privatized_decode_op, + num_output_levels, + output_decode_op, + max_num_output_bins, + num_row_pixels, + num_rows, + row_stride_samples, + DeviceHistogramInitKernel, + DeviceHistogramSweepKernel, + histogram_sweep_config, + stream, + debug_synchronous))) break; + + } while (0); + + return error; + } + + + /** + * Dispatch routine for HistogramEven, specialized for sample types larger than 8-bit + */ + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t DispatchEven( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of sample items. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). + CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. + int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. + LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. + LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. 
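+        /*
+         * Even binning in brief: each channel's ScaleTransform is initialized with
+         * scale = (upper_level - lower_level) / num_bins, so a sample maps to
+         *
+         *   bin = (int) ((sample - lower_level) / scale);        // generic LevelT
+         *   bin = (int) ((sample - lower_level) * (1 / scale));  // float/double specializations store the reciprocal
+         *
+         * Samples falling outside [lower_level, upper_level) are left unbinned and not counted.
+         */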
+ OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest + cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + Int2Type is_byte_sample) ///< [in] Marker type indicating whether or not SampleT is a 8b type + { + cudaError error = cudaSuccess; + do + { + // Get PTX version + int ptx_version; + #if (CUB_PTX_ARCH == 0) + if (CubDebug(error = PtxVersion(ptx_version))) break; + #else + ptx_version = CUB_PTX_ARCH; + #endif + + // Get kernel dispatch configurations + KernelConfig histogram_sweep_config; + if (CubDebug(error = InitConfigs(ptx_version, histogram_sweep_config))) + break; + + // Use the scale transform op for converting samples to privatized bins + typedef ScaleTransform PrivatizedDecodeOpT; + + // Use the pass-thru transform op for converting privatized bins to output bins + typedef PassThruTransform OutputDecodeOpT; + + PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS]; + OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS]; + int max_levels = num_output_levels[0]; + + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + int bins = num_output_levels[channel] - 1; + LevelT scale = (upper_level[channel] - lower_level[channel]) / bins; + + privatized_decode_op[channel].Init(num_output_levels[channel], upper_level[channel], lower_level[channel], scale); + + if (num_output_levels[channel] > max_levels) + max_levels = num_output_levels[channel]; + } + int max_num_output_bins = max_levels - 1; + + if (max_num_output_bins > MAX_PRIVATIZED_SMEM_BINS) + { + // Dispatch shared-privatized approach + const int PRIVATIZED_SMEM_BINS = 0; + + if (CubDebug(error = PrivatizedDispatch( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_output_histograms, + num_output_levels, + privatized_decode_op, + num_output_levels, + output_decode_op, + max_num_output_bins, + num_row_pixels, + num_rows, + row_stride_samples, + DeviceHistogramInitKernel, + DeviceHistogramSweepKernel, + histogram_sweep_config, + stream, + debug_synchronous))) break; + } + else + { + // Dispatch shared-privatized approach + const int PRIVATIZED_SMEM_BINS = MAX_PRIVATIZED_SMEM_BINS; + + if (CubDebug(error = PrivatizedDispatch( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_output_histograms, + num_output_levels, + privatized_decode_op, + num_output_levels, + output_decode_op, + max_num_output_bins, + num_row_pixels, + num_rows, + row_stride_samples, + DeviceHistogramInitKernel, + DeviceHistogramSweepKernel, + histogram_sweep_config, + stream, + debug_synchronous))) break; + } + } + while (0); + + return error; + } + + + /** + * Dispatch routine for HistogramEven, specialized for 8-bit sample types (computes 256-bin privatized histograms and then reduces to user-specified levels) + */ + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t DispatchEven( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
+ size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SampleIteratorT d_samples, ///< [in] The pointer to the input sequence of sample items. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). + CounterT* d_output_histograms[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_output_levels[i] - 1. + int num_output_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of bin level boundaries for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_output_levels[i] - 1. + LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. + LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. + OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_samples, ///< [in] The number of samples between starts of consecutive rows in the region of interest + cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + Int2Type is_byte_sample) ///< [in] Marker type indicating whether or not SampleT is a 8b type + { + cudaError error = cudaSuccess; + do + { + // Get PTX version + int ptx_version; + #if (CUB_PTX_ARCH == 0) + if (CubDebug(error = PtxVersion(ptx_version))) break; + #else + ptx_version = CUB_PTX_ARCH; + #endif + + // Get kernel dispatch configurations + KernelConfig histogram_sweep_config; + if (CubDebug(error = InitConfigs(ptx_version, histogram_sweep_config))) + break; + + // Use the pass-thru transform op for converting samples to privatized bins + typedef PassThruTransform PrivatizedDecodeOpT; + + // Use the scale transform op for converting privatized bins to output bins + typedef ScaleTransform OutputDecodeOpT; + + int num_privatized_levels[NUM_ACTIVE_CHANNELS]; + PrivatizedDecodeOpT privatized_decode_op[NUM_ACTIVE_CHANNELS]; + OutputDecodeOpT output_decode_op[NUM_ACTIVE_CHANNELS]; + int max_levels = num_output_levels[0]; + + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + num_privatized_levels[channel] = 257; + + int bins = num_output_levels[channel] - 1; + LevelT scale = (upper_level[channel] - lower_level[channel]) / bins; + output_decode_op[channel].Init(num_output_levels[channel], upper_level[channel], lower_level[channel], scale); + + if (num_output_levels[channel] > max_levels) + max_levels = num_output_levels[channel]; + } + int max_num_output_bins = max_levels - 1; + + const int PRIVATIZED_SMEM_BINS = 256; + + if (CubDebug(error = PrivatizedDispatch( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_output_histograms, + num_privatized_levels, + privatized_decode_op, + num_output_levels, + output_decode_op, + max_num_output_bins, + num_row_pixels, + num_rows, + row_stride_samples, + DeviceHistogramInitKernel, + DeviceHistogramSweepKernel, + histogram_sweep_config, + stream, + debug_synchronous))) break; + + 
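+            // Note: for 8-bit samples the privatized histograms always use 256 bins (257 levels),
+            // one per possible sample value; the caller's lower_level/upper_level/scale are only
+            // applied by the output ScaleTransform when folding privatized counts into d_output_histograms.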
} + while (0); + + return error; + } + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_radix_sort.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_radix_sort.cuh new file mode 100644 index 0000000..d1a992d --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_radix_sort.cuh @@ -0,0 +1,1619 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceRadixSort provides device-wide, parallel operations for computing a radix sort across a sequence of data items residing within device-accessible memory. + */ + +#pragma once + +#include +#include + +#include "../../agent/agent_radix_sort_upsweep.cuh" +#include "../../agent/agent_radix_sort_downsweep.cuh" +#include "../../agent/agent_scan.cuh" +#include "../../block/block_radix_sort.cuh" +#include "../../grid/grid_even_share.cuh" +#include "../../util_type.cuh" +#include "../../util_debug.cuh" +#include "../../util_device.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/****************************************************************************** + * Kernel entry points + *****************************************************************************/ + +/** + * Upsweep digit-counting kernel entry point (multi-block). Computes privatized digit histograms, one per block. 
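+ *
+ * In the striped spine layout referred to in the parameter descriptions, block b's count for
+ * digit d lands (roughly) at d_spine[d * gridDim.x + b], so an exclusive prefix sum over the
+ * whole spine later yields, for every (digit, block) pair, the global scatter base offset used
+ * by the downsweep pass. A small picture for 2 blocks and 4 radix digits:
+ *
+ *   d_spine = [ c(d0,b0), c(d0,b1), c(d1,b0), c(d1,b1), c(d2,b0), c(d2,b1), c(d3,b0), c(d3,b1) ]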
+ */ +template < + typename ChainedPolicyT, ///< Chained tuning policy + bool ALT_DIGIT_BITS, ///< Whether or not to use the alternate (lower-bits) policy + bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low + typename KeyT, ///< Key type + typename OffsetT> ///< Signed integer type for global offsets +__launch_bounds__ (int((ALT_DIGIT_BITS) ? + ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::BLOCK_THREADS : + ChainedPolicyT::ActivePolicy::UpsweepPolicy::BLOCK_THREADS)) +__global__ void DeviceRadixSortUpsweepKernel( + const KeyT *d_keys, ///< [in] Input keys buffer + OffsetT *d_spine, ///< [out] Privatized (per block) digit histograms (striped, i.e., 0s counts from each block, then 1s counts from each block, etc.) + OffsetT /*num_items*/, ///< [in] Total number of input data items + int current_bit, ///< [in] Bit position of current radix digit + int num_bits, ///< [in] Number of bits of current radix digit + GridEvenShare even_share) ///< [in] Even-share descriptor for mapan equal number of tiles onto each thread block +{ + enum { + TILE_ITEMS = ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::BLOCK_THREADS * + ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::ITEMS_PER_THREAD + }; + + // Parameterize AgentRadixSortUpsweep type for the current configuration + typedef AgentRadixSortUpsweep< + typename If<(ALT_DIGIT_BITS), + typename ChainedPolicyT::ActivePolicy::AltUpsweepPolicy, + typename ChainedPolicyT::ActivePolicy::UpsweepPolicy>::Type, + KeyT, + OffsetT> + AgentRadixSortUpsweepT; + + // Shared memory storage + __shared__ typename AgentRadixSortUpsweepT::TempStorage temp_storage; + + // Initialize GRID_MAPPING_RAKE even-share descriptor for this thread block + even_share.template BlockInit(); + + AgentRadixSortUpsweepT upsweep(temp_storage, d_keys, current_bit, num_bits); + + upsweep.ProcessRegion(even_share.block_offset, even_share.block_end); + + CTA_SYNC(); + + // Write out digit counts (striped) + upsweep.template ExtractCounts(d_spine, gridDim.x, blockIdx.x); +} + + +/** + * Spine scan kernel entry point (single-block). Computes an exclusive prefix sum over the privatized digit histograms + */ +template < + typename ChainedPolicyT, ///< Chained tuning policy + typename OffsetT> ///< Signed integer type for global offsets +__launch_bounds__ (int(ChainedPolicyT::ActivePolicy::ScanPolicy::BLOCK_THREADS), 1) +__global__ void RadixSortScanBinsKernel( + OffsetT *d_spine, ///< [in,out] Privatized (per block) digit histograms (striped, i.e., 0s counts from each block, then 1s counts from each block, etc.) + int num_counts) ///< [in] Total number of bin-counts +{ + // Parameterize the AgentScan type for the current configuration + typedef AgentScan< + typename ChainedPolicyT::ActivePolicy::ScanPolicy, + OffsetT*, + OffsetT*, + cub::Sum, + OffsetT, + OffsetT> + AgentScanT; + + // Shared memory storage + __shared__ typename AgentScanT::TempStorage temp_storage; + + // Block scan instance + AgentScanT block_scan(temp_storage, d_spine, d_spine, cub::Sum(), OffsetT(0)) ; + + // Process full input tiles + int block_offset = 0; + BlockScanRunningPrefixOp prefix_op(0, Sum()); + while (block_offset + AgentScanT::TILE_ITEMS <= num_counts) + { + block_scan.template ConsumeTile(block_offset, prefix_op); + block_offset += AgentScanT::TILE_ITEMS; + } +} + + +/** + * Downsweep pass kernel entry point (multi-block). Scatters keys (and values) into corresponding bins for the current digit place. 
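+ *
+ * Taken together with the upsweep and spine-scan kernels above, one digit pass proceeds roughly as
+ * follows (a schematic sketch only; grids, policies, and the remaining arguments are configured by
+ * the dispatch code further below):
+ *
+ *   for (int bit = begin_bit; bit < end_bit; bit += pass_bits)
+ *   {
+ *       DeviceRadixSortUpsweepKernel<<<grid, upsweep_block>>>(d_keys_in, d_spine, ..., bit, pass_bits, even_share);
+ *       RadixSortScanBinsKernel<<<1, scan_block>>>(d_spine, num_counts);
+ *       DeviceRadixSortDownsweepKernel<<<grid, downsweep_block>>>(d_keys_in, d_keys_out, ..., d_spine, ..., bit, pass_bits, even_share);
+ *       Swap(d_keys_in, d_keys_out);   // ping-pong the key (and value) buffers between passes
+ *   }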
+ */ +template < + typename ChainedPolicyT, ///< Chained tuning policy + bool ALT_DIGIT_BITS, ///< Whether or not to use the alternate (lower-bits) policy + bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low + typename KeyT, ///< Key type + typename ValueT, ///< Value type + typename OffsetT> ///< Signed integer type for global offsets +__launch_bounds__ (int((ALT_DIGIT_BITS) ? + ChainedPolicyT::ActivePolicy::AltDownsweepPolicy::BLOCK_THREADS : + ChainedPolicyT::ActivePolicy::DownsweepPolicy::BLOCK_THREADS)) +__global__ void DeviceRadixSortDownsweepKernel( + const KeyT *d_keys_in, ///< [in] Input keys buffer + KeyT *d_keys_out, ///< [in] Output keys buffer + const ValueT *d_values_in, ///< [in] Input values buffer + ValueT *d_values_out, ///< [in] Output values buffer + OffsetT *d_spine, ///< [in] Scan of privatized (per block) digit histograms (striped, i.e., 0s counts from each block, then 1s counts from each block, etc.) + OffsetT num_items, ///< [in] Total number of input data items + int current_bit, ///< [in] Bit position of current radix digit + int num_bits, ///< [in] Number of bits of current radix digit + GridEvenShare even_share) ///< [in] Even-share descriptor for mapan equal number of tiles onto each thread block +{ + enum { + TILE_ITEMS = ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::BLOCK_THREADS * + ChainedPolicyT::ActivePolicy::AltUpsweepPolicy::ITEMS_PER_THREAD + }; + + // Parameterize AgentRadixSortDownsweep type for the current configuration + typedef AgentRadixSortDownsweep< + typename If<(ALT_DIGIT_BITS), + typename ChainedPolicyT::ActivePolicy::AltDownsweepPolicy, + typename ChainedPolicyT::ActivePolicy::DownsweepPolicy>::Type, + IS_DESCENDING, + KeyT, + ValueT, + OffsetT> + AgentRadixSortDownsweepT; + + // Shared memory storage + __shared__ typename AgentRadixSortDownsweepT::TempStorage temp_storage; + + // Initialize even-share descriptor for this thread block + even_share.template BlockInit(); + + // Process input tiles + AgentRadixSortDownsweepT(temp_storage, num_items, d_spine, d_keys_in, d_keys_out, d_values_in, d_values_out, current_bit, num_bits).ProcessRegion( + even_share.block_offset, + even_share.block_end); +} + + +/** + * Single pass kernel entry point (single-block). Fully sorts a tile of input. 
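+ *
+ * This path applies when the whole problem fits in one tile, i.e. when num_items is at most
+ * BLOCK_THREADS * ITEMS_PER_THREAD of the SingleTilePolicy: a single thread block then sorts
+ * everything with BlockRadixSort and the multi-pass upsweep/scan/downsweep machinery above is
+ * skipped entirely.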
+ */ +template < + typename ChainedPolicyT, ///< Chained tuning policy + bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low + typename KeyT, ///< Key type + typename ValueT, ///< Value type + typename OffsetT> ///< Signed integer type for global offsets +__launch_bounds__ (int(ChainedPolicyT::ActivePolicy::SingleTilePolicy::BLOCK_THREADS), 1) +__global__ void DeviceRadixSortSingleTileKernel( + const KeyT *d_keys_in, ///< [in] Input keys buffer + KeyT *d_keys_out, ///< [in] Output keys buffer + const ValueT *d_values_in, ///< [in] Input values buffer + ValueT *d_values_out, ///< [in] Output values buffer + OffsetT num_items, ///< [in] Total number of input data items + int current_bit, ///< [in] Bit position of current radix digit + int end_bit) ///< [in] The past-the-end (most-significant) bit index needed for key comparison +{ + // Constants + enum + { + BLOCK_THREADS = ChainedPolicyT::ActivePolicy::SingleTilePolicy::BLOCK_THREADS, + ITEMS_PER_THREAD = ChainedPolicyT::ActivePolicy::SingleTilePolicy::ITEMS_PER_THREAD, + KEYS_ONLY = Equals::VALUE, + }; + + // BlockRadixSort type + typedef BlockRadixSort< + KeyT, + BLOCK_THREADS, + ITEMS_PER_THREAD, + ValueT, + ChainedPolicyT::ActivePolicy::SingleTilePolicy::RADIX_BITS, + (ChainedPolicyT::ActivePolicy::SingleTilePolicy::RANK_ALGORITHM == RADIX_RANK_MEMOIZE), + ChainedPolicyT::ActivePolicy::SingleTilePolicy::SCAN_ALGORITHM> + BlockRadixSortT; + + // BlockLoad type (keys) + typedef BlockLoad< + KeyT, + BLOCK_THREADS, + ITEMS_PER_THREAD, + ChainedPolicyT::ActivePolicy::SingleTilePolicy::LOAD_ALGORITHM> BlockLoadKeys; + + // BlockLoad type (values) + typedef BlockLoad< + ValueT, + BLOCK_THREADS, + ITEMS_PER_THREAD, + ChainedPolicyT::ActivePolicy::SingleTilePolicy::LOAD_ALGORITHM> BlockLoadValues; + + // Unsigned word for key bits + typedef typename Traits::UnsignedBits UnsignedBitsT; + + // Shared memory storage + __shared__ union TempStorage + { + typename BlockRadixSortT::TempStorage sort; + typename BlockLoadKeys::TempStorage load_keys; + typename BlockLoadValues::TempStorage load_values; + + } temp_storage; + + // Keys and values for the block + KeyT keys[ITEMS_PER_THREAD]; + ValueT values[ITEMS_PER_THREAD]; + + // Get default (min/max) value for out-of-bounds keys + UnsignedBitsT default_key_bits = (IS_DESCENDING) ? 
Traits::LOWEST_KEY : Traits::MAX_KEY; + KeyT default_key = reinterpret_cast(default_key_bits); + + // Load keys + BlockLoadKeys(temp_storage.load_keys).Load(d_keys_in, keys, num_items, default_key); + + CTA_SYNC(); + + // Load values + if (!KEYS_ONLY) + { + // Register pressure work-around: moving num_items through shfl prevents compiler + // from reusing guards/addressing from prior guarded loads + num_items = ShuffleIndex(num_items, 0, 0xffffffff); + + BlockLoadValues(temp_storage.load_values).Load(d_values_in, values, num_items); + + CTA_SYNC(); + } + + // Sort tile + BlockRadixSortT(temp_storage.sort).SortBlockedToStriped( + keys, + values, + current_bit, + end_bit, + Int2Type(), + Int2Type()); + + // Store keys and values + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + int item_offset = ITEM * BLOCK_THREADS + threadIdx.x; + if (item_offset < num_items) + { + d_keys_out[item_offset] = keys[ITEM]; + if (!KEYS_ONLY) + d_values_out[item_offset] = values[ITEM]; + } + } +} + + +/** + * Segmented radix sorting pass (one block per segment) + */ +template < + typename ChainedPolicyT, ///< Chained tuning policy + bool ALT_DIGIT_BITS, ///< Whether or not to use the alternate (lower-bits) policy + bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low + typename KeyT, ///< Key type + typename ValueT, ///< Value type + typename OffsetIteratorT, ///< Random-access input iterator type for reading segment offsets \iterator + typename OffsetT> ///< Signed integer type for global offsets +__launch_bounds__ (int((ALT_DIGIT_BITS) ? + ChainedPolicyT::ActivePolicy::AltSegmentedPolicy::BLOCK_THREADS : + ChainedPolicyT::ActivePolicy::SegmentedPolicy::BLOCK_THREADS)) +__global__ void DeviceSegmentedRadixSortKernel( + const KeyT *d_keys_in, ///< [in] Input keys buffer + KeyT *d_keys_out, ///< [in] Output keys buffer + const ValueT *d_values_in, ///< [in] Input values buffer + ValueT *d_values_out, ///< [in] Output values buffer + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. 
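+    /*
+     * For example (illustrative values only), three segments over a 9-key buffer could be
+     * described as
+     *
+     *   d_begin_offsets = [0, 3, 3]
+     *   d_end_offsets   = [3, 3, 9]   // the middle segment is empty because begin == end
+     *
+     * One thread block is launched per segment, and blockIdx.x selects the segment below.
+     */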
+ int /*num_segments*/, ///< [in] The number of segments that comprise the sorting data + int current_bit, ///< [in] Bit position of current radix digit + int pass_bits) ///< [in] Number of bits of current radix digit +{ + // + // Constants + // + + typedef typename If<(ALT_DIGIT_BITS), + typename ChainedPolicyT::ActivePolicy::AltSegmentedPolicy, + typename ChainedPolicyT::ActivePolicy::SegmentedPolicy>::Type SegmentedPolicyT; + + enum + { + BLOCK_THREADS = SegmentedPolicyT::BLOCK_THREADS, + ITEMS_PER_THREAD = SegmentedPolicyT::ITEMS_PER_THREAD, + RADIX_BITS = SegmentedPolicyT::RADIX_BITS, + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + RADIX_DIGITS = 1 << RADIX_BITS, + KEYS_ONLY = Equals::VALUE, + }; + + // Upsweep type + typedef AgentRadixSortUpsweep< + AgentRadixSortUpsweepPolicy, + KeyT, + OffsetT> + BlockUpsweepT; + + // Digit-scan type + typedef BlockScan DigitScanT; + + // Downsweep type + typedef AgentRadixSortDownsweep BlockDownsweepT; + + enum + { + /// Number of bin-starting offsets tracked per thread + BINS_TRACKED_PER_THREAD = BlockDownsweepT::BINS_TRACKED_PER_THREAD + }; + + // + // Process input tiles + // + + // Shared memory storage + __shared__ union + { + typename BlockUpsweepT::TempStorage upsweep; + typename BlockDownsweepT::TempStorage downsweep; + struct + { + volatile OffsetT reverse_counts_in[RADIX_DIGITS]; + volatile OffsetT reverse_counts_out[RADIX_DIGITS]; + typename DigitScanT::TempStorage scan; + }; + + } temp_storage; + + OffsetT segment_begin = d_begin_offsets[blockIdx.x]; + OffsetT segment_end = d_end_offsets[blockIdx.x]; + OffsetT num_items = segment_end - segment_begin; + + // Check if empty segment + if (num_items <= 0) + return; + + // Upsweep + BlockUpsweepT upsweep(temp_storage.upsweep, d_keys_in, current_bit, pass_bits); + upsweep.ProcessRegion(segment_begin, segment_end); + + CTA_SYNC(); + + // The count of each digit value in this pass (valid in the first RADIX_DIGITS threads) + OffsetT bin_count[BINS_TRACKED_PER_THREAD]; + upsweep.ExtractCounts(bin_count); + + CTA_SYNC(); + + if (IS_DESCENDING) + { + // Reverse bin counts + #pragma unroll + for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) + { + int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; + + if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) + temp_storage.reverse_counts_in[bin_idx] = bin_count[track]; + } + + CTA_SYNC(); + + #pragma unroll + for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) + { + int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; + + if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) + bin_count[track] = temp_storage.reverse_counts_in[RADIX_DIGITS - bin_idx - 1]; + } + } + + // Scan + OffsetT bin_offset[BINS_TRACKED_PER_THREAD]; // The global scatter base offset for each digit value in this pass (valid in the first RADIX_DIGITS threads) + DigitScanT(temp_storage.scan).ExclusiveSum(bin_count, bin_offset); + + #pragma unroll + for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) + { + bin_offset[track] += segment_begin; + } + + if (IS_DESCENDING) + { + // Reverse bin offsets + #pragma unroll + for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) + { + int bin_idx = (threadIdx.x * BINS_TRACKED_PER_THREAD) + track; + + if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) + temp_storage.reverse_counts_out[threadIdx.x] = bin_offset[track]; + } + + CTA_SYNC(); + + #pragma unroll + for (int track = 0; track < BINS_TRACKED_PER_THREAD; ++track) + { + int bin_idx = 
(threadIdx.x * BINS_TRACKED_PER_THREAD) + track; + + if ((BLOCK_THREADS == RADIX_DIGITS) || (bin_idx < RADIX_DIGITS)) + bin_offset[track] = temp_storage.reverse_counts_out[RADIX_DIGITS - bin_idx - 1]; + } + } + + CTA_SYNC(); + + // Downsweep + BlockDownsweepT downsweep(temp_storage.downsweep, bin_offset, num_items, d_keys_in, d_keys_out, d_values_in, d_values_out, current_bit, pass_bits); + downsweep.ProcessRegion(segment_begin, segment_end); +} + + + +/****************************************************************************** + * Policy + ******************************************************************************/ + +/** + * Tuning policy for kernel specialization + */ +template < + typename KeyT, ///< Key type + typename ValueT, ///< Value type + typename OffsetT> ///< Signed integer type for global offsets +struct DeviceRadixSortPolicy +{ + //------------------------------------------------------------------------------ + // Constants + //------------------------------------------------------------------------------ + + enum + { + // Whether this is a keys-only (or key-value) sort + KEYS_ONLY = (Equals::VALUE), + }; + + // Dominant-sized key/value type + typedef typename If<(sizeof(ValueT) > 4) && (sizeof(KeyT) < sizeof(ValueT)), ValueT, KeyT>::Type DominantT; + + //------------------------------------------------------------------------------ + // Architecture-specific tuning policies + //------------------------------------------------------------------------------ + + /// SM20 + struct Policy200 : ChainedPolicy<200, Policy200, Policy200> + { + enum { + PRIMARY_RADIX_BITS = 5, + ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, + + // Relative size of KeyT type to a 4-byte word + SCALE_FACTOR_4B = (CUB_MAX(sizeof(KeyT), sizeof(ValueT)) + 3) / 4, + }; + + // Keys-only upsweep policies + typedef AgentRadixSortUpsweepPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyKeys; + typedef AgentRadixSortUpsweepPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyKeys; + + // Key-value pairs upsweep policies + typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 13 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyPairs; + typedef AgentRadixSortUpsweepPolicy <128, CUB_MAX(1, 13 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyPairs; + + // Upsweep policies + typedef typename If::Type UpsweepPolicy; + typedef typename If::Type AltUpsweepPolicy; + + // Scan policy + typedef AgentScanPolicy <512, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; + + // Keys-only downsweep policies + typedef AgentRadixSortDownsweepPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyKeys; + typedef AgentRadixSortDownsweepPolicy <64, CUB_MAX(1, 18 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyKeys; + + // Key-value pairs downsweep policies + typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 13 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyPairs; + typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 13 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyPairs; + + // Downsweep 
policies + typedef typename If::Type DownsweepPolicy; + typedef typename If::Type AltDownsweepPolicy; + + // Single-tile policy + typedef DownsweepPolicy SingleTilePolicy; + + // Segmented policies + typedef DownsweepPolicy SegmentedPolicy; + typedef AltDownsweepPolicy AltSegmentedPolicy; + }; + + /// SM30 + struct Policy300 : ChainedPolicy<300, Policy300, Policy200> + { + enum { + PRIMARY_RADIX_BITS = 5, + ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, + + // Relative size of KeyT type to a 4-byte word + SCALE_FACTOR_4B = (CUB_MAX(sizeof(KeyT), sizeof(ValueT)) + 3) / 4, + }; + + // Keys-only upsweep policies + typedef AgentRadixSortUpsweepPolicy <256, CUB_MAX(1, 7 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyKeys; + typedef AgentRadixSortUpsweepPolicy <256, CUB_MAX(1, 7 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyKeys; + + // Key-value pairs upsweep policies + typedef AgentRadixSortUpsweepPolicy <256, CUB_MAX(1, 5 / SCALE_FACTOR_4B), LOAD_DEFAULT, PRIMARY_RADIX_BITS> UpsweepPolicyPairs; + typedef AgentRadixSortUpsweepPolicy <256, CUB_MAX(1, 5 / SCALE_FACTOR_4B), LOAD_DEFAULT, ALT_RADIX_BITS> AltUpsweepPolicyPairs; + + // Upsweep policies + typedef typename If::Type UpsweepPolicy; + typedef typename If::Type AltUpsweepPolicy; + + // Scan policy + typedef AgentScanPolicy <1024, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_WARP_SCANS> ScanPolicy; + + // Keys-only downsweep policies + typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 14 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyKeys; + typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 14 / SCALE_FACTOR_4B), BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyKeys; + + // Key-value pairs downsweep policies + typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 10 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, PRIMARY_RADIX_BITS> DownsweepPolicyPairs; + typedef AgentRadixSortDownsweepPolicy <128, CUB_MAX(1, 10 / SCALE_FACTOR_4B), BLOCK_LOAD_TRANSPOSE, LOAD_DEFAULT, RADIX_RANK_BASIC, BLOCK_SCAN_WARP_SCANS, ALT_RADIX_BITS> AltDownsweepPolicyPairs; + + // Downsweep policies + typedef typename If::Type DownsweepPolicy; + typedef typename If::Type AltDownsweepPolicy; + + // Single-tile policy + typedef DownsweepPolicy SingleTilePolicy; + + // Segmented policies + typedef DownsweepPolicy SegmentedPolicy; + typedef AltDownsweepPolicy AltSegmentedPolicy; + }; + + + /// SM35 + struct Policy350 : ChainedPolicy<350, Policy350, Policy300> + { + enum { + PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 
6 : 5, // 1.72B 32b keys/s, 1.17B 32b pairs/s, 1.55B 32b segmented keys/s (K40m) + }; + + // Scan policy + typedef AgentScanPolicy <1024, 4, BLOCK_LOAD_VECTORIZE, LOAD_DEFAULT, BLOCK_STORE_VECTORIZE, BLOCK_SCAN_WARP_SCANS> ScanPolicy; + + // Keys-only downsweep policies + typedef AgentRadixSortDownsweepPolicy DownsweepPolicyKeys; + typedef AgentRadixSortDownsweepPolicy AltDownsweepPolicyKeys; + + // Key-value pairs downsweep policies + typedef DownsweepPolicyKeys DownsweepPolicyPairs; + typedef AgentRadixSortDownsweepPolicy AltDownsweepPolicyPairs; + + // Downsweep policies + typedef typename If::Type DownsweepPolicy; + typedef typename If::Type AltDownsweepPolicy; + + // Upsweep policies + typedef DownsweepPolicy UpsweepPolicy; + typedef AltDownsweepPolicy AltUpsweepPolicy; + + // Single-tile policy + typedef DownsweepPolicy SingleTilePolicy; + + // Segmented policies + typedef DownsweepPolicy SegmentedPolicy; + typedef AltDownsweepPolicy AltSegmentedPolicy; + + + }; + + + /// SM50 + struct Policy500 : ChainedPolicy<500, Policy500, Policy350> + { + enum { + PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 7 : 5, // 3.5B 32b keys/s, 1.92B 32b pairs/s (TitanX) + SINGLE_TILE_RADIX_BITS = (sizeof(KeyT) > 1) ? 6 : 5, + SEGMENTED_RADIX_BITS = (sizeof(KeyT) > 1) ? 6 : 5, // 3.1B 32b segmented keys/s (TitanX) + }; + + // ScanPolicy + typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; + + // Downsweep policies + typedef AgentRadixSortDownsweepPolicy DownsweepPolicy; + typedef AgentRadixSortDownsweepPolicy AltDownsweepPolicy; + + // Upsweep policies + typedef DownsweepPolicy UpsweepPolicy; + typedef AltDownsweepPolicy AltUpsweepPolicy; + + // Single-tile policy + typedef AgentRadixSortDownsweepPolicy SingleTilePolicy; + + // Segmented policies + typedef AgentRadixSortDownsweepPolicy SegmentedPolicy; + typedef AgentRadixSortDownsweepPolicy AltSegmentedPolicy; + }; + + + /// SM60 (GP100) + struct Policy600 : ChainedPolicy<600, Policy600, Policy500> + { + enum { + PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 7 : 5, // 6.9B 32b keys/s (Quadro P100) + SINGLE_TILE_RADIX_BITS = (sizeof(KeyT) > 1) ? 6 : 5, + SEGMENTED_RADIX_BITS = (sizeof(KeyT) > 1) ? 6 : 5, // 5.9B 32b segmented keys/s (Quadro P100) + }; + + // ScanPolicy + typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; + + // Downsweep policies + typedef AgentRadixSortDownsweepPolicy DownsweepPolicy; + typedef AgentRadixSortDownsweepPolicy AltDownsweepPolicy; + + // Upsweep policies + typedef DownsweepPolicy UpsweepPolicy; + typedef AltDownsweepPolicy AltUpsweepPolicy; + + // Single-tile policy + typedef AgentRadixSortDownsweepPolicy SingleTilePolicy; + + // Segmented policies + typedef AgentRadixSortDownsweepPolicy SegmentedPolicy; + typedef AgentRadixSortDownsweepPolicy AltSegmentedPolicy; + + }; + + + /// SM61 (GP104) + struct Policy610 : ChainedPolicy<610, Policy610, Policy600> + { + enum { + PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 7 : 5, // 3.4B 32b keys/s, 1.83B 32b pairs/s (1080) + SINGLE_TILE_RADIX_BITS = (sizeof(KeyT) > 1) ? 6 : 5, + SEGMENTED_RADIX_BITS = (sizeof(KeyT) > 1) ? 
6 : 5, // 3.3B 32b segmented keys/s (1080) + }; + + // ScanPolicy + typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; + + // Downsweep policies + typedef AgentRadixSortDownsweepPolicy DownsweepPolicy; + typedef AgentRadixSortDownsweepPolicy AltDownsweepPolicy; + + // Upsweep policies + typedef AgentRadixSortUpsweepPolicy UpsweepPolicy; + typedef AgentRadixSortUpsweepPolicy AltUpsweepPolicy; + + // Single-tile policy + typedef AgentRadixSortDownsweepPolicy SingleTilePolicy; + + // Segmented policies + typedef AgentRadixSortDownsweepPolicy SegmentedPolicy; + typedef AgentRadixSortDownsweepPolicy AltSegmentedPolicy; + }; + + + /// SM62 (Tegra, less RF) + struct Policy620 : ChainedPolicy<620, Policy620, Policy610> + { + enum { + PRIMARY_RADIX_BITS = 5, + ALT_RADIX_BITS = PRIMARY_RADIX_BITS - 1, + }; + + // ScanPolicy + typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; + + // Downsweep policies + typedef AgentRadixSortDownsweepPolicy DownsweepPolicy; + typedef AgentRadixSortDownsweepPolicy AltDownsweepPolicy; + + // Upsweep policies + typedef DownsweepPolicy UpsweepPolicy; + typedef AltDownsweepPolicy AltUpsweepPolicy; + + // Single-tile policy + typedef AgentRadixSortDownsweepPolicy SingleTilePolicy; + + // Segmented policies + typedef DownsweepPolicy SegmentedPolicy; + typedef AltDownsweepPolicy AltSegmentedPolicy; + }; + + + /// SM70 (GV100) + struct Policy700 : ChainedPolicy<700, Policy700, Policy620> + { + enum { + PRIMARY_RADIX_BITS = (sizeof(KeyT) > 1) ? 7 : 5, // 7.62B 32b keys/s (GV100) + SINGLE_TILE_RADIX_BITS = (sizeof(KeyT) > 1) ? 6 : 5, + SEGMENTED_RADIX_BITS = (sizeof(KeyT) > 1) ? 
6 : 5, // 8.7B 32b segmented keys/s (GV100) + }; + + // ScanPolicy + typedef AgentScanPolicy <512, 23, BLOCK_LOAD_WARP_TRANSPOSE, LOAD_DEFAULT, BLOCK_STORE_WARP_TRANSPOSE, BLOCK_SCAN_RAKING_MEMOIZE> ScanPolicy; + + // Downsweep policies + typedef AgentRadixSortDownsweepPolicy DownsweepPolicy; + typedef AgentRadixSortDownsweepPolicy AltDownsweepPolicy; + + // Upsweep policies + typedef DownsweepPolicy UpsweepPolicy; + typedef AltDownsweepPolicy AltUpsweepPolicy; + + // Single-tile policy + typedef AgentRadixSortDownsweepPolicy SingleTilePolicy; + + // Segmented policies + typedef AgentRadixSortDownsweepPolicy SegmentedPolicy; + typedef AgentRadixSortDownsweepPolicy AltSegmentedPolicy; + }; + + + /// MaxPolicy + typedef Policy700 MaxPolicy; + + +}; + + + +/****************************************************************************** + * Single-problem dispatch + ******************************************************************************/ + +/** + * Utility class for dispatching the appropriately-tuned kernels for device-wide radix sort + */ +template < + bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low + typename KeyT, ///< Key type + typename ValueT, ///< Value type + typename OffsetT> ///< Signed integer type for global offsets +struct DispatchRadixSort : + DeviceRadixSortPolicy +{ + //------------------------------------------------------------------------------ + // Constants + //------------------------------------------------------------------------------ + + enum + { + // Whether this is a keys-only (or key-value) sort + KEYS_ONLY = (Equals::VALUE), + }; + + + //------------------------------------------------------------------------------ + // Problem state + //------------------------------------------------------------------------------ + + void *d_temp_storage; ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes; ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + DoubleBuffer &d_keys; ///< [in,out] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys + DoubleBuffer &d_values; ///< [in,out] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values + OffsetT num_items; ///< [in] Number of items to sort + int begin_bit; ///< [in] The beginning (least-significant) bit index needed for key comparison + int end_bit; ///< [in] The past-the-end (most-significant) bit index needed for key comparison + cudaStream_t stream; ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous; ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
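// A minimal usage sketch of the two-call contract this dispatcher implements (the first
// call, with d_temp_storage == NULL, only reports the required size), going through the
// public cub::DeviceRadixSort front-end with the DoubleBuffer overload that maps onto the
// d_keys problem-state member above.  The host wrapper and buffer names are assumptions
// for illustration only; <cub/cub.cuh> is assumed to be included.
void sort_keys_sketch(unsigned int *d_key_buf, unsigned int *d_key_alt_buf, int num_items)
{
    cub::DoubleBuffer<unsigned int> d_keys(d_key_buf, d_key_alt_buf);

    void  *d_temp_storage     = NULL;
    size_t temp_storage_bytes = 0;

    // Size query: no sorting work is performed.
    cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);

    // Actual sort: afterwards d_keys.Current() addresses the sorted keys.
    cub::DeviceRadixSort::SortKeys(d_temp_storage, temp_storage_bytes, d_keys, num_items);

    cudaFree(d_temp_storage);
}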
+ int ptx_version; ///< [in] PTX version + bool is_overwrite_okay; ///< [in] Whether is okay to overwrite source buffers + + + //------------------------------------------------------------------------------ + // Constructor + //------------------------------------------------------------------------------ + + /// Constructor + CUB_RUNTIME_FUNCTION __forceinline__ + DispatchRadixSort( + void* d_temp_storage, + size_t &temp_storage_bytes, + DoubleBuffer &d_keys, + DoubleBuffer &d_values, + OffsetT num_items, + int begin_bit, + int end_bit, + bool is_overwrite_okay, + cudaStream_t stream, + bool debug_synchronous, + int ptx_version) + : + d_temp_storage(d_temp_storage), + temp_storage_bytes(temp_storage_bytes), + d_keys(d_keys), + d_values(d_values), + num_items(num_items), + begin_bit(begin_bit), + end_bit(end_bit), + stream(stream), + debug_synchronous(debug_synchronous), + ptx_version(ptx_version), + is_overwrite_okay(is_overwrite_okay) + {} + + + //------------------------------------------------------------------------------ + // Small-problem (single tile) invocation + //------------------------------------------------------------------------------ + + /// Invoke a single block to sort in-core + template < + typename ActivePolicyT, ///< Umbrella policy active for the target device + typename SingleTileKernelT> ///< Function type of cub::DeviceRadixSortSingleTileKernel + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t InvokeSingleTile( + SingleTileKernelT single_tile_kernel) ///< [in] Kernel function pointer to parameterization of cub::DeviceRadixSortSingleTileKernel + { +#ifndef CUB_RUNTIME_ENABLED + (void)single_tile_kernel; + // Kernel launch not supported from this device + return CubDebug(cudaErrorNotSupported ); +#else + cudaError error = cudaSuccess; + do + { + // Return if the caller is simply requesting the size of the storage allocation + if (d_temp_storage == NULL) + { + temp_storage_bytes = 1; + break; + } + + // Return if empty problem + if (num_items == 0) + break; + + // Log single_tile_kernel configuration + if (debug_synchronous) + _CubLog("Invoking single_tile_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy, current bit %d, bit_grain %d\n", + 1, ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, (long long) stream, + ActivePolicyT::SingleTilePolicy::ITEMS_PER_THREAD, 1, begin_bit, ActivePolicyT::SingleTilePolicy::RADIX_BITS); + + // Invoke upsweep_kernel with same grid size as downsweep_kernel + single_tile_kernel<<<1, ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, 0, stream>>>( + d_keys.Current(), + d_keys.Alternate(), + d_values.Current(), + d_values.Alternate(), + num_items, + begin_bit, + end_bit); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + + // Update selector + d_keys.selector ^= 1; + d_values.selector ^= 1; + } + while (0); + + return error; + +#endif // CUB_RUNTIME_ENABLED + } + + + //------------------------------------------------------------------------------ + // Normal problem size invocation + //------------------------------------------------------------------------------ + + /** + * Invoke a three-kernel sorting pass at the current bit. 
+ */ + template + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t InvokePass( + const KeyT *d_keys_in, + KeyT *d_keys_out, + const ValueT *d_values_in, + ValueT *d_values_out, + OffsetT *d_spine, + int spine_length, + int ¤t_bit, + PassConfigT &pass_config) + { + cudaError error = cudaSuccess; + do + { + int pass_bits = CUB_MIN(pass_config.radix_bits, (end_bit - current_bit)); + + // Log upsweep_kernel configuration + if (debug_synchronous) + _CubLog("Invoking upsweep_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy, current bit %d, bit_grain %d\n", + pass_config.even_share.grid_size, pass_config.upsweep_config.block_threads, (long long) stream, + pass_config.upsweep_config.items_per_thread, pass_config.upsweep_config.sm_occupancy, current_bit, pass_bits); + + // Invoke upsweep_kernel with same grid size as downsweep_kernel + pass_config.upsweep_kernel<<>>( + d_keys_in, + d_spine, + num_items, + current_bit, + pass_bits, + pass_config.even_share); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + + // Log scan_kernel configuration + if (debug_synchronous) _CubLog("Invoking scan_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread\n", + 1, pass_config.scan_config.block_threads, (long long) stream, pass_config.scan_config.items_per_thread); + + // Invoke scan_kernel + pass_config.scan_kernel<<<1, pass_config.scan_config.block_threads, 0, stream>>>( + d_spine, + spine_length); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + + // Log downsweep_kernel configuration + if (debug_synchronous) _CubLog("Invoking downsweep_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", + pass_config.even_share.grid_size, pass_config.downsweep_config.block_threads, (long long) stream, + pass_config.downsweep_config.items_per_thread, pass_config.downsweep_config.sm_occupancy); + + // Invoke downsweep_kernel + pass_config.downsweep_kernel<<>>( + d_keys_in, + d_keys_out, + d_values_in, + d_values_out, + d_spine, + num_items, + current_bit, + pass_bits, + pass_config.even_share); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + + // Update current bit + current_bit += pass_bits; + } + while (0); + + return error; + } + + + + /// Pass configuration structure + template < + typename UpsweepKernelT, + typename ScanKernelT, + typename DownsweepKernelT> + struct PassConfig + { + UpsweepKernelT upsweep_kernel; + KernelConfig upsweep_config; + ScanKernelT scan_kernel; + KernelConfig scan_config; + DownsweepKernelT downsweep_kernel; + KernelConfig downsweep_config; + int radix_bits; + int radix_digits; + int max_downsweep_grid_size; + GridEvenShare even_share; + + /// Initialize pass configuration + template < + typename UpsweepPolicyT, + typename ScanPolicyT, + typename DownsweepPolicyT> + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t InitPassConfig( + UpsweepKernelT upsweep_kernel, + ScanKernelT scan_kernel, + DownsweepKernelT downsweep_kernel, + int ptx_version, + int sm_count, + int num_items) + { + cudaError error = cudaSuccess; + do + { + 
this->upsweep_kernel = upsweep_kernel; + this->scan_kernel = scan_kernel; + this->downsweep_kernel = downsweep_kernel; + radix_bits = DownsweepPolicyT::RADIX_BITS; + radix_digits = 1 << radix_bits; + + if (CubDebug(error = upsweep_config.Init(upsweep_kernel))) break; + if (CubDebug(error = scan_config.Init(scan_kernel))) break; + if (CubDebug(error = downsweep_config.Init(downsweep_kernel))) break; + + max_downsweep_grid_size = (downsweep_config.sm_occupancy * sm_count) * CUB_SUBSCRIPTION_FACTOR(ptx_version); + + even_share.DispatchInit( + num_items, + max_downsweep_grid_size, + CUB_MAX(downsweep_config.tile_size, upsweep_config.tile_size)); + + } + while (0); + return error; + } + + }; + + + /// Invocation (run multiple digit passes) + template < + typename ActivePolicyT, ///< Umbrella policy active for the target device + typename UpsweepKernelT, ///< Function type of cub::DeviceRadixSortUpsweepKernel + typename ScanKernelT, ///< Function type of cub::SpineScanKernel + typename DownsweepKernelT> ///< Function type of cub::DeviceRadixSortDownsweepKernel + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t InvokePasses( + UpsweepKernelT upsweep_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceRadixSortUpsweepKernel + UpsweepKernelT alt_upsweep_kernel, ///< [in] Alternate kernel function pointer to parameterization of cub::DeviceRadixSortUpsweepKernel + ScanKernelT scan_kernel, ///< [in] Kernel function pointer to parameterization of cub::SpineScanKernel + DownsweepKernelT downsweep_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceRadixSortDownsweepKernel + DownsweepKernelT alt_downsweep_kernel) ///< [in] Alternate kernel function pointer to parameterization of cub::DeviceRadixSortDownsweepKernel + { +#ifndef CUB_RUNTIME_ENABLED + (void)upsweep_kernel; + (void)alt_upsweep_kernel; + (void)scan_kernel; + (void)downsweep_kernel; + (void)alt_downsweep_kernel; + + // Kernel launch not supported from this device + return CubDebug(cudaErrorNotSupported ); +#else + + cudaError error = cudaSuccess; + do + { + // Get device ordinal + int device_ordinal; + if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; + + // Get SM count + int sm_count; + if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; + + // Init regular and alternate-digit kernel configurations + PassConfig pass_config, alt_pass_config; + if ((error = pass_config.template InitPassConfig< + typename ActivePolicyT::UpsweepPolicy, + typename ActivePolicyT::ScanPolicy, + typename ActivePolicyT::DownsweepPolicy>( + upsweep_kernel, scan_kernel, downsweep_kernel, ptx_version, sm_count, num_items))) break; + + if ((error = alt_pass_config.template InitPassConfig< + typename ActivePolicyT::AltUpsweepPolicy, + typename ActivePolicyT::ScanPolicy, + typename ActivePolicyT::AltDownsweepPolicy>( + alt_upsweep_kernel, scan_kernel, alt_downsweep_kernel, ptx_version, sm_count, num_items))) break; + + // Get maximum spine length + int max_grid_size = CUB_MAX(pass_config.max_downsweep_grid_size, alt_pass_config.max_downsweep_grid_size); + int spine_length = (max_grid_size * pass_config.radix_digits) + pass_config.scan_config.tile_size; + + // Temporary storage allocation requirements + void* allocations[3]; + size_t allocation_sizes[3] = + { + spine_length * sizeof(OffsetT), // bytes needed for privatized block digit histograms + (is_overwrite_okay) ? 
0 : num_items * sizeof(KeyT), // bytes needed for 3rd keys buffer + (is_overwrite_okay || (KEYS_ONLY)) ? 0 : num_items * sizeof(ValueT), // bytes needed for 3rd values buffer + }; + + // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) + if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; + + // Return if the caller is simply requesting the size of the storage allocation + if (d_temp_storage == NULL) + return cudaSuccess; + + // Pass planning. Run passes of the alternate digit-size configuration until we have an even multiple of our preferred digit size + int num_bits = end_bit - begin_bit; + int num_passes = (num_bits + pass_config.radix_bits - 1) / pass_config.radix_bits; + bool is_num_passes_odd = num_passes & 1; + int max_alt_passes = (num_passes * pass_config.radix_bits) - num_bits; + int alt_end_bit = CUB_MIN(end_bit, begin_bit + (max_alt_passes * alt_pass_config.radix_bits)); + + // Alias the temporary storage allocations + OffsetT *d_spine = static_cast(allocations[0]); + + DoubleBuffer d_keys_remaining_passes( + (is_overwrite_okay || is_num_passes_odd) ? d_keys.Alternate() : static_cast(allocations[1]), + (is_overwrite_okay) ? d_keys.Current() : (is_num_passes_odd) ? static_cast(allocations[1]) : d_keys.Alternate()); + + DoubleBuffer d_values_remaining_passes( + (is_overwrite_okay || is_num_passes_odd) ? d_values.Alternate() : static_cast(allocations[2]), + (is_overwrite_okay) ? d_values.Current() : (is_num_passes_odd) ? static_cast(allocations[2]) : d_values.Alternate()); + + // Run first pass, consuming from the input's current buffers + int current_bit = begin_bit; + if (CubDebug(error = InvokePass( + d_keys.Current(), d_keys_remaining_passes.Current(), + d_values.Current(), d_values_remaining_passes.Current(), + d_spine, spine_length, current_bit, + (current_bit < alt_end_bit) ? alt_pass_config : pass_config))) break; + + // Run remaining passes + while (current_bit < end_bit) + { + if (CubDebug(error = InvokePass( + d_keys_remaining_passes.d_buffers[d_keys_remaining_passes.selector], d_keys_remaining_passes.d_buffers[d_keys_remaining_passes.selector ^ 1], + d_values_remaining_passes.d_buffers[d_keys_remaining_passes.selector], d_values_remaining_passes.d_buffers[d_keys_remaining_passes.selector ^ 1], + d_spine, spine_length, current_bit, + (current_bit < alt_end_bit) ? 
alt_pass_config : pass_config))) break;; + + // Invert selectors + d_keys_remaining_passes.selector ^= 1; + d_values_remaining_passes.selector ^= 1; + } + + // Update selector + if (!is_overwrite_okay) { + num_passes = 1; // Sorted data always ends up in the other vector + } + + d_keys.selector = (d_keys.selector + num_passes) & 1; + d_values.selector = (d_values.selector + num_passes) & 1; + } + while (0); + + return error; + +#endif // CUB_RUNTIME_ENABLED + } + + + //------------------------------------------------------------------------------ + // Chained policy invocation + //------------------------------------------------------------------------------ + + /// Invocation + template + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t Invoke() + { + typedef typename DispatchRadixSort::MaxPolicy MaxPolicyT; + typedef typename ActivePolicyT::SingleTilePolicy SingleTilePolicyT; + + // Force kernel code-generation in all compiler passes + if (num_items <= (SingleTilePolicyT::BLOCK_THREADS * SingleTilePolicyT::ITEMS_PER_THREAD)) + { + // Small, single tile size + return InvokeSingleTile( + DeviceRadixSortSingleTileKernel); + } + else + { + // Regular size + return InvokePasses( + DeviceRadixSortUpsweepKernel< MaxPolicyT, false, IS_DESCENDING, KeyT, OffsetT>, + DeviceRadixSortUpsweepKernel< MaxPolicyT, true, IS_DESCENDING, KeyT, OffsetT>, + RadixSortScanBinsKernel< MaxPolicyT, OffsetT>, + DeviceRadixSortDownsweepKernel< MaxPolicyT, false, IS_DESCENDING, KeyT, ValueT, OffsetT>, + DeviceRadixSortDownsweepKernel< MaxPolicyT, true, IS_DESCENDING, KeyT, ValueT, OffsetT>); + } + } + + + //------------------------------------------------------------------------------ + // Dispatch entrypoints + //------------------------------------------------------------------------------ + + /** + * Internal dispatch routine + */ + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Dispatch( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + DoubleBuffer &d_keys, ///< [in,out] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys + DoubleBuffer &d_values, ///< [in,out] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values + OffsetT num_items, ///< [in] Number of items to sort + int begin_bit, ///< [in] The beginning (least-significant) bit index needed for key comparison + int end_bit, ///< [in] The past-the-end (most-significant) bit index needed for key comparison + bool is_overwrite_okay, ///< [in] Whether is okay to overwrite source buffers + cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous) ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
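// Worked example of the pass planning in InvokePasses above, assuming the 7-bit primary /
// 6-bit alternate digit widths of the SM70 policy for multi-byte keys and a full 32-bit
// sort (begin_bit = 0, end_bit = 32):
//   num_bits       = 32
//   num_passes     = ceil(32 / 7) = 5
//   max_alt_passes = 5 * 7 - 32   = 3
//   alt_end_bit    = 0 + 3 * 6    = 18
// The first three passes therefore consume 6 bits each (bits [0,18)) under the alternate
// configuration and the last two consume 7 bits each (bits [18,32)), so 3*6 + 2*7 = 32
// bits are covered with no wasted digit positions.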
+ { + typedef typename DispatchRadixSort::MaxPolicy MaxPolicyT; + + cudaError_t error; + do { + // Get PTX version + int ptx_version; + if (CubDebug(error = PtxVersion(ptx_version))) break; + + // Create dispatch functor + DispatchRadixSort dispatch( + d_temp_storage, temp_storage_bytes, + d_keys, d_values, + num_items, begin_bit, end_bit, is_overwrite_okay, + stream, debug_synchronous, ptx_version); + + // Dispatch to chained policy + if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch))) break; + + } while (0); + + return error; + } +}; + + + + +/****************************************************************************** + * Segmented dispatch + ******************************************************************************/ + +/** + * Utility class for dispatching the appropriately-tuned kernels for segmented device-wide radix sort + */ +template < + bool IS_DESCENDING, ///< Whether or not the sorted-order is high-to-low + typename KeyT, ///< Key type + typename ValueT, ///< Value type + typename OffsetIteratorT, ///< Random-access input iterator type for reading segment offsets \iterator + typename OffsetT> ///< Signed integer type for global offsets +struct DispatchSegmentedRadixSort : + DeviceRadixSortPolicy +{ + //------------------------------------------------------------------------------ + // Constants + //------------------------------------------------------------------------------ + + enum + { + // Whether this is a keys-only (or key-value) sort + KEYS_ONLY = (Equals::VALUE), + }; + + + //------------------------------------------------------------------------------ + // Parameter members + //------------------------------------------------------------------------------ + + void *d_temp_storage; ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes; ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + DoubleBuffer &d_keys; ///< [in,out] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys + DoubleBuffer &d_values; ///< [in,out] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values + OffsetT num_items; ///< [in] Number of items to sort + OffsetT num_segments; ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets; ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets; ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + int begin_bit; ///< [in] The beginning (least-significant) bit index needed for key comparison + int end_bit; ///< [in] The past-the-end (most-significant) bit index needed for key comparison + cudaStream_t stream; ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous; ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
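// A minimal usage sketch of this segmented dispatcher via the public
// cub::DeviceSegmentedRadixSort front-end (assumes <cub/cub.cuh> is included; the host
// wrapper, pointer names, and the "offsets array of length num_segments + 1" layout are
// illustrative assumptions).  The same call is issued twice: once with
// d_temp_storage == NULL to obtain temp_storage_bytes, then again to sort.
void segmented_sort_sketch(const unsigned int *d_keys_in, unsigned int *d_keys_out,
                           int num_items, int num_segments, const int *d_offsets)
{
    void  *d_temp_storage     = NULL;
    size_t temp_storage_bytes = 0;

    cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes,
                                            d_keys_in, d_keys_out, num_items,
                                            num_segments, d_offsets, d_offsets + 1);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceSegmentedRadixSort::SortKeys(d_temp_storage, temp_storage_bytes,
                                            d_keys_in, d_keys_out, num_items,
                                            num_segments, d_offsets, d_offsets + 1);
    cudaFree(d_temp_storage);
}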
+ int ptx_version; ///< [in] PTX version + bool is_overwrite_okay; ///< [in] Whether is okay to overwrite source buffers + + + //------------------------------------------------------------------------------ + // Constructors + //------------------------------------------------------------------------------ + + /// Constructor + CUB_RUNTIME_FUNCTION __forceinline__ + DispatchSegmentedRadixSort( + void* d_temp_storage, + size_t &temp_storage_bytes, + DoubleBuffer &d_keys, + DoubleBuffer &d_values, + OffsetT num_items, + OffsetT num_segments, + OffsetIteratorT d_begin_offsets, + OffsetIteratorT d_end_offsets, + int begin_bit, + int end_bit, + bool is_overwrite_okay, + cudaStream_t stream, + bool debug_synchronous, + int ptx_version) + : + d_temp_storage(d_temp_storage), + temp_storage_bytes(temp_storage_bytes), + d_keys(d_keys), + d_values(d_values), + num_items(num_items), + num_segments(num_segments), + d_begin_offsets(d_begin_offsets), + d_end_offsets(d_end_offsets), + begin_bit(begin_bit), + end_bit(end_bit), + is_overwrite_okay(is_overwrite_okay), + stream(stream), + debug_synchronous(debug_synchronous), + ptx_version(ptx_version) + {} + + + //------------------------------------------------------------------------------ + // Multi-segment invocation + //------------------------------------------------------------------------------ + + /// Invoke a three-kernel sorting pass at the current bit. + template + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t InvokePass( + const KeyT *d_keys_in, + KeyT *d_keys_out, + const ValueT *d_values_in, + ValueT *d_values_out, + int ¤t_bit, + PassConfigT &pass_config) + { + cudaError error = cudaSuccess; + do + { + int pass_bits = CUB_MIN(pass_config.radix_bits, (end_bit - current_bit)); + + // Log kernel configuration + if (debug_synchronous) + _CubLog("Invoking segmented_kernels<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy, current bit %d, bit_grain %d\n", + num_segments, pass_config.segmented_config.block_threads, (long long) stream, + pass_config.segmented_config.items_per_thread, pass_config.segmented_config.sm_occupancy, current_bit, pass_bits); + + pass_config.segmented_kernel<<>>( + d_keys_in, d_keys_out, + d_values_in, d_values_out, + d_begin_offsets, d_end_offsets, num_segments, + current_bit, pass_bits); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + + // Update current bit + current_bit += pass_bits; + } + while (0); + + return error; + } + + + /// PassConfig data structure + template + struct PassConfig + { + SegmentedKernelT segmented_kernel; + KernelConfig segmented_config; + int radix_bits; + int radix_digits; + + /// Initialize pass configuration + template + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t InitPassConfig(SegmentedKernelT segmented_kernel) + { + this->segmented_kernel = segmented_kernel; + this->radix_bits = SegmentedPolicyT::RADIX_BITS; + this->radix_digits = 1 << radix_bits; + + return CubDebug(segmented_config.Init(segmented_kernel)); + } + }; + + + /// Invocation (run multiple digit passes) + template < + typename ActivePolicyT, ///< Umbrella policy active for the target device + typename SegmentedKernelT> ///< Function type of cub::DeviceSegmentedRadixSortKernel + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t InvokePasses( + SegmentedKernelT segmented_kernel, ///< [in] Kernel function pointer to parameterization 
of cub::DeviceSegmentedRadixSortKernel + SegmentedKernelT alt_segmented_kernel) ///< [in] Alternate kernel function pointer to parameterization of cub::DeviceSegmentedRadixSortKernel + { +#ifndef CUB_RUNTIME_ENABLED + (void)segmented_kernel; + (void)alt_segmented_kernel; + + // Kernel launch not supported from this device + return CubDebug(cudaErrorNotSupported ); +#else + + cudaError error = cudaSuccess; + do + { + // Init regular and alternate kernel configurations + PassConfig pass_config, alt_pass_config; + if ((error = pass_config.template InitPassConfig(segmented_kernel))) break; + if ((error = alt_pass_config.template InitPassConfig(alt_segmented_kernel))) break; + + // Temporary storage allocation requirements + void* allocations[2]; + size_t allocation_sizes[2] = + { + (is_overwrite_okay) ? 0 : num_items * sizeof(KeyT), // bytes needed for 3rd keys buffer + (is_overwrite_okay || (KEYS_ONLY)) ? 0 : num_items * sizeof(ValueT), // bytes needed for 3rd values buffer + }; + + // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) + if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; + + // Return if the caller is simply requesting the size of the storage allocation + if (d_temp_storage == NULL) + { + if (temp_storage_bytes == 0) + temp_storage_bytes = 1; + return cudaSuccess; + } + + // Pass planning. Run passes of the alternate digit-size configuration until we have an even multiple of our preferred digit size + int radix_bits = ActivePolicyT::SegmentedPolicy::RADIX_BITS; + int alt_radix_bits = ActivePolicyT::AltSegmentedPolicy::RADIX_BITS; + int num_bits = end_bit - begin_bit; + int num_passes = (num_bits + radix_bits - 1) / radix_bits; + bool is_num_passes_odd = num_passes & 1; + int max_alt_passes = (num_passes * radix_bits) - num_bits; + int alt_end_bit = CUB_MIN(end_bit, begin_bit + (max_alt_passes * alt_radix_bits)); + + DoubleBuffer d_keys_remaining_passes( + (is_overwrite_okay || is_num_passes_odd) ? d_keys.Alternate() : static_cast(allocations[0]), + (is_overwrite_okay) ? d_keys.Current() : (is_num_passes_odd) ? static_cast(allocations[0]) : d_keys.Alternate()); + + DoubleBuffer d_values_remaining_passes( + (is_overwrite_okay || is_num_passes_odd) ? d_values.Alternate() : static_cast(allocations[1]), + (is_overwrite_okay) ? d_values.Current() : (is_num_passes_odd) ? static_cast(allocations[1]) : d_values.Alternate()); + + // Run first pass, consuming from the input's current buffers + int current_bit = begin_bit; + + if (CubDebug(error = InvokePass( + d_keys.Current(), d_keys_remaining_passes.Current(), + d_values.Current(), d_values_remaining_passes.Current(), + current_bit, + (current_bit < alt_end_bit) ? alt_pass_config : pass_config))) break; + + // Run remaining passes + while (current_bit < end_bit) + { + if (CubDebug(error = InvokePass( + d_keys_remaining_passes.d_buffers[d_keys_remaining_passes.selector], d_keys_remaining_passes.d_buffers[d_keys_remaining_passes.selector ^ 1], + d_values_remaining_passes.d_buffers[d_keys_remaining_passes.selector], d_values_remaining_passes.d_buffers[d_keys_remaining_passes.selector ^ 1], + current_bit, + (current_bit < alt_end_bit) ? 
alt_pass_config : pass_config))) break; + + // Invert selectors and update current bit + d_keys_remaining_passes.selector ^= 1; + d_values_remaining_passes.selector ^= 1; + } + + // Update selector + if (!is_overwrite_okay) { + num_passes = 1; // Sorted data always ends up in the other vector + } + + d_keys.selector = (d_keys.selector + num_passes) & 1; + d_values.selector = (d_values.selector + num_passes) & 1; + } + while (0); + + return error; + +#endif // CUB_RUNTIME_ENABLED + } + + + //------------------------------------------------------------------------------ + // Chained policy invocation + //------------------------------------------------------------------------------ + + /// Invocation + template + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t Invoke() + { + typedef typename DispatchSegmentedRadixSort::MaxPolicy MaxPolicyT; + + // Force kernel code-generation in all compiler passes + return InvokePasses( + DeviceSegmentedRadixSortKernel, + DeviceSegmentedRadixSortKernel); + } + + + //------------------------------------------------------------------------------ + // Dispatch entrypoints + //------------------------------------------------------------------------------ + + + /// Internal dispatch routine + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Dispatch( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + DoubleBuffer &d_keys, ///< [in,out] Double-buffer whose current buffer contains the unsorted input keys and, upon return, is updated to point to the sorted output keys + DoubleBuffer &d_values, ///< [in,out] Double-buffer whose current buffer contains the unsorted input values and, upon return, is updated to point to the sorted output values + int num_items, ///< [in] Number of items to sort + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + int begin_bit, ///< [in] The beginning (least-significant) bit index needed for key comparison + int end_bit, ///< [in] The past-the-end (most-significant) bit index needed for key comparison + bool is_overwrite_okay, ///< [in] Whether is okay to overwrite source buffers + cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous) ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
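// A sketch of the reasoning behind the selector update in InvokePasses above: with the
// DoubleBuffer (is_overwrite_okay) overloads, an odd number of digit passes leaves the
// sorted data in the buffer that was d_keys.Alternate() on entry, so the selector flips;
// an even number leaves it back in d_keys.Current(), so the selector is unchanged.  With
// the non-overwriting pointer overloads a third, temporary buffer is ping-ponged against
// instead and the result always lands in the caller's output buffer, which is why
// num_passes is forced to 1 (a single flip) before the selector arithmetic.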
+ { + typedef typename DispatchSegmentedRadixSort::MaxPolicy MaxPolicyT; + + cudaError_t error; + do { + // Get PTX version + int ptx_version; + if (CubDebug(error = PtxVersion(ptx_version))) break; + + // Create dispatch functor + DispatchSegmentedRadixSort dispatch( + d_temp_storage, temp_storage_bytes, + d_keys, d_values, + num_items, num_segments, d_begin_offsets, d_end_offsets, + begin_bit, end_bit, is_overwrite_okay, + stream, debug_synchronous, ptx_version); + + // Dispatch to chained policy + if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch))) break; + + } while (0); + + return error; + } +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_reduce.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_reduce.cuh new file mode 100644 index 0000000..e9d1b7a --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_reduce.cuh @@ -0,0 +1,882 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceReduce provides device-wide, parallel operations for computing a reduction across a sequence of data items residing within device-accessible memory. 
+ */ + +#pragma once + +#include +#include + +#include "../../agent/agent_reduce.cuh" +#include "../../iterator/arg_index_input_iterator.cuh" +#include "../../thread/thread_operators.cuh" +#include "../../grid/grid_even_share.cuh" +#include "../../iterator/arg_index_input_iterator.cuh" +#include "../../util_debug.cuh" +#include "../../util_device.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/****************************************************************************** + * Kernel entry points + *****************************************************************************/ + +/** + * Reduce region kernel entry point (multi-block). Computes privatized reductions, one per thread block. + */ +template < + typename ChainedPolicyT, ///< Chained tuning policy + typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator + typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator + typename OffsetT, ///< Signed integer type for global offsets + typename ReductionOpT> ///< Binary reduction functor type having member T operator()(const T &a, const T &b) +__launch_bounds__ (int(ChainedPolicyT::ActivePolicy::ReducePolicy::BLOCK_THREADS)) +__global__ void DeviceReduceKernel( + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + OffsetT num_items, ///< [in] Total number of input data items + GridEvenShare even_share, ///< [in] Even-share descriptor for mapping an equal number of tiles onto each thread block + ReductionOpT reduction_op) ///< [in] Binary reduction functor +{ + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type + + // Thread block type for reducing input tiles + typedef AgentReduce< + typename ChainedPolicyT::ActivePolicy::ReducePolicy, + InputIteratorT, + OutputIteratorT, + OffsetT, + ReductionOpT> + AgentReduceT; + + // Shared memory storage + __shared__ typename AgentReduceT::TempStorage temp_storage; + + // Consume input tiles + OutputT block_aggregate = AgentReduceT(temp_storage, d_in, reduction_op).ConsumeTiles(even_share); + + // Output result + if (threadIdx.x == 0) + d_out[blockIdx.x] = block_aggregate; +} + + +/** + * Reduce a single tile kernel entry point (single-block). Can be used to aggregate privatized thread block reductions from a previous multi-block reduction pass. 
+ */ +template < + typename ChainedPolicyT, ///< Chained tuning policy + typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator + typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator + typename OffsetT, ///< Signed integer type for global offsets + typename ReductionOpT, ///< Binary reduction functor type having member T operator()(const T &a, const T &b) + typename OuputT> ///< Data element type that is convertible to the \p value type of \p OutputIteratorT +__launch_bounds__ (int(ChainedPolicyT::ActivePolicy::SingleTilePolicy::BLOCK_THREADS), 1) +__global__ void DeviceReduceSingleTileKernel( + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + OffsetT num_items, ///< [in] Total number of input data items + ReductionOpT reduction_op, ///< [in] Binary reduction functor + OuputT init) ///< [in] The initial value of the reduction +{ + // Thread block type for reducing input tiles + typedef AgentReduce< + typename ChainedPolicyT::ActivePolicy::SingleTilePolicy, + InputIteratorT, + OutputIteratorT, + OffsetT, + ReductionOpT> + AgentReduceT; + + // Shared memory storage + __shared__ typename AgentReduceT::TempStorage temp_storage; + + // Check if empty problem + if (num_items == 0) + { + if (threadIdx.x == 0) + *d_out = init; + return; + } + + // Consume input tiles + OuputT block_aggregate = AgentReduceT(temp_storage, d_in, reduction_op).ConsumeRange( + OffsetT(0), + num_items); + + // Output result + if (threadIdx.x == 0) + *d_out = reduction_op(init, block_aggregate); +} + + +/// Normalize input iterator to segment offset +template +__device__ __forceinline__ +void NormalizeReductionOutput( + T &/*val*/, + OffsetT /*base_offset*/, + IteratorT /*itr*/) +{} + + +/// Normalize input iterator to segment offset (specialized for arg-index) +template +__device__ __forceinline__ +void NormalizeReductionOutput( + KeyValuePairT &val, + OffsetT base_offset, + ArgIndexInputIterator /*itr*/) +{ + val.key -= base_offset; +} + + +/** + * Segmented reduction (one block per segment) + */ +template < + typename ChainedPolicyT, ///< Chained tuning policy + typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator + typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator + typename OffsetIteratorT, ///< Random-access input iterator type for reading segment offsets \iterator + typename OffsetT, ///< Signed integer type for global offsets + typename ReductionOpT, ///< Binary reduction functor type having member T operator()(const T &a, const T &b) + typename OutputT> ///< Data element type that is convertible to the \p value type of \p OutputIteratorT +__launch_bounds__ (int(ChainedPolicyT::ActivePolicy::ReducePolicy::BLOCK_THREADS)) +__global__ void DeviceSegmentedReduceKernel( + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. 
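// A minimal usage sketch of the front-end that launches this one-block-per-segment
// kernel (assumes <cub/cub.cuh> is included; the host wrapper and pointer names are
// illustrative).  As elsewhere in CUB, the first call only computes temp_storage_bytes.
void segmented_sum_sketch(const float *d_in, float *d_out,
                          int num_segments, const int *d_offsets)
{
    void  *d_temp_storage     = NULL;
    size_t temp_storage_bytes = 0;

    cub::DeviceSegmentedReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out,
                                    num_segments, d_offsets, d_offsets + 1);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceSegmentedReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out,
                                    num_segments, d_offsets, d_offsets + 1);
    cudaFree(d_temp_storage);
}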
If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + int /*num_segments*/, ///< [in] The number of segments that comprise the sorting data + ReductionOpT reduction_op, ///< [in] Binary reduction functor + OutputT init) ///< [in] The initial value of the reduction +{ + // Thread block type for reducing input tiles + typedef AgentReduce< + typename ChainedPolicyT::ActivePolicy::ReducePolicy, + InputIteratorT, + OutputIteratorT, + OffsetT, + ReductionOpT> + AgentReduceT; + + // Shared memory storage + __shared__ typename AgentReduceT::TempStorage temp_storage; + + OffsetT segment_begin = d_begin_offsets[blockIdx.x]; + OffsetT segment_end = d_end_offsets[blockIdx.x]; + + // Check if empty problem + if (segment_begin == segment_end) + { + if (threadIdx.x == 0) + d_out[blockIdx.x] = init; + return; + } + + // Consume input tiles + OutputT block_aggregate = AgentReduceT(temp_storage, d_in, reduction_op).ConsumeRange( + segment_begin, + segment_end); + + // Normalize as needed + NormalizeReductionOutput(block_aggregate, segment_begin, d_in); + + if (threadIdx.x == 0) + d_out[blockIdx.x] = reduction_op(init, block_aggregate);; +} + + + + +/****************************************************************************** + * Policy + ******************************************************************************/ + +template < + typename OuputT, ///< Data type + typename OffsetT, ///< Signed integer type for global offsets + typename ReductionOpT> ///< Binary reduction functor type having member T operator()(const T &a, const T &b) +struct DeviceReducePolicy +{ + //------------------------------------------------------------------------------ + // Architecture-specific tuning policies + //------------------------------------------------------------------------------ + + /// SM13 + struct Policy130 : ChainedPolicy<130, Policy130, Policy130> + { + // ReducePolicy + typedef AgentReducePolicy< + CUB_SCALED_GRANULARITIES(128, 8, OuputT), ///< Threads per block, items per thread + 2, ///< Number of items per vectorized load + BLOCK_REDUCE_RAKING, ///< Cooperative block-wide reduction algorithm to use + LOAD_DEFAULT> ///< Cache load modifier + ReducePolicy; + + // SingleTilePolicy + typedef ReducePolicy SingleTilePolicy; + + // SegmentedReducePolicy + typedef ReducePolicy SegmentedReducePolicy; + }; + + + /// SM20 + struct Policy200 : ChainedPolicy<200, Policy200, Policy130> + { + // ReducePolicy (GTX 580: 178.9 GB/s @ 48M 4B items, 158.1 GB/s @ 192M 1B items) + typedef AgentReducePolicy< + CUB_SCALED_GRANULARITIES(128, 8, OuputT), ///< Threads per block, items per thread + 4, ///< Number of items per vectorized load + BLOCK_REDUCE_RAKING, ///< Cooperative block-wide reduction algorithm to use + LOAD_DEFAULT> ///< Cache load modifier + ReducePolicy; + + // SingleTilePolicy + typedef ReducePolicy SingleTilePolicy; + + // SegmentedReducePolicy + typedef ReducePolicy SegmentedReducePolicy; + }; + + + /// SM30 + struct Policy300 : ChainedPolicy<300, Policy300, Policy200> + { + // ReducePolicy (GTX670: 154.0 @ 48M 4B items) + typedef AgentReducePolicy< + CUB_SCALED_GRANULARITIES(256, 20, OuputT), ///< Threads per block, items per thread + 2, ///< Number of items per vectorized load + BLOCK_REDUCE_WARP_REDUCTIONS, ///< Cooperative block-wide reduction algorithm to use + LOAD_DEFAULT> ///< Cache load modifier + ReducePolicy; + + // SingleTilePolicy + typedef ReducePolicy SingleTilePolicy; + + // SegmentedReducePolicy + typedef ReducePolicy SegmentedReducePolicy; + }; + + + /// SM35 + struct 
Policy350 : ChainedPolicy<350, Policy350, Policy300> + { + // ReducePolicy (GTX Titan: 255.1 GB/s @ 48M 4B items; 228.7 GB/s @ 192M 1B items) + typedef AgentReducePolicy< + CUB_SCALED_GRANULARITIES(256, 20, OuputT), ///< Threads per block, items per thread + 4, ///< Number of items per vectorized load + BLOCK_REDUCE_WARP_REDUCTIONS, ///< Cooperative block-wide reduction algorithm to use + LOAD_LDG> ///< Cache load modifier + ReducePolicy; + + // SingleTilePolicy + typedef ReducePolicy SingleTilePolicy; + + // SegmentedReducePolicy + typedef ReducePolicy SegmentedReducePolicy; + }; + + /// SM60 + struct Policy600 : ChainedPolicy<600, Policy600, Policy350> + { + // ReducePolicy (P100: 591 GB/s @ 64M 4B items; 583 GB/s @ 256M 1B items) + typedef AgentReducePolicy< + CUB_SCALED_GRANULARITIES(256, 16, OuputT), ///< Threads per block, items per thread + 4, ///< Number of items per vectorized load + BLOCK_REDUCE_WARP_REDUCTIONS, ///< Cooperative block-wide reduction algorithm to use + LOAD_LDG> ///< Cache load modifier + ReducePolicy; + + // SingleTilePolicy + typedef ReducePolicy SingleTilePolicy; + + // SegmentedReducePolicy + typedef ReducePolicy SegmentedReducePolicy; + }; + + + /// MaxPolicy + typedef Policy600 MaxPolicy; + +}; + + + +/****************************************************************************** + * Single-problem dispatch + ******************************************************************************/ + +/** + * Utility class for dispatching the appropriately-tuned kernels for device-wide reduction + */ +template < + typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator + typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator + typename OffsetT, ///< Signed integer type for global offsets + typename ReductionOpT> ///< Binary reduction functor type having member T operator()(const T &a, const T &b) +struct DispatchReduce : + DeviceReducePolicy< + typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type, // ... else the output iterator's value type + OffsetT, + ReductionOpT> +{ + //------------------------------------------------------------------------------ + // Constants + //------------------------------------------------------------------------------ + + // Data type of output iterator + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type + + + //------------------------------------------------------------------------------ + // Problem state + //------------------------------------------------------------------------------ + + void *d_temp_storage; ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
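// A minimal usage sketch of the front-end that forwards to this dispatcher, wiring a
// binary functor and an initial value into the reduction_op and init problem-state
// members declared just below (assumes <cub/cub.cuh> and <climits> are included; the
// host wrapper and pointer names are illustrative assumptions).
void max_reduce_sketch(const int *d_in, int *d_out, int num_items)
{
    void  *d_temp_storage     = NULL;
    size_t temp_storage_bytes = 0;

    // Size query, allocation, then the actual (single-tile or two-pass) reduction.
    cub::DeviceReduce::Reduce(d_temp_storage, temp_storage_bytes,
                              d_in, d_out, num_items, cub::Max(), INT_MIN);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceReduce::Reduce(d_temp_storage, temp_storage_bytes,
                              d_in, d_out, num_items, cub::Max(), INT_MIN);
    cudaFree(d_temp_storage);
}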
+ size_t &temp_storage_bytes; ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in; ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out; ///< [out] Pointer to the output aggregate + OffsetT num_items; ///< [in] Total number of input items (i.e., length of \p d_in) + ReductionOpT reduction_op; ///< [in] Binary reduction functor + OutputT init; ///< [in] The initial value of the reduction + cudaStream_t stream; ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous; ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + int ptx_version; ///< [in] PTX version + + //------------------------------------------------------------------------------ + // Constructor + //------------------------------------------------------------------------------ + + /// Constructor + CUB_RUNTIME_FUNCTION __forceinline__ + DispatchReduce( + void* d_temp_storage, + size_t &temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + OffsetT num_items, + ReductionOpT reduction_op, + OutputT init, + cudaStream_t stream, + bool debug_synchronous, + int ptx_version) + : + d_temp_storage(d_temp_storage), + temp_storage_bytes(temp_storage_bytes), + d_in(d_in), + d_out(d_out), + num_items(num_items), + reduction_op(reduction_op), + init(init), + stream(stream), + debug_synchronous(debug_synchronous), + ptx_version(ptx_version) + {} + + + //------------------------------------------------------------------------------ + // Small-problem (single tile) invocation + //------------------------------------------------------------------------------ + + /// Invoke a single block block to reduce in-core + template < + typename ActivePolicyT, ///< Umbrella policy active for the target device + typename SingleTileKernelT> ///< Function type of cub::DeviceReduceSingleTileKernel + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t InvokeSingleTile( + SingleTileKernelT single_tile_kernel) ///< [in] Kernel function pointer to parameterization of cub::DeviceReduceSingleTileKernel + { +#ifndef CUB_RUNTIME_ENABLED + (void)single_tile_kernel; + + // Kernel launch not supported from this device + return CubDebug(cudaErrorNotSupported ); +#else + cudaError error = cudaSuccess; + do + { + // Return if the caller is simply requesting the size of the storage allocation + if (d_temp_storage == NULL) + { + temp_storage_bytes = 1; + break; + } + + // Log single_reduce_sweep_kernel configuration + if (debug_synchronous) _CubLog("Invoking DeviceReduceSingleTileKernel<<<1, %d, 0, %lld>>>(), %d items per thread\n", + ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, + (long long) stream, + ActivePolicyT::SingleTilePolicy::ITEMS_PER_THREAD); + + // Invoke single_reduce_sweep_kernel + single_tile_kernel<<<1, ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, 0, stream>>>( + d_in, + d_out, + num_items, + reduction_op, + init); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + } + while (0); + + return error; + +#endif // CUB_RUNTIME_ENABLED + } + + + //------------------------------------------------------------------------------ + // Normal problem size invocation (two-pass) + 
//------------------------------------------------------------------------------ + + /// Invoke two-passes to reduce + template < + typename ActivePolicyT, ///< Umbrella policy active for the target device + typename ReduceKernelT, ///< Function type of cub::DeviceReduceKernel + typename SingleTileKernelT> ///< Function type of cub::DeviceReduceSingleTileKernel + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t InvokePasses( + ReduceKernelT reduce_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceReduceKernel + SingleTileKernelT single_tile_kernel) ///< [in] Kernel function pointer to parameterization of cub::DeviceReduceSingleTileKernel + { +#ifndef CUB_RUNTIME_ENABLED + (void) reduce_kernel; + (void) single_tile_kernel; + + // Kernel launch not supported from this device + return CubDebug(cudaErrorNotSupported ); +#else + + cudaError error = cudaSuccess; + do + { + // Get device ordinal + int device_ordinal; + if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; + + // Get SM count + int sm_count; + if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; + + // Init regular kernel configuration + KernelConfig reduce_config; + if (CubDebug(error = reduce_config.Init<typename ActivePolicyT::ReducePolicy>(reduce_kernel))) break; + int reduce_device_occupancy = reduce_config.sm_occupancy * sm_count; + + // Even-share work distribution + int max_blocks = reduce_device_occupancy * CUB_SUBSCRIPTION_FACTOR(ptx_version); + GridEvenShare<OffsetT> even_share; + even_share.DispatchInit(num_items, max_blocks, reduce_config.tile_size); + + // Temporary storage allocation requirements + void* allocations[1]; + size_t allocation_sizes[1] = + { + max_blocks * sizeof(OutputT) // bytes needed for privatized block reductions + }; + + // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) + if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; + if (d_temp_storage == NULL) + { + // Return if the caller is simply requesting the size of the storage allocation + return cudaSuccess; + } + + // Alias the allocation for the privatized per-block reductions + OutputT *d_block_reductions = (OutputT*) allocations[0]; + + // Get grid size for device_reduce_sweep_kernel + int reduce_grid_size = even_share.grid_size; + + // Log device_reduce_sweep_kernel configuration + if (debug_synchronous) _CubLog("Invoking DeviceReduceKernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", + reduce_grid_size, + ActivePolicyT::ReducePolicy::BLOCK_THREADS, + (long long) stream, + ActivePolicyT::ReducePolicy::ITEMS_PER_THREAD, + reduce_config.sm_occupancy); + + // Invoke DeviceReduceKernel + reduce_kernel<<<reduce_grid_size, ActivePolicyT::ReducePolicy::BLOCK_THREADS, 0, stream>>>( + d_in, + d_block_reductions, + num_items, + even_share, + reduction_op); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + + // Log single_reduce_sweep_kernel configuration + if (debug_synchronous) _CubLog("Invoking DeviceReduceSingleTileKernel<<<1, %d, 0, %lld>>>(), %d items per thread\n", + ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, + (long long) stream, + ActivePolicyT::SingleTilePolicy::ITEMS_PER_THREAD); + + // Invoke DeviceReduceSingleTileKernel + single_tile_kernel<<<1, ActivePolicyT::SingleTilePolicy::BLOCK_THREADS, 0, stream>>>( + d_block_reductions, + d_out, + 
reduce_grid_size, + reduction_op, + init); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + } + while (0); + + return error; + +#endif // CUB_RUNTIME_ENABLED + + } + + + //------------------------------------------------------------------------------ + // Chained policy invocation + //------------------------------------------------------------------------------ + + /// Invocation + template <typename ActivePolicyT> + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t Invoke() + { + typedef typename ActivePolicyT::SingleTilePolicy SingleTilePolicyT; + typedef typename DispatchReduce::MaxPolicy MaxPolicyT; + + // Force kernel code-generation in all compiler passes + if (num_items <= (SingleTilePolicyT::BLOCK_THREADS * SingleTilePolicyT::ITEMS_PER_THREAD)) + { + // Small, single tile size + return InvokeSingleTile<ActivePolicyT>( + DeviceReduceSingleTileKernel<MaxPolicyT, InputIteratorT, OutputIteratorT, OffsetT, ReductionOpT, OutputT>); + } + else + { + // Regular size + return InvokePasses<ActivePolicyT>( + DeviceReduceKernel<MaxPolicyT, InputIteratorT, OutputT*, OffsetT, ReductionOpT>, + DeviceReduceSingleTileKernel<MaxPolicyT, OutputT*, OutputIteratorT, int, ReductionOpT, OutputT>); + } + } + + + //------------------------------------------------------------------------------ + // Dispatch entrypoints + //------------------------------------------------------------------------------ + + /** + * Internal dispatch routine for computing a device-wide reduction + */ + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Dispatch( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + ReductionOpT reduction_op, ///< [in] Binary reduction functor + OutputT init, ///< [in] The initial value of the reduction + cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false.
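+    // [Editorial sketch, not part of the upstream CUB 1.8.0 header] This dispatch
+    // entrypoint is normally reached through the public cub::DeviceReduce front-end,
+    // using CUB's usual two-phase temporary-storage pattern. Assuming caller-allocated
+    // device buffers d_in/d_out and a Sum reduction:
+    //
+    //   void  *d_temp_storage = NULL;
+    //   size_t temp_storage_bytes = 0;
+    //   cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);  // size query only
+    //   cudaMalloc(&d_temp_storage, temp_storage_bytes);
+    //   cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);  // run the reduction
+    //   cudaFree(d_temp_storage);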
+ { + typedef typename DispatchReduce::MaxPolicy MaxPolicyT; + + cudaError error = cudaSuccess; + do + { + // Get PTX version + int ptx_version; + if (CubDebug(error = PtxVersion(ptx_version))) break; + + // Create dispatch functor + DispatchReduce dispatch( + d_temp_storage, temp_storage_bytes, + d_in, d_out, num_items, reduction_op, init, + stream, debug_synchronous, ptx_version); + + // Dispatch to chained policy + if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch))) break; + } + while (0); + + return error; + } +}; + + + +/****************************************************************************** + * Segmented dispatch + ******************************************************************************/ + +/** + * Utility class for dispatching the appropriately-tuned kernels for device-wide reduction + */ +template < + typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator + typename OutputIteratorT, ///< Output iterator type for recording the reduced aggregate \iterator + typename OffsetIteratorT, ///< Random-access input iterator type for reading segment offsets \iterator + typename OffsetT, ///< Signed integer type for global offsets + typename ReductionOpT> ///< Binary reduction functor type having member T operator()(const T &a, const T &b) +struct DispatchSegmentedReduce : + DeviceReducePolicy< + typename std::iterator_traits::value_type, + OffsetT, + ReductionOpT> +{ + //------------------------------------------------------------------------------ + // Constants + //------------------------------------------------------------------------------ + + /// The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type + + + //------------------------------------------------------------------------------ + // Problem state + //------------------------------------------------------------------------------ + + void *d_temp_storage; ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes; ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in; ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out; ///< [out] Pointer to the output aggregate + OffsetT num_segments; ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets; ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets; ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + ReductionOpT reduction_op; ///< [in] Binary reduction functor + OutputT init; ///< [in] The initial value of the reduction + cudaStream_t stream; ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous; ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. 
Also causes launch configurations to be printed to the console. Default is \p false. + int ptx_version; ///< [in] PTX version + + //------------------------------------------------------------------------------ + // Constructor + //------------------------------------------------------------------------------ + + /// Constructor + CUB_RUNTIME_FUNCTION __forceinline__ + DispatchSegmentedReduce( + void* d_temp_storage, + size_t &temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + OffsetT num_segments, + OffsetIteratorT d_begin_offsets, + OffsetIteratorT d_end_offsets, + ReductionOpT reduction_op, + OutputT init, + cudaStream_t stream, + bool debug_synchronous, + int ptx_version) + : + d_temp_storage(d_temp_storage), + temp_storage_bytes(temp_storage_bytes), + d_in(d_in), + d_out(d_out), + num_segments(num_segments), + d_begin_offsets(d_begin_offsets), + d_end_offsets(d_end_offsets), + reduction_op(reduction_op), + init(init), + stream(stream), + debug_synchronous(debug_synchronous), + ptx_version(ptx_version) + {} + + + + //------------------------------------------------------------------------------ + // Chained policy invocation + //------------------------------------------------------------------------------ + + /// Invocation + template < + typename ActivePolicyT, ///< Umbrella policy active for the target device + typename DeviceSegmentedReduceKernelT> ///< Function type of cub::DeviceSegmentedReduceKernel + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t InvokePasses( + DeviceSegmentedReduceKernelT segmented_reduce_kernel) ///< [in] Kernel function pointer to parameterization of cub::DeviceSegmentedReduceKernel + { +#ifndef CUB_RUNTIME_ENABLED + (void)segmented_reduce_kernel; + // Kernel launch not supported from this device + return CubDebug(cudaErrorNotSupported ); +#else + cudaError error = cudaSuccess; + do + { + // Return if the caller is simply requesting the size of the storage allocation + if (d_temp_storage == NULL) + { + temp_storage_bytes = 1; + return cudaSuccess; + } + + // Init kernel configuration + KernelConfig segmented_reduce_config; + if (CubDebug(error = segmented_reduce_config.Init<typename ActivePolicyT::SegmentedReducePolicy>(segmented_reduce_kernel))) break; + + // Log device_reduce_sweep_kernel configuration + if (debug_synchronous) _CubLog("Invoking SegmentedDeviceReduceKernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", + num_segments, + ActivePolicyT::SegmentedReducePolicy::BLOCK_THREADS, + (long long) stream, + ActivePolicyT::SegmentedReducePolicy::ITEMS_PER_THREAD, + segmented_reduce_config.sm_occupancy); + + // Invoke DeviceSegmentedReduceKernel + segmented_reduce_kernel<<<num_segments, ActivePolicyT::SegmentedReducePolicy::BLOCK_THREADS, 0, stream>>>( + d_in, + d_out, + d_begin_offsets, + d_end_offsets, + num_segments, + reduction_op, + init); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + } + while (0); + + return error; + +#endif // CUB_RUNTIME_ENABLED + + } + + + /// Invocation + template <typename ActivePolicyT> + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t Invoke() + { + typedef typename DispatchSegmentedReduce::MaxPolicy MaxPolicyT; + + // Force kernel code-generation in all compiler passes + return InvokePasses<ActivePolicyT>( + DeviceSegmentedReduceKernel<MaxPolicyT, InputIteratorT, OutputIteratorT, OffsetIteratorT, OffsetT, ReductionOpT, OutputT>); + } + + + //------------------------------------------------------------------------------ + // Dispatch entrypoints + //------------------------------------------------------------------------------ + + /** + * Internal dispatch routine 
for computing a device-wide reduction + */ + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Dispatch( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output aggregate + int num_segments, ///< [in] The number of segments that comprise the sorting data + OffsetIteratorT d_begin_offsets, ///< [in] Pointer to the sequence of beginning offsets of length \p num_segments, such that d_begin_offsets[i] is the first element of the ith data segment in d_keys_* and d_values_* + OffsetIteratorT d_end_offsets, ///< [in] Pointer to the sequence of ending offsets of length \p num_segments, such that d_end_offsets[i]-1 is the last element of the ith data segment in d_keys_* and d_values_*. If d_end_offsets[i]-1 <= d_begin_offsets[i], the ith is considered empty. + ReductionOpT reduction_op, ///< [in] Binary reduction functor + OutputT init, ///< [in] The initial value of the reduction + cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + typedef typename DispatchSegmentedReduce::MaxPolicy MaxPolicyT; + + if (num_segments <= 0) + return cudaSuccess; + + cudaError error = cudaSuccess; + do + { + // Get PTX version + int ptx_version; + if (CubDebug(error = PtxVersion(ptx_version))) break; + + // Create dispatch functor + DispatchSegmentedReduce dispatch( + d_temp_storage, temp_storage_bytes, + d_in, d_out, + num_segments, d_begin_offsets, d_end_offsets, + reduction_op, init, + stream, debug_synchronous, ptx_version); + + // Dispatch to chained policy + if (CubDebug(error = MaxPolicyT::Invoke(ptx_version, dispatch))) break; + } + while (0); + + return error; + } +}; + + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_reduce_by_key.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_reduce_by_key.cuh new file mode 100644 index 0000000..6f4837b --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_reduce_by_key.cuh @@ -0,0 +1,554 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceReduceByKey provides device-wide, parallel operations for reducing segments of values residing within device-accessible memory. + */ + +#pragma once + +#include +#include + +#include "dispatch_scan.cuh" +#include "../../agent/agent_reduce_by_key.cuh" +#include "../../thread/thread_operators.cuh" +#include "../../grid/grid_queue.cuh" +#include "../../util_device.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/****************************************************************************** + * Kernel entry points + *****************************************************************************/ + +/** + * Multi-block reduce-by-key sweep kernel entry point + */ +template < + typename AgentReduceByKeyPolicyT, ///< Parameterized AgentReduceByKeyPolicyT tuning policy type + typename KeysInputIteratorT, ///< Random-access input iterator type for keys + typename UniqueOutputIteratorT, ///< Random-access output iterator type for keys + typename ValuesInputIteratorT, ///< Random-access input iterator type for values + typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values + typename NumRunsOutputIteratorT, ///< Output iterator type for recording number of segments encountered + typename ScanTileStateT, ///< Tile status interface type + typename EqualityOpT, ///< KeyT equality operator type + typename ReductionOpT, ///< ValueT reduction operator type + typename OffsetT> ///< Signed integer type for global offsets +__launch_bounds__ (int(AgentReduceByKeyPolicyT::BLOCK_THREADS)) +__global__ void DeviceReduceByKeyKernel( + KeysInputIteratorT d_keys_in, ///< Pointer to the input sequence of keys + UniqueOutputIteratorT d_unique_out, ///< Pointer to the output sequence of unique keys (one key per run) + ValuesInputIteratorT d_values_in, ///< Pointer to the input sequence of corresponding values + AggregatesOutputIteratorT d_aggregates_out, ///< Pointer to the output sequence of value aggregates (one aggregate per run) + NumRunsOutputIteratorT d_num_runs_out, ///< Pointer to total number of runs encountered (i.e., the length of d_unique_out) + ScanTileStateT tile_state, ///< Tile status interface + int start_tile, ///< The starting tile for the current grid + EqualityOpT equality_op, ///< KeyT equality operator + ReductionOpT reduction_op, ///< ValueT reduction operator + OffsetT num_items) ///< Total number of 
items to select from +{ + // Thread block type for reducing tiles of value segments + typedef AgentReduceByKey< + AgentReduceByKeyPolicyT, + KeysInputIteratorT, + UniqueOutputIteratorT, + ValuesInputIteratorT, + AggregatesOutputIteratorT, + NumRunsOutputIteratorT, + EqualityOpT, + ReductionOpT, + OffsetT> + AgentReduceByKeyT; + + // Shared memory for AgentReduceByKey + __shared__ typename AgentReduceByKeyT::TempStorage temp_storage; + + // Process tiles + AgentReduceByKeyT(temp_storage, d_keys_in, d_unique_out, d_values_in, d_aggregates_out, d_num_runs_out, equality_op, reduction_op).ConsumeRange( + num_items, + tile_state, + start_tile); +} + + + + +/****************************************************************************** + * Dispatch + ******************************************************************************/ + +/** + * Utility class for dispatching the appropriately-tuned kernels for DeviceReduceByKey + */ +template < + typename KeysInputIteratorT, ///< Random-access input iterator type for keys + typename UniqueOutputIteratorT, ///< Random-access output iterator type for keys + typename ValuesInputIteratorT, ///< Random-access input iterator type for values + typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values + typename NumRunsOutputIteratorT, ///< Output iterator type for recording number of segments encountered + typename EqualityOpT, ///< KeyT equality operator type + typename ReductionOpT, ///< ValueT reduction operator type + typename OffsetT> ///< Signed integer type for global offsets +struct DispatchReduceByKey +{ + //------------------------------------------------------------------------- + // Types and constants + //------------------------------------------------------------------------- + + // The input keys type + typedef typename std::iterator_traits::value_type KeyInputT; + + // The output keys type + typedef typename If<(Equals::value_type, void>::VALUE), // KeyOutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type KeyOutputT; // ... else the output iterator's value type + + // The input values type + typedef typename std::iterator_traits::value_type ValueInputT; + + // The output values type + typedef typename If<(Equals::value_type, void>::VALUE), // ValueOutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type ValueOutputT; // ... else the output iterator's value type + + enum + { + INIT_KERNEL_THREADS = 128, + MAX_INPUT_BYTES = CUB_MAX(sizeof(KeyOutputT), sizeof(ValueOutputT)), + COMBINED_INPUT_BYTES = sizeof(KeyOutputT) + sizeof(ValueOutputT), + }; + + // Tile status descriptor interface type + typedef ReduceByKeyScanTileState ScanTileStateT; + + + //------------------------------------------------------------------------- + // Tuning policies + //------------------------------------------------------------------------- + + /// SM35 + struct Policy350 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = 6, + ITEMS_PER_THREAD = (MAX_INPUT_BYTES <= 8) ? 
6 : CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), + }; + + typedef AgentReduceByKeyPolicy< + 128, + ITEMS_PER_THREAD, + BLOCK_LOAD_DIRECT, + LOAD_LDG, + BLOCK_SCAN_WARP_SCANS> + ReduceByKeyPolicyT; + }; + + /// SM30 + struct Policy300 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = 6, + ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), + }; + + typedef AgentReduceByKeyPolicy< + 128, + ITEMS_PER_THREAD, + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + BLOCK_SCAN_WARP_SCANS> + ReduceByKeyPolicyT; + }; + + /// SM20 + struct Policy200 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = 11, + ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), + }; + + typedef AgentReduceByKeyPolicy< + 128, + ITEMS_PER_THREAD, + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + BLOCK_SCAN_WARP_SCANS> + ReduceByKeyPolicyT; + }; + + /// SM13 + struct Policy130 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = 7, + ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, ((NOMINAL_4B_ITEMS_PER_THREAD * 8) + COMBINED_INPUT_BYTES - 1) / COMBINED_INPUT_BYTES)), + }; + + typedef AgentReduceByKeyPolicy< + 128, + ITEMS_PER_THREAD, + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + BLOCK_SCAN_WARP_SCANS> + ReduceByKeyPolicyT; + }; + + /// SM11 + struct Policy110 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = 5, + ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 8) / COMBINED_INPUT_BYTES)), + }; + + typedef AgentReduceByKeyPolicy< + 64, + ITEMS_PER_THREAD, + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + BLOCK_SCAN_RAKING> + ReduceByKeyPolicyT; + }; + + + /****************************************************************************** + * Tuning policies of current PTX compiler pass + ******************************************************************************/ + +#if (CUB_PTX_ARCH >= 350) + typedef Policy350 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 300) + typedef Policy300 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 200) + typedef Policy200 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 130) + typedef Policy130 PtxPolicy; + +#else + typedef Policy110 PtxPolicy; + +#endif + + // "Opaque" policies (whose parameterizations aren't reflected in the type signature) + struct PtxReduceByKeyPolicy : PtxPolicy::ReduceByKeyPolicyT {}; + + + /****************************************************************************** + * Utilities + ******************************************************************************/ + + /** + * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use + */ + template + CUB_RUNTIME_FUNCTION __forceinline__ + static void InitConfigs( + int ptx_version, + KernelConfig &reduce_by_key_config) + { + #if (CUB_PTX_ARCH > 0) + (void)ptx_version; + + // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy + reduce_by_key_config.template Init(); + + #else + + // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version + if (ptx_version >= 350) + { + reduce_by_key_config.template Init(); + } + else if (ptx_version >= 300) + { + reduce_by_key_config.template Init(); + } + else if (ptx_version >= 200) + { + reduce_by_key_config.template Init(); 
+ } + else if (ptx_version >= 130) + { + reduce_by_key_config.template Init(); + } + else + { + reduce_by_key_config.template Init(); + } + + #endif + } + + + /** + * Kernel kernel dispatch configuration. + */ + struct KernelConfig + { + int block_threads; + int items_per_thread; + int tile_items; + + template + CUB_RUNTIME_FUNCTION __forceinline__ + void Init() + { + block_threads = PolicyT::BLOCK_THREADS; + items_per_thread = PolicyT::ITEMS_PER_THREAD; + tile_items = block_threads * items_per_thread; + } + }; + + + //--------------------------------------------------------------------- + // Dispatch entrypoints + //--------------------------------------------------------------------- + + /** + * Internal dispatch routine for computing a device-wide reduce-by-key using the + * specified kernel functions. + */ + template < + typename ScanInitKernelT, ///< Function type of cub::DeviceScanInitKernel + typename ReduceByKeyKernelT> ///< Function type of cub::DeviceReduceByKeyKernelT + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Dispatch( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + KeysInputIteratorT d_keys_in, ///< [in] Pointer to the input sequence of keys + UniqueOutputIteratorT d_unique_out, ///< [out] Pointer to the output sequence of unique keys (one key per run) + ValuesInputIteratorT d_values_in, ///< [in] Pointer to the input sequence of corresponding values + AggregatesOutputIteratorT d_aggregates_out, ///< [out] Pointer to the output sequence of value aggregates (one aggregate per run) + NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs encountered (i.e., the length of d_unique_out) + EqualityOpT equality_op, ///< [in] KeyT equality operator + ReductionOpT reduction_op, ///< [in] ValueT reduction operator + OffsetT num_items, ///< [in] Total number of items to select from + cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
+ int /*ptx_version*/, ///< [in] PTX version of dispatch kernels + ScanInitKernelT init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanInitKernel + ReduceByKeyKernelT reduce_by_key_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceReduceByKeyKernel + KernelConfig reduce_by_key_config) ///< [in] Dispatch parameters that match the policy that \p reduce_by_key_kernel was compiled for + { + +#ifndef CUB_RUNTIME_ENABLED + (void)d_temp_storage; + (void)temp_storage_bytes; + (void)d_keys_in; + (void)d_unique_out; + (void)d_values_in; + (void)d_aggregates_out; + (void)d_num_runs_out; + (void)equality_op; + (void)reduction_op; + (void)num_items; + (void)stream; + (void)debug_synchronous; + (void)init_kernel; + (void)reduce_by_key_kernel; + (void)reduce_by_key_config; + + // Kernel launch not supported from this device + return CubDebug(cudaErrorNotSupported); + +#else + + cudaError error = cudaSuccess; + do + { + // Get device ordinal + int device_ordinal; + if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; + + // Get SM count + int sm_count; + if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; + + // Number of input tiles + int tile_size = reduce_by_key_config.block_threads * reduce_by_key_config.items_per_thread; + int num_tiles = (num_items + tile_size - 1) / tile_size; + + // Specify temporary storage allocation requirements + size_t allocation_sizes[1]; + if (CubDebug(error = ScanTileStateT::AllocationSize(num_tiles, allocation_sizes[0]))) break; // bytes needed for tile status descriptors + + // Compute allocation pointers into the single storage blob (or compute the necessary size of the blob) + void* allocations[1]; + if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; + if (d_temp_storage == NULL) + { + // Return if the caller is simply requesting the size of the storage allocation + break; + } + + // Construct the tile status interface + ScanTileStateT tile_state; + if (CubDebug(error = tile_state.Init(num_tiles, allocations[0], allocation_sizes[0]))) break; + + // Log init_kernel configuration + int init_grid_size = CUB_MAX(1, (num_tiles + INIT_KERNEL_THREADS - 1) / INIT_KERNEL_THREADS); + if (debug_synchronous) _CubLog("Invoking init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, INIT_KERNEL_THREADS, (long long) stream); + + // Invoke init_kernel to initialize tile descriptors + init_kernel<<>>( + tile_state, + num_tiles, + d_num_runs_out); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + + // Return if empty problem + if (num_items == 0) + break; + + // Get SM occupancy for reduce_by_key_kernel + int reduce_by_key_sm_occupancy; + if (CubDebug(error = MaxSmOccupancy( + reduce_by_key_sm_occupancy, // out + reduce_by_key_kernel, + reduce_by_key_config.block_threads))) break; + + // Get max x-dimension of grid + int max_dim_x; + if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; + + // Run grids in epochs (in case number of tiles exceeds max x-dimension + int scan_grid_size = CUB_MIN(num_tiles, max_dim_x); + for (int start_tile = 0; start_tile < num_tiles; start_tile += scan_grid_size) + { + // Log reduce_by_key_kernel configuration + if (debug_synchronous) 
_CubLog("Invoking %d reduce_by_key_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", + start_tile, scan_grid_size, reduce_by_key_config.block_threads, (long long) stream, reduce_by_key_config.items_per_thread, reduce_by_key_sm_occupancy); + + // Invoke reduce_by_key_kernel + reduce_by_key_kernel<<>>( + d_keys_in, + d_unique_out, + d_values_in, + d_aggregates_out, + d_num_runs_out, + tile_state, + start_tile, + equality_op, + reduction_op, + num_items); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + } + } + while (0); + + return error; + +#endif // CUB_RUNTIME_ENABLED + } + + + /** + * Internal dispatch routine + */ + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Dispatch( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + KeysInputIteratorT d_keys_in, ///< [in] Pointer to the input sequence of keys + UniqueOutputIteratorT d_unique_out, ///< [out] Pointer to the output sequence of unique keys (one key per run) + ValuesInputIteratorT d_values_in, ///< [in] Pointer to the input sequence of corresponding values + AggregatesOutputIteratorT d_aggregates_out, ///< [out] Pointer to the output sequence of value aggregates (one aggregate per run) + NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs encountered (i.e., the length of d_unique_out) + EqualityOpT equality_op, ///< [in] KeyT equality operator + ReductionOpT reduction_op, ///< [in] ValueT reduction operator + OffsetT num_items, ///< [in] Total number of items to select from + cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous) ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + cudaError error = cudaSuccess; + do + { + // Get PTX version + int ptx_version; + #if (CUB_PTX_ARCH == 0) + if (CubDebug(error = PtxVersion(ptx_version))) break; + #else + ptx_version = CUB_PTX_ARCH; + #endif + + // Get kernel kernel dispatch configurations + KernelConfig reduce_by_key_config; + InitConfigs(ptx_version, reduce_by_key_config); + + // Dispatch + if (CubDebug(error = Dispatch( + d_temp_storage, + temp_storage_bytes, + d_keys_in, + d_unique_out, + d_values_in, + d_aggregates_out, + d_num_runs_out, + equality_op, + reduction_op, + num_items, + stream, + debug_synchronous, + ptx_version, + DeviceCompactInitKernel, + DeviceReduceByKeyKernel, + reduce_by_key_config))) break; + } + while (0); + + return error; + } +}; + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_rle.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_rle.cuh new file mode 100644 index 0000000..98c3681 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_rle.cuh @@ -0,0 +1,538 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. 
+ * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceRle provides device-wide, parallel operations for run-length-encoding sequences of data items residing within device-accessible memory. 
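+ *
+ * [Editorial sketch, not part of the upstream CUB 1.8.0 header] The dispatcher in this
+ * file backs the public cub::DeviceRunLengthEncode front-end. A minimal sketch of the
+ * usual two-phase call pattern, assuming caller-allocated device arrays d_in,
+ * d_offsets_out, d_lengths_out, and d_num_runs_out:
+ *
+ *   void  *d_temp_storage = NULL;
+ *   size_t temp_storage_bytes = 0;
+ *   cub::DeviceRunLengthEncode::NonTrivialRuns(d_temp_storage, temp_storage_bytes,
+ *       d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items);   // size query only
+ *   cudaMalloc(&d_temp_storage, temp_storage_bytes);
+ *   cub::DeviceRunLengthEncode::NonTrivialRuns(d_temp_storage, temp_storage_bytes,
+ *       d_in, d_offsets_out, d_lengths_out, d_num_runs_out, num_items);   // encode the runs
+ *   cudaFree(d_temp_storage);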
+ */ + +#pragma once + +#include +#include + +#include "dispatch_scan.cuh" +#include "../../agent/agent_rle.cuh" +#include "../../thread/thread_operators.cuh" +#include "../../grid/grid_queue.cuh" +#include "../../util_device.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Kernel entry points + *****************************************************************************/ + +/** + * Select kernel entry point (multi-block) + * + * Performs functor-based selection if SelectOp functor type != NullType + * Otherwise performs flag-based selection if FlagIterator's value type != NullType + * Otherwise performs discontinuity selection (keep unique) + */ +template < + typename AgentRlePolicyT, ///< Parameterized AgentRlePolicyT tuning policy type + typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator + typename OffsetsOutputIteratorT, ///< Random-access output iterator type for writing run-offset values \iterator + typename LengthsOutputIteratorT, ///< Random-access output iterator type for writing run-length values \iterator + typename NumRunsOutputIteratorT, ///< Output iterator type for recording the number of runs encountered \iterator + typename ScanTileStateT, ///< Tile status interface type + typename EqualityOpT, ///< T equality operator type + typename OffsetT> ///< Signed integer type for global offsets +__launch_bounds__ (int(AgentRlePolicyT::BLOCK_THREADS)) +__global__ void DeviceRleSweepKernel( + InputIteratorT d_in, ///< [in] Pointer to input sequence of data items + OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to output sequence of run-offsets + LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to output sequence of run-lengths + NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs (i.e., length of \p d_offsets_out) + ScanTileStateT tile_status, ///< [in] Tile status interface + EqualityOpT equality_op, ///< [in] Equality operator for input items + OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + int num_tiles) ///< [in] Total number of tiles for the entire problem +{ + // Thread block type for selecting data from input tiles + typedef AgentRle< + AgentRlePolicyT, + InputIteratorT, + OffsetsOutputIteratorT, + LengthsOutputIteratorT, + EqualityOpT, + OffsetT> AgentRleT; + + // Shared memory for AgentRle + __shared__ typename AgentRleT::TempStorage temp_storage; + + // Process tiles + AgentRleT(temp_storage, d_in, d_offsets_out, d_lengths_out, equality_op, num_items).ConsumeRange( + num_tiles, + tile_status, + d_num_runs_out); +} + + + + +/****************************************************************************** + * Dispatch + ******************************************************************************/ + +/** + * Utility class for dispatching the appropriately-tuned kernels for DeviceRle + */ +template < + typename InputIteratorT, ///< Random-access input iterator type for reading input items \iterator + typename OffsetsOutputIteratorT, ///< Random-access output iterator type for writing run-offset values \iterator + typename LengthsOutputIteratorT, ///< Random-access output iterator type for writing run-length values \iterator + typename NumRunsOutputIteratorT, ///< Output iterator type for recording the number of runs encountered \iterator + typename EqualityOpT, ///< T equality 
operator type + typename OffsetT> ///< Signed integer type for global offsets +struct DeviceRleDispatch +{ + /****************************************************************************** + * Types and constants + ******************************************************************************/ + + // The input value type + typedef typename std::iterator_traits::value_type T; + + // The lengths output value type + typedef typename If<(Equals::value_type, void>::VALUE), // LengthT = (if output iterator's value type is void) ? + OffsetT, // ... then the OffsetT type, + typename std::iterator_traits::value_type>::Type LengthT; // ... else the output iterator's value type + + enum + { + INIT_KERNEL_THREADS = 128, + }; + + // Tile status descriptor interface type + typedef ReduceByKeyScanTileState ScanTileStateT; + + + /****************************************************************************** + * Tuning policies + ******************************************************************************/ + + /// SM35 + struct Policy350 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = 15, + ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), + }; + + typedef AgentRlePolicy< + 96, + ITEMS_PER_THREAD, + BLOCK_LOAD_DIRECT, + LOAD_LDG, + true, + BLOCK_SCAN_WARP_SCANS> + RleSweepPolicy; + }; + + /// SM30 + struct Policy300 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = 5, + ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), + }; + + typedef AgentRlePolicy< + 256, + ITEMS_PER_THREAD, + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + true, + BLOCK_SCAN_RAKING_MEMOIZE> + RleSweepPolicy; + }; + + /// SM20 + struct Policy200 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = 15, + ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), + }; + + typedef AgentRlePolicy< + 128, + ITEMS_PER_THREAD, + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + false, + BLOCK_SCAN_WARP_SCANS> + RleSweepPolicy; + }; + + /// SM13 + struct Policy130 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = 9, + ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), + }; + + typedef AgentRlePolicy< + 64, + ITEMS_PER_THREAD, + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + true, + BLOCK_SCAN_RAKING_MEMOIZE> + RleSweepPolicy; + }; + + /// SM10 + struct Policy100 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = 9, + ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(T)))), + }; + + typedef AgentRlePolicy< + 256, + ITEMS_PER_THREAD, + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + true, + BLOCK_SCAN_RAKING_MEMOIZE> + RleSweepPolicy; + }; + + + /****************************************************************************** + * Tuning policies of current PTX compiler pass + ******************************************************************************/ + +#if (CUB_PTX_ARCH >= 350) + typedef Policy350 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 300) + typedef Policy300 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 200) + typedef Policy200 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 130) + typedef Policy130 PtxPolicy; + +#else + typedef Policy100 PtxPolicy; + +#endif + + // "Opaque" policies (whose parameterizations aren't reflected in the type signature) + struct PtxRleSweepPolicy : PtxPolicy::RleSweepPolicy {}; + + + /****************************************************************************** + * 
Utilities + ******************************************************************************/ + + /** + * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use + */ + template + CUB_RUNTIME_FUNCTION __forceinline__ + static void InitConfigs( + int ptx_version, + KernelConfig& device_rle_config) + { + #if (CUB_PTX_ARCH > 0) + + // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy + device_rle_config.template Init(); + + #else + + // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version + if (ptx_version >= 350) + { + device_rle_config.template Init(); + } + else if (ptx_version >= 300) + { + device_rle_config.template Init(); + } + else if (ptx_version >= 200) + { + device_rle_config.template Init(); + } + else if (ptx_version >= 130) + { + device_rle_config.template Init(); + } + else + { + device_rle_config.template Init(); + } + + #endif + } + + + /** + * Kernel kernel dispatch configuration. Mirrors the constants within AgentRlePolicyT. + */ + struct KernelConfig + { + int block_threads; + int items_per_thread; + BlockLoadAlgorithm load_policy; + bool store_warp_time_slicing; + BlockScanAlgorithm scan_algorithm; + + template + CUB_RUNTIME_FUNCTION __forceinline__ + void Init() + { + block_threads = AgentRlePolicyT::BLOCK_THREADS; + items_per_thread = AgentRlePolicyT::ITEMS_PER_THREAD; + load_policy = AgentRlePolicyT::LOAD_ALGORITHM; + store_warp_time_slicing = AgentRlePolicyT::STORE_WARP_TIME_SLICING; + scan_algorithm = AgentRlePolicyT::SCAN_ALGORITHM; + } + + CUB_RUNTIME_FUNCTION __forceinline__ + void Print() + { + printf("%d, %d, %d, %d, %d", + block_threads, + items_per_thread, + load_policy, + store_warp_time_slicing, + scan_algorithm); + } + }; + + + /****************************************************************************** + * Dispatch entrypoints + ******************************************************************************/ + + /** + * Internal dispatch routine for computing a device-wide run-length-encode using the + * specified kernel functions. + */ + template < + typename DeviceScanInitKernelPtr, ///< Function type of cub::DeviceScanInitKernel + typename DeviceRleSweepKernelPtr> ///< Function type of cub::DeviceRleSweepKernelPtr + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Dispatch( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to the output sequence of run-offsets + LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to the output sequence of run-lengths + NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to the total number of runs encountered (i.e., length of \p d_offsets_out) + EqualityOpT equality_op, ///< [in] Equality operator for input items + OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. 
Also causes launch configurations to be printed to the console. Default is \p false. + int ptx_version, ///< [in] PTX version of dispatch kernels + DeviceScanInitKernelPtr device_scan_init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanInitKernel + DeviceRleSweepKernelPtr device_rle_sweep_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceRleSweepKernel + KernelConfig device_rle_config) ///< [in] Dispatch parameters that match the policy that \p device_rle_sweep_kernel was compiled for + { + +#ifndef CUB_RUNTIME_ENABLED + + // Kernel launch not supported from this device + return CubDebug(cudaErrorNotSupported); + +#else + + cudaError error = cudaSuccess; + do + { + // Get device ordinal + int device_ordinal; + if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; + + // Get SM count + int sm_count; + if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; + + // Number of input tiles + int tile_size = device_rle_config.block_threads * device_rle_config.items_per_thread; + int num_tiles = (num_items + tile_size - 1) / tile_size; + + // Specify temporary storage allocation requirements + size_t allocation_sizes[1]; + if (CubDebug(error = ScanTileStateT::AllocationSize(num_tiles, allocation_sizes[0]))) break; // bytes needed for tile status descriptors + + // Compute allocation pointers into the single storage blob (or compute the necessary size of the blob) + void* allocations[1]; + if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; + if (d_temp_storage == NULL) + { + // Return if the caller is simply requesting the size of the storage allocation + break; + } + + // Construct the tile status interface + ScanTileStateT tile_status; + if (CubDebug(error = tile_status.Init(num_tiles, allocations[0], allocation_sizes[0]))) break; + + // Log device_scan_init_kernel configuration + int init_grid_size = CUB_MAX(1, (num_tiles + INIT_KERNEL_THREADS - 1) / INIT_KERNEL_THREADS); + if (debug_synchronous) _CubLog("Invoking device_scan_init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, INIT_KERNEL_THREADS, (long long) stream); + + // Invoke device_scan_init_kernel to initialize tile descriptors and queue descriptors + device_scan_init_kernel<<>>( + tile_status, + num_tiles, + d_num_runs_out); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + + // Return if empty problem + if (num_items == 0) + break; + + // Get SM occupancy for device_rle_sweep_kernel + int device_rle_kernel_sm_occupancy; + if (CubDebug(error = MaxSmOccupancy( + device_rle_kernel_sm_occupancy, // out + device_rle_sweep_kernel, + device_rle_config.block_threads))) break; + + // Get max x-dimension of grid + int max_dim_x; + if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; + + // Get grid size for scanning tiles + dim3 scan_grid_size; + scan_grid_size.z = 1; + scan_grid_size.y = ((unsigned int) num_tiles + max_dim_x - 1) / max_dim_x; + scan_grid_size.x = CUB_MIN(num_tiles, max_dim_x); + + // Log device_rle_sweep_kernel configuration + if (debug_synchronous) _CubLog("Invoking device_rle_sweep_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", + scan_grid_size.x, scan_grid_size.y, scan_grid_size.z, 
device_rle_config.block_threads, (long long) stream, device_rle_config.items_per_thread, device_rle_kernel_sm_occupancy); + + // Invoke device_rle_sweep_kernel + device_rle_sweep_kernel<<>>( + d_in, + d_offsets_out, + d_lengths_out, + d_num_runs_out, + tile_status, + equality_op, + num_items, + num_tiles); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + + } + while (0); + + return error; + +#endif // CUB_RUNTIME_ENABLED + } + + + /** + * Internal dispatch routine + */ + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Dispatch( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to input sequence of data items + OffsetsOutputIteratorT d_offsets_out, ///< [out] Pointer to output sequence of run-offsets + LengthsOutputIteratorT d_lengths_out, ///< [out] Pointer to output sequence of run-lengths + NumRunsOutputIteratorT d_num_runs_out, ///< [out] Pointer to total number of runs (i.e., length of \p d_offsets_out) + EqualityOpT equality_op, ///< [in] Equality operator for input items + OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + cudaError error = cudaSuccess; + do + { + // Get PTX version + int ptx_version; + #if (CUB_PTX_ARCH == 0) + if (CubDebug(error = PtxVersion(ptx_version))) break; + #else + ptx_version = CUB_PTX_ARCH; + #endif + + // Get kernel kernel dispatch configurations + KernelConfig device_rle_config; + InitConfigs(ptx_version, device_rle_config); + + // Dispatch + if (CubDebug(error = Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_offsets_out, + d_lengths_out, + d_num_runs_out, + equality_op, + num_items, + stream, + debug_synchronous, + ptx_version, + DeviceCompactInitKernel, + DeviceRleSweepKernel, + device_rle_config))) break; + } + while (0); + + return error; + } +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_scan.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_scan.cuh new file mode 100644 index 0000000..3ef720a --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_scan.cuh @@ -0,0 +1,563 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceScan provides device-wide, parallel operations for computing a prefix scan across a sequence of data items residing within device-accessible memory. + */ + +#pragma once + +#include +#include + +#include "../../agent/agent_scan.cuh" +#include "../../thread/thread_operators.cuh" +#include "../../grid/grid_queue.cuh" +#include "../../util_arch.cuh" +#include "../../util_debug.cuh" +#include "../../util_device.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Kernel entry points + *****************************************************************************/ + +/** + * Initialization kernel for tile status initialization (multi-block) + */ +template < + typename ScanTileStateT> ///< Tile status interface type +__global__ void DeviceScanInitKernel( + ScanTileStateT tile_state, ///< [in] Tile status interface + int num_tiles) ///< [in] Number of tiles +{ + // Initialize tile status + tile_state.InitializeStatus(num_tiles); +} + +/** + * Initialization kernel for tile status initialization (multi-block) + */ +template < + typename ScanTileStateT, ///< Tile status interface type + typename NumSelectedIteratorT> ///< Output iterator type for recording the number of items selected +__global__ void DeviceCompactInitKernel( + ScanTileStateT tile_state, ///< [in] Tile status interface + int num_tiles, ///< [in] Number of tiles + NumSelectedIteratorT d_num_selected_out) ///< [out] Pointer to the total number of items selected (i.e., length of \p d_selected_out) +{ + // Initialize tile status + tile_state.InitializeStatus(num_tiles); + + // Initialize d_num_selected_out + if ((blockIdx.x == 0) && (threadIdx.x == 0)) + *d_num_selected_out = 0; +} + + +/** + * Scan kernel entry point (multi-block) + */ +template < + typename ScanPolicyT, ///< Parameterized ScanPolicyT tuning policy type + typename InputIteratorT, ///< Random-access input iterator type for reading scan inputs \iterator + typename OutputIteratorT, ///< Random-access output iterator type for writing scan outputs \iterator + typename ScanTileStateT, ///< Tile status interface type 
+ typename ScanOpT, ///< Binary scan functor type having member T operator()(const T &a, const T &b) + typename InitValueT, ///< Initial value to seed the exclusive scan (cub::NullType for inclusive scans) + typename OffsetT> ///< Signed integer type for global offsets +__launch_bounds__ (int(ScanPolicyT::BLOCK_THREADS)) +__global__ void DeviceScanKernel( + InputIteratorT d_in, ///< Input data + OutputIteratorT d_out, ///< Output data + ScanTileStateT tile_state, ///< Tile status interface + int start_tile, ///< The starting tile for the current grid + ScanOpT scan_op, ///< Binary scan functor + InitValueT init_value, ///< Initial value to seed the exclusive scan + OffsetT num_items) ///< Total number of scan items for the entire problem +{ + // Thread block type for scanning input tiles + typedef AgentScan< + ScanPolicyT, + InputIteratorT, + OutputIteratorT, + ScanOpT, + InitValueT, + OffsetT> AgentScanT; + + // Shared memory for AgentScan + __shared__ typename AgentScanT::TempStorage temp_storage; + + // Process tiles + AgentScanT(temp_storage, d_in, d_out, scan_op, init_value).ConsumeRange( + num_items, + tile_state, + start_tile); +} + + + + +/****************************************************************************** + * Dispatch + ******************************************************************************/ + + +/** + * Utility class for dispatching the appropriately-tuned kernels for DeviceScan + */ +template < + typename InputIteratorT, ///< Random-access input iterator type for reading scan inputs \iterator + typename OutputIteratorT, ///< Random-access output iterator type for writing scan outputs \iterator + typename ScanOpT, ///< Binary scan functor type having member T operator()(const T &a, const T &b) + typename InitValueT, ///< The init_value element type for ScanOpT (cub::NullType for inclusive scans) + typename OffsetT> ///< Signed integer type for global offsets +struct DispatchScan +{ + //--------------------------------------------------------------------- + // Constants and Types + //--------------------------------------------------------------------- + + enum + { + INIT_KERNEL_THREADS = 128 + }; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... 
else the output iterator's value type + + // Tile status descriptor interface type + typedef ScanTileState ScanTileStateT; + + + //--------------------------------------------------------------------- + // Tuning policies + //--------------------------------------------------------------------- + + /// SM600 + struct Policy600 + { + typedef AgentScanPolicy< + CUB_SCALED_GRANULARITIES(128, 15, OutputT), ///< Threads per block, items per thread + BLOCK_LOAD_TRANSPOSE, + LOAD_DEFAULT, + BLOCK_STORE_TRANSPOSE, + BLOCK_SCAN_WARP_SCANS> + ScanPolicyT; + }; + + + /// SM520 + struct Policy520 + { + // Titan X: 32.47B items/s @ 48M 32-bit T + typedef AgentScanPolicy< + CUB_SCALED_GRANULARITIES(128, 12, OutputT), ///< Threads per block, items per thread + BLOCK_LOAD_DIRECT, + LOAD_LDG, + BLOCK_STORE_WARP_TRANSPOSE, + BLOCK_SCAN_WARP_SCANS> + ScanPolicyT; + }; + + + /// SM35 + struct Policy350 + { + // GTX Titan: 29.5B items/s (232.4 GB/s) @ 48M 32-bit T + typedef AgentScanPolicy< + CUB_SCALED_GRANULARITIES(128, 12, OutputT), ///< Threads per block, items per thread + BLOCK_LOAD_DIRECT, + LOAD_LDG, + BLOCK_STORE_WARP_TRANSPOSE_TIMESLICED, + BLOCK_SCAN_RAKING> + ScanPolicyT; + }; + + /// SM30 + struct Policy300 + { + typedef AgentScanPolicy< + CUB_SCALED_GRANULARITIES(256, 9, OutputT), ///< Threads per block, items per thread + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + BLOCK_STORE_WARP_TRANSPOSE, + BLOCK_SCAN_WARP_SCANS> + ScanPolicyT; + }; + + /// SM20 + struct Policy200 + { + // GTX 580: 20.3B items/s (162.3 GB/s) @ 48M 32-bit T + typedef AgentScanPolicy< + CUB_SCALED_GRANULARITIES(128, 12, OutputT), ///< Threads per block, items per thread + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + BLOCK_STORE_WARP_TRANSPOSE, + BLOCK_SCAN_WARP_SCANS> + ScanPolicyT; + }; + + /// SM13 + struct Policy130 + { + typedef AgentScanPolicy< + CUB_SCALED_GRANULARITIES(96, 21, OutputT), ///< Threads per block, items per thread + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + BLOCK_STORE_WARP_TRANSPOSE, + BLOCK_SCAN_RAKING_MEMOIZE> + ScanPolicyT; + }; + + /// SM10 + struct Policy100 + { + typedef AgentScanPolicy< + CUB_SCALED_GRANULARITIES(64, 9, OutputT), ///< Threads per block, items per thread + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + BLOCK_STORE_WARP_TRANSPOSE, + BLOCK_SCAN_WARP_SCANS> + ScanPolicyT; + }; + + + //--------------------------------------------------------------------- + // Tuning policies of current PTX compiler pass + //--------------------------------------------------------------------- + +#if (CUB_PTX_ARCH >= 600) + typedef Policy600 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 520) + typedef Policy520 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 350) + typedef Policy350 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 300) + typedef Policy300 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 200) + typedef Policy200 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 130) + typedef Policy130 PtxPolicy; + +#else + typedef Policy100 PtxPolicy; + +#endif + + // "Opaque" policies (whose parameterizations aren't reflected in the type signature) + struct PtxAgentScanPolicy : PtxPolicy::ScanPolicyT {}; + + + //--------------------------------------------------------------------- + // Utilities + //--------------------------------------------------------------------- + + /** + * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use + */ + template + CUB_RUNTIME_FUNCTION __forceinline__ + static void InitConfigs( + int ptx_version, + KernelConfig &scan_kernel_config) + { + #if (CUB_PTX_ARCH > 0) + 
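        // Note on policy selection: when this function is compiled as device code
        // (CUB_PTX_ARCH > 0, the branch entered here), the tuning policy is fixed at
        // compile time by the PTX pass currently being generated, so the ptx_version
        // argument is deliberately unused. On the host pass below (CUB_PTX_ARCH == 0),
        // the PTX version reported at runtime selects whichever PolicyXXX entry best
        // matches the target device, keeping the host-side launch configuration
        // (block size, items per thread) consistent with the policy the kernels were
        // actually compiled under.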
(void)ptx_version; + + // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy + scan_kernel_config.template Init(); + + #else + + // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version + if (ptx_version >= 600) + { + scan_kernel_config.template Init(); + } + else if (ptx_version >= 520) + { + scan_kernel_config.template Init(); + } + else if (ptx_version >= 350) + { + scan_kernel_config.template Init(); + } + else if (ptx_version >= 300) + { + scan_kernel_config.template Init(); + } + else if (ptx_version >= 200) + { + scan_kernel_config.template Init(); + } + else if (ptx_version >= 130) + { + scan_kernel_config.template Init(); + } + else + { + scan_kernel_config.template Init(); + } + + #endif + } + + + /** + * Kernel kernel dispatch configuration. + */ + struct KernelConfig + { + int block_threads; + int items_per_thread; + int tile_items; + + template + CUB_RUNTIME_FUNCTION __forceinline__ + void Init() + { + block_threads = PolicyT::BLOCK_THREADS; + items_per_thread = PolicyT::ITEMS_PER_THREAD; + tile_items = block_threads * items_per_thread; + } + }; + + + //--------------------------------------------------------------------- + // Dispatch entrypoints + //--------------------------------------------------------------------- + + /** + * Internal dispatch routine for computing a device-wide prefix scan using the + * specified kernel functions. + */ + template < + typename ScanInitKernelPtrT, ///< Function type of cub::DeviceScanInitKernel + typename ScanSweepKernelPtrT> ///< Function type of cub::DeviceScanKernelPtrT + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Dispatch( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items + ScanOpT scan_op, ///< [in] Binary scan functor + InitValueT init_value, ///< [in] Initial value to seed the exclusive scan + OffsetT num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) + cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
+ int /*ptx_version*/, ///< [in] PTX version of dispatch kernels + ScanInitKernelPtrT init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanInitKernel + ScanSweepKernelPtrT scan_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanKernel + KernelConfig scan_kernel_config) ///< [in] Dispatch parameters that match the policy that \p scan_kernel was compiled for + { + +#ifndef CUB_RUNTIME_ENABLED + (void)d_temp_storage; + (void)temp_storage_bytes; + (void)d_in; + (void)d_out; + (void)scan_op; + (void)init_value; + (void)num_items; + (void)stream; + (void)debug_synchronous; + (void)init_kernel; + (void)scan_kernel; + (void)scan_kernel_config; + + // Kernel launch not supported from this device + return CubDebug(cudaErrorNotSupported); + +#else + cudaError error = cudaSuccess; + do + { + // Get device ordinal + int device_ordinal; + if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; + + // Get SM count + int sm_count; + if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; + + // Number of input tiles + int tile_size = scan_kernel_config.block_threads * scan_kernel_config.items_per_thread; + int num_tiles = (num_items + tile_size - 1) / tile_size; + + // Specify temporary storage allocation requirements + size_t allocation_sizes[1]; + if (CubDebug(error = ScanTileStateT::AllocationSize(num_tiles, allocation_sizes[0]))) break; // bytes needed for tile status descriptors + + // Compute allocation pointers into the single storage blob (or compute the necessary size of the blob) + void* allocations[1]; + if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; + if (d_temp_storage == NULL) + { + // Return if the caller is simply requesting the size of the storage allocation + break; + } + + // Return if empty problem + if (num_items == 0) + break; + + // Construct the tile status interface + ScanTileStateT tile_state; + if (CubDebug(error = tile_state.Init(num_tiles, allocations[0], allocation_sizes[0]))) break; + + // Log init_kernel configuration + int init_grid_size = (num_tiles + INIT_KERNEL_THREADS - 1) / INIT_KERNEL_THREADS; + if (debug_synchronous) _CubLog("Invoking init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, INIT_KERNEL_THREADS, (long long) stream); + + // Invoke init_kernel to initialize tile descriptors + init_kernel<<>>( + tile_state, + num_tiles); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + + // Get SM occupancy for scan_kernel + int scan_sm_occupancy; + if (CubDebug(error = MaxSmOccupancy( + scan_sm_occupancy, // out + scan_kernel, + scan_kernel_config.block_threads))) break; + + // Get max x-dimension of grid + int max_dim_x; + if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; + + // Run grids in epochs (in case number of tiles exceeds max x-dimension + int scan_grid_size = CUB_MIN(num_tiles, max_dim_x); + for (int start_tile = 0; start_tile < num_tiles; start_tile += scan_grid_size) + { + // Log scan_kernel configuration + if (debug_synchronous) _CubLog("Invoking %d scan_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", + start_tile, scan_grid_size, scan_kernel_config.block_threads, (long long) stream, scan_kernel_config.items_per_thread, 
scan_sm_occupancy); + + // Invoke scan_kernel + scan_kernel<<>>( + d_in, + d_out, + tile_state, + start_tile, + scan_op, + init_value, + num_items); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + } + } + while (0); + + return error; + +#endif // CUB_RUNTIME_ENABLED + } + + + /** + * Internal dispatch routine + */ + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Dispatch( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + OutputIteratorT d_out, ///< [out] Pointer to the output sequence of data items + ScanOpT scan_op, ///< [in] Binary scan functor + InitValueT init_value, ///< [in] Initial value to seed the exclusive scan + OffsetT num_items, ///< [in] Total number of input items (i.e., the length of \p d_in) + cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + cudaError error = cudaSuccess; + do + { + // Get PTX version + int ptx_version; + if (CubDebug(error = PtxVersion(ptx_version))) break; + + // Get kernel kernel dispatch configurations + KernelConfig scan_kernel_config; + InitConfigs(ptx_version, scan_kernel_config); + + // Dispatch + if (CubDebug(error = Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + scan_op, + init_value, + num_items, + stream, + debug_synchronous, + ptx_version, + DeviceScanInitKernel, + DeviceScanKernel, + scan_kernel_config))) break; + } + while (0); + + return error; + } +}; + + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_select_if.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_select_if.cuh new file mode 100644 index 0000000..60b3313 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_select_if.cuh @@ -0,0 +1,542 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
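For reference, the DispatchScan machinery above is what the public cub::DeviceScan entry points resolve to. Below is a minimal caller-side sketch using ExclusiveSum on a small integer array; the array contents, sizes, and the absence of error checking are illustrative assumptions.

#include <cub/cub.cuh>
#include <cstdio>

int main()
{
    const int num_items = 8;
    int h_in[num_items] = {1, 2, 3, 4, 5, 6, 7, 8};

    int *d_in = NULL, *d_out = NULL;
    cudaMalloc(&d_in,  num_items * sizeof(int));
    cudaMalloc(&d_out, num_items * sizeof(int));
    cudaMemcpy(d_in, h_in, num_items * sizeof(int), cudaMemcpyHostToDevice);

    // Phase 1: query how much temporary storage the scan dispatch needs
    void  *d_temp_storage     = NULL;
    size_t temp_storage_bytes = 0;
    cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);

    // Phase 2: allocate it and run the scan for real
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);

    int h_out[num_items];
    cudaMemcpy(h_out, d_out, num_items * sizeof(int), cudaMemcpyDeviceToHost);
    printf("first exclusive-sum output: %d (expected 0)\n", h_out[0]);

    cudaFree(d_in); cudaFree(d_out); cudaFree(d_temp_storage);
    return 0;
}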
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceSelect provides device-wide, parallel operations for selecting items from sequences of data items residing within device-accessible memory. + */ + +#pragma once + +#include +#include + +#include "dispatch_scan.cuh" +#include "../../agent/agent_select_if.cuh" +#include "../../thread/thread_operators.cuh" +#include "../../grid/grid_queue.cuh" +#include "../../util_device.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/****************************************************************************** + * Kernel entry points + *****************************************************************************/ + +/** + * Select kernel entry point (multi-block) + * + * Performs functor-based selection if SelectOpT functor type != NullType + * Otherwise performs flag-based selection if FlagsInputIterator's value type != NullType + * Otherwise performs discontinuity selection (keep unique) + */ +template < + typename AgentSelectIfPolicyT, ///< Parameterized AgentSelectIfPolicyT tuning policy type + typename InputIteratorT, ///< Random-access input iterator type for reading input items + typename FlagsInputIteratorT, ///< Random-access input iterator type for reading selection flags (NullType* if a selection functor or discontinuity flagging is to be used for selection) + typename SelectedOutputIteratorT, ///< Random-access output iterator type for writing selected items + typename NumSelectedIteratorT, ///< Output iterator type for recording the number of items selected + typename ScanTileStateT, ///< Tile status interface type + typename SelectOpT, ///< Selection operator type (NullType if selection flags or discontinuity flagging is to be used for selection) + typename EqualityOpT, ///< Equality operator type (NullType if selection functor or selection flags is to be used for selection) + typename OffsetT, ///< Signed integer type for global offsets + bool KEEP_REJECTS> ///< Whether or not we push rejected items to the back of the output +__launch_bounds__ (int(AgentSelectIfPolicyT::BLOCK_THREADS)) +__global__ void DeviceSelectSweepKernel( + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + FlagsInputIteratorT d_flags, ///< [in] Pointer to the input sequence of selection flags (if applicable) + SelectedOutputIteratorT d_selected_out, ///< [out] Pointer to the output sequence of selected data items + NumSelectedIteratorT d_num_selected_out, ///< [out] Pointer to the total number of items selected (i.e., length of \p d_selected_out) + ScanTileStateT tile_status, ///< [in] Tile status interface + SelectOpT 
select_op, ///< [in] Selection operator + EqualityOpT equality_op, ///< [in] Equality operator + OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + int num_tiles) ///< [in] Total number of tiles for the entire problem +{ + // Thread block type for selecting data from input tiles + typedef AgentSelectIf< + AgentSelectIfPolicyT, + InputIteratorT, + FlagsInputIteratorT, + SelectedOutputIteratorT, + SelectOpT, + EqualityOpT, + OffsetT, + KEEP_REJECTS> AgentSelectIfT; + + // Shared memory for AgentSelectIf + __shared__ typename AgentSelectIfT::TempStorage temp_storage; + + // Process tiles + AgentSelectIfT(temp_storage, d_in, d_flags, d_selected_out, select_op, equality_op, num_items).ConsumeRange( + num_tiles, + tile_status, + d_num_selected_out); +} + + + + +/****************************************************************************** + * Dispatch + ******************************************************************************/ + +/** + * Utility class for dispatching the appropriately-tuned kernels for DeviceSelect + */ +template < + typename InputIteratorT, ///< Random-access input iterator type for reading input items + typename FlagsInputIteratorT, ///< Random-access input iterator type for reading selection flags (NullType* if a selection functor or discontinuity flagging is to be used for selection) + typename SelectedOutputIteratorT, ///< Random-access output iterator type for writing selected items + typename NumSelectedIteratorT, ///< Output iterator type for recording the number of items selected + typename SelectOpT, ///< Selection operator type (NullType if selection flags or discontinuity flagging is to be used for selection) + typename EqualityOpT, ///< Equality operator type (NullType if selection functor or selection flags is to be used for selection) + typename OffsetT, ///< Signed integer type for global offsets + bool KEEP_REJECTS> ///< Whether or not we push rejected items to the back of the output +struct DispatchSelectIf +{ + /****************************************************************************** + * Types and constants + ******************************************************************************/ + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... 
else the output iterator's value type + + // The flag value type + typedef typename std::iterator_traits::value_type FlagT; + + enum + { + INIT_KERNEL_THREADS = 128, + }; + + // Tile status descriptor interface type + typedef ScanTileState ScanTileStateT; + + + /****************************************************************************** + * Tuning policies + ******************************************************************************/ + + /// SM35 + struct Policy350 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = 10, + ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), + }; + + typedef AgentSelectIfPolicy< + 128, + ITEMS_PER_THREAD, + BLOCK_LOAD_DIRECT, + LOAD_LDG, + BLOCK_SCAN_WARP_SCANS> + SelectIfPolicyT; + }; + + /// SM30 + struct Policy300 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = 7, + ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(3, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), + }; + + typedef AgentSelectIfPolicy< + 128, + ITEMS_PER_THREAD, + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + BLOCK_SCAN_WARP_SCANS> + SelectIfPolicyT; + }; + + /// SM20 + struct Policy200 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = (KEEP_REJECTS) ? 7 : 15, + ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), + }; + + typedef AgentSelectIfPolicy< + 128, + ITEMS_PER_THREAD, + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + BLOCK_SCAN_WARP_SCANS> + SelectIfPolicyT; + }; + + /// SM13 + struct Policy130 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = 9, + ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), + }; + + typedef AgentSelectIfPolicy< + 64, + ITEMS_PER_THREAD, + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + BLOCK_SCAN_RAKING_MEMOIZE> + SelectIfPolicyT; + }; + + /// SM10 + struct Policy100 + { + enum { + NOMINAL_4B_ITEMS_PER_THREAD = 9, + ITEMS_PER_THREAD = CUB_MIN(NOMINAL_4B_ITEMS_PER_THREAD, CUB_MAX(1, (NOMINAL_4B_ITEMS_PER_THREAD * 4 / sizeof(OutputT)))), + }; + + typedef AgentSelectIfPolicy< + 64, + ITEMS_PER_THREAD, + BLOCK_LOAD_WARP_TRANSPOSE, + LOAD_DEFAULT, + BLOCK_SCAN_RAKING> + SelectIfPolicyT; + }; + + + /****************************************************************************** + * Tuning policies of current PTX compiler pass + ******************************************************************************/ + +#if (CUB_PTX_ARCH >= 350) + typedef Policy350 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 300) + typedef Policy300 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 200) + typedef Policy200 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 130) + typedef Policy130 PtxPolicy; + +#else + typedef Policy100 PtxPolicy; + +#endif + + // "Opaque" policies (whose parameterizations aren't reflected in the type signature) + struct PtxSelectIfPolicyT : PtxPolicy::SelectIfPolicyT {}; + + + /****************************************************************************** + * Utilities + ******************************************************************************/ + + /** + * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use + */ + template + CUB_RUNTIME_FUNCTION __forceinline__ + static void InitConfigs( + int ptx_version, + KernelConfig &select_if_config) + { + #if (CUB_PTX_ARCH > 0) + (void)ptx_version; + + // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy + select_if_config.template 
Init(); + + #else + + // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version + if (ptx_version >= 350) + { + select_if_config.template Init(); + } + else if (ptx_version >= 300) + { + select_if_config.template Init(); + } + else if (ptx_version >= 200) + { + select_if_config.template Init(); + } + else if (ptx_version >= 130) + { + select_if_config.template Init(); + } + else + { + select_if_config.template Init(); + } + + #endif + } + + + /** + * Kernel kernel dispatch configuration. + */ + struct KernelConfig + { + int block_threads; + int items_per_thread; + int tile_items; + + template + CUB_RUNTIME_FUNCTION __forceinline__ + void Init() + { + block_threads = PolicyT::BLOCK_THREADS; + items_per_thread = PolicyT::ITEMS_PER_THREAD; + tile_items = block_threads * items_per_thread; + } + }; + + + /****************************************************************************** + * Dispatch entrypoints + ******************************************************************************/ + + /** + * Internal dispatch routine for computing a device-wide selection using the + * specified kernel functions. + */ + template < + typename ScanInitKernelPtrT, ///< Function type of cub::DeviceScanInitKernel + typename SelectIfKernelPtrT> ///< Function type of cub::SelectIfKernelPtrT + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Dispatch( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + FlagsInputIteratorT d_flags, ///< [in] Pointer to the input sequence of selection flags (if applicable) + SelectedOutputIteratorT d_selected_out, ///< [in] Pointer to the output sequence of selected data items + NumSelectedIteratorT d_num_selected_out, ///< [in] Pointer to the total number of items selected (i.e., length of \p d_selected_out) + SelectOpT select_op, ///< [in] Selection operator + EqualityOpT equality_op, ///< [in] Equality operator + OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
+ int /*ptx_version*/, ///< [in] PTX version of dispatch kernels + ScanInitKernelPtrT scan_init_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceScanInitKernel + SelectIfKernelPtrT select_if_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceSelectSweepKernel + KernelConfig select_if_config) ///< [in] Dispatch parameters that match the policy that \p select_if_kernel was compiled for + { + +#ifndef CUB_RUNTIME_ENABLED + (void)d_temp_storage; + (void)temp_storage_bytes; + (void)d_in; + (void)d_flags; + (void)d_selected_out; + (void)d_num_selected_out; + (void)select_op; + (void)equality_op; + (void)num_items; + (void)stream; + (void)debug_synchronous; + (void)scan_init_kernel; + (void)select_if_kernel; + (void)select_if_config; + + // Kernel launch not supported from this device + return CubDebug(cudaErrorNotSupported); + +#else + + cudaError error = cudaSuccess; + do + { + // Get device ordinal + int device_ordinal; + if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; + + // Get SM count + int sm_count; + if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; + + // Number of input tiles + int tile_size = select_if_config.block_threads * select_if_config.items_per_thread; + int num_tiles = (num_items + tile_size - 1) / tile_size; + + // Specify temporary storage allocation requirements + size_t allocation_sizes[1]; + if (CubDebug(error = ScanTileStateT::AllocationSize(num_tiles, allocation_sizes[0]))) break; // bytes needed for tile status descriptors + + // Compute allocation pointers into the single storage blob (or compute the necessary size of the blob) + void* allocations[1]; + if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; + if (d_temp_storage == NULL) + { + // Return if the caller is simply requesting the size of the storage allocation + break; + } + + // Construct the tile status interface + ScanTileStateT tile_status; + if (CubDebug(error = tile_status.Init(num_tiles, allocations[0], allocation_sizes[0]))) break; + + // Log scan_init_kernel configuration + int init_grid_size = CUB_MAX(1, (num_tiles + INIT_KERNEL_THREADS - 1) / INIT_KERNEL_THREADS); + if (debug_synchronous) _CubLog("Invoking scan_init_kernel<<<%d, %d, 0, %lld>>>()\n", init_grid_size, INIT_KERNEL_THREADS, (long long) stream); + + // Invoke scan_init_kernel to initialize tile descriptors + scan_init_kernel<<>>( + tile_status, + num_tiles, + d_num_selected_out); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + + // Return if empty problem + if (num_items == 0) + break; + + // Get SM occupancy for select_if_kernel + int range_select_sm_occupancy; + if (CubDebug(error = MaxSmOccupancy( + range_select_sm_occupancy, // out + select_if_kernel, + select_if_config.block_threads))) break; + + // Get max x-dimension of grid + int max_dim_x; + if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; + + // Get grid size for scanning tiles + dim3 scan_grid_size; + scan_grid_size.z = 1; + scan_grid_size.y = ((unsigned int) num_tiles + max_dim_x - 1) / max_dim_x; + scan_grid_size.x = CUB_MIN(num_tiles, max_dim_x); + + // Log select_if_kernel configuration + if (debug_synchronous) _CubLog("Invoking 
select_if_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", + scan_grid_size.x, scan_grid_size.y, scan_grid_size.z, select_if_config.block_threads, (long long) stream, select_if_config.items_per_thread, range_select_sm_occupancy); + + // Invoke select_if_kernel + select_if_kernel<<>>( + d_in, + d_flags, + d_selected_out, + d_num_selected_out, + tile_status, + select_op, + equality_op, + num_items, + num_tiles); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + } + while (0); + + return error; + +#endif // CUB_RUNTIME_ENABLED + } + + + /** + * Internal dispatch routine + */ + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Dispatch( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + InputIteratorT d_in, ///< [in] Pointer to the input sequence of data items + FlagsInputIteratorT d_flags, ///< [in] Pointer to the input sequence of selection flags (if applicable) + SelectedOutputIteratorT d_selected_out, ///< [in] Pointer to the output sequence of selected data items + NumSelectedIteratorT d_num_selected_out, ///< [in] Pointer to the total number of items selected (i.e., length of \p d_selected_out) + SelectOpT select_op, ///< [in] Selection operator + EqualityOpT equality_op, ///< [in] Equality operator + OffsetT num_items, ///< [in] Total number of input items (i.e., length of \p d_in) + cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + cudaError error = cudaSuccess; + do + { + // Get PTX version + int ptx_version; + #if (CUB_PTX_ARCH == 0) + if (CubDebug(error = PtxVersion(ptx_version))) break; + #else + ptx_version = CUB_PTX_ARCH; + #endif + + // Get kernel kernel dispatch configurations + KernelConfig select_if_config; + InitConfigs(ptx_version, select_if_config); + + // Dispatch + if (CubDebug(error = Dispatch( + d_temp_storage, + temp_storage_bytes, + d_in, + d_flags, + d_selected_out, + d_num_selected_out, + select_op, + equality_op, + num_items, + stream, + debug_synchronous, + ptx_version, + DeviceCompactInitKernel, + DeviceSelectSweepKernel, + select_if_config))) break; + } + while (0); + + return error; + } +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_spmv_orig.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_spmv_orig.cuh new file mode 100644 index 0000000..ab9c534 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/device/dispatch/dispatch_spmv_orig.cuh @@ -0,0 +1,834 @@ + +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
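The DispatchSelectIf routine above backs cub::DeviceSelect (and cub::DevicePartition when KEEP_REJECTS is set). A minimal sketch of functor-based selection through cub::DeviceSelect::If follows; the IsPositive functor, buffer sizing, and lack of error checking are illustrative assumptions only.

#include <cub/cub.cuh>

// Illustrative selection functor (not from the CUB sources)
struct IsPositive
{
    __host__ __device__ __forceinline__
    bool operator()(const int &a) const { return a > 0; }
};

// d_in: num_items ints; d_out sized for num_items in the worst case;
// d_num_selected_out: a single device int.
void select_positive(const int *d_in, int num_items,
                     int *d_out, int *d_num_selected_out)
{
    void  *d_temp_storage     = NULL;
    size_t temp_storage_bytes = 0;

    // Size query pass
    cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes,
                          d_in, d_out, d_num_selected_out, num_items, IsPositive());

    // Selection pass
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceSelect::If(d_temp_storage, temp_storage_bytes,
                          d_in, d_out, d_num_selected_out, num_items, IsPositive());
    cudaFree(d_temp_storage);
}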
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::DeviceSpmv provides device-wide parallel operations for performing sparse-matrix * vector multiplication (SpMV). + */ + +#pragma once + +#include +#include + +#include "../../agent/single_pass_scan_operators.cuh" +#include "../../agent/agent_segment_fixup.cuh" +#include "../../agent/agent_spmv_orig.cuh" +#include "../../util_type.cuh" +#include "../../util_debug.cuh" +#include "../../util_device.cuh" +#include "../../thread/thread_search.cuh" +#include "../../grid/grid_queue.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * SpMV kernel entry points + *****************************************************************************/ + +/** + * Spmv search kernel. Identifies merge path starting coordinates for each tile. + */ +template < + typename AgentSpmvPolicyT, ///< Parameterized SpmvPolicy tuning policy type + typename ValueT, ///< Matrix and vector value type + typename OffsetT> ///< Signed integer type for sequence offsets +__global__ void DeviceSpmv1ColKernel( + SpmvParams spmv_params) ///< [in] SpMV input parameter bundle +{ + typedef CacheModifiedInputIterator< + AgentSpmvPolicyT::VECTOR_VALUES_LOAD_MODIFIER, + ValueT, + OffsetT> + VectorValueIteratorT; + + VectorValueIteratorT wrapped_vector_x(spmv_params.d_vector_x); + + int row_idx = (blockIdx.x * blockDim.x) + threadIdx.x; + if (row_idx < spmv_params.num_rows) + { + OffsetT end_nonzero_idx = spmv_params.d_row_end_offsets[row_idx]; + OffsetT nonzero_idx = spmv_params.d_row_end_offsets[row_idx - 1]; + + ValueT value = 0.0; + if (end_nonzero_idx != nonzero_idx) + { + value = spmv_params.d_values[nonzero_idx] * wrapped_vector_x[spmv_params.d_column_indices[nonzero_idx]]; + } + + spmv_params.d_vector_y[row_idx] = value; + } +} + + +/** + * Spmv search kernel. 
Identifies merge path starting coordinates for each tile. + */ +template < + typename SpmvPolicyT, ///< Parameterized SpmvPolicy tuning policy type + typename OffsetT, ///< Signed integer type for sequence offsets + typename CoordinateT, ///< Merge path coordinate type + typename SpmvParamsT> ///< SpmvParams type +__global__ void DeviceSpmvSearchKernel( + int num_merge_tiles, ///< [in] Number of SpMV merge tiles (spmv grid size) + CoordinateT* d_tile_coordinates, ///< [out] Pointer to the temporary array of tile starting coordinates + SpmvParamsT spmv_params) ///< [in] SpMV input parameter bundle +{ + /// Constants + enum + { + BLOCK_THREADS = SpmvPolicyT::BLOCK_THREADS, + ITEMS_PER_THREAD = SpmvPolicyT::ITEMS_PER_THREAD, + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + }; + + typedef CacheModifiedInputIterator< + SpmvPolicyT::ROW_OFFSETS_SEARCH_LOAD_MODIFIER, + OffsetT, + OffsetT> + RowOffsetsSearchIteratorT; + + // Find the starting coordinate for all tiles (plus the end coordinate of the last one) + int tile_idx = (blockIdx.x * blockDim.x) + threadIdx.x; + if (tile_idx < num_merge_tiles + 1) + { + OffsetT diagonal = (tile_idx * TILE_ITEMS); + CoordinateT tile_coordinate; + CountingInputIterator nonzero_indices(0); + + // Search the merge path + MergePathSearch( + diagonal, + RowOffsetsSearchIteratorT(spmv_params.d_row_end_offsets), + nonzero_indices, + spmv_params.num_rows, + spmv_params.num_nonzeros, + tile_coordinate); + + // Output starting offset + d_tile_coordinates[tile_idx] = tile_coordinate; + } +} + + +/** + * Spmv agent entry point + */ +template < + typename SpmvPolicyT, ///< Parameterized SpmvPolicy tuning policy type + typename ScanTileStateT, ///< Tile status interface type + typename ValueT, ///< Matrix and vector value type + typename OffsetT, ///< Signed integer type for sequence offsets + typename CoordinateT, ///< Merge path coordinate type + bool HAS_ALPHA, ///< Whether the input parameter Alpha is 1 + bool HAS_BETA> ///< Whether the input parameter Beta is 0 +__launch_bounds__ (int(SpmvPolicyT::BLOCK_THREADS)) +__global__ void DeviceSpmvKernel( + SpmvParams spmv_params, ///< [in] SpMV input parameter bundle + CoordinateT* d_tile_coordinates, ///< [in] Pointer to the temporary array of tile starting coordinates + KeyValuePair* d_tile_carry_pairs, ///< [out] Pointer to the temporary array carry-out dot product row-ids, one per block + int num_tiles, ///< [in] Number of merge tiles + ScanTileStateT tile_state, ///< [in] Tile status interface for fixup reduce-by-key kernel + int num_segment_fixup_tiles) ///< [in] Number of reduce-by-key tiles (fixup grid size) +{ + // Spmv agent type specialization + typedef AgentSpmv< + SpmvPolicyT, + ValueT, + OffsetT, + HAS_ALPHA, + HAS_BETA> + AgentSpmvT; + + // Shared memory for AgentSpmv + __shared__ typename AgentSpmvT::TempStorage temp_storage; + + AgentSpmvT(temp_storage, spmv_params).ConsumeTile( + d_tile_coordinates, + d_tile_carry_pairs, + num_tiles); + + // Initialize fixup tile status + tile_state.InitializeStatus(num_segment_fixup_tiles); + +} + + +/** + * Multi-block reduce-by-key sweep kernel entry point + */ +template < + typename AgentSegmentFixupPolicyT, ///< Parameterized AgentSegmentFixupPolicy tuning policy type + typename PairsInputIteratorT, ///< Random-access input iterator type for keys + typename AggregatesOutputIteratorT, ///< Random-access output iterator type for values + typename OffsetT, ///< Signed integer type for global offsets + typename ScanTileStateT> ///< Tile status interface type 
+__launch_bounds__ (int(AgentSegmentFixupPolicyT::BLOCK_THREADS)) +__global__ void DeviceSegmentFixupKernel( + PairsInputIteratorT d_pairs_in, ///< [in] Pointer to the array carry-out dot product row-ids, one per spmv block + AggregatesOutputIteratorT d_aggregates_out, ///< [in,out] Output value aggregates + OffsetT num_items, ///< [in] Total number of items to select from + int num_tiles, ///< [in] Total number of tiles for the entire problem + ScanTileStateT tile_state) ///< [in] Tile status interface +{ + // Thread block type for reducing tiles of value segments + typedef AgentSegmentFixup< + AgentSegmentFixupPolicyT, + PairsInputIteratorT, + AggregatesOutputIteratorT, + cub::Equality, + cub::Sum, + OffsetT> + AgentSegmentFixupT; + + // Shared memory for AgentSegmentFixup + __shared__ typename AgentSegmentFixupT::TempStorage temp_storage; + + // Process tiles + AgentSegmentFixupT(temp_storage, d_pairs_in, d_aggregates_out, cub::Equality(), cub::Sum()).ConsumeRange( + num_items, + num_tiles, + tile_state); +} + + +/****************************************************************************** + * Dispatch + ******************************************************************************/ + +/** + * Utility class for dispatching the appropriately-tuned kernels for DeviceSpmv + */ +template < + typename ValueT, ///< Matrix and vector value type + typename OffsetT> ///< Signed integer type for global offsets +struct DispatchSpmv +{ + //--------------------------------------------------------------------- + // Constants and Types + //--------------------------------------------------------------------- + + enum + { + INIT_KERNEL_THREADS = 128 + }; + + // SpmvParams bundle type + typedef SpmvParams SpmvParamsT; + + // 2D merge path coordinate type + typedef typename CubVector::Type CoordinateT; + + // Tile status descriptor interface type + typedef ReduceByKeyScanTileState ScanTileStateT; + + // Tuple type for scanning (pairs accumulated segment-value with segment-index) + typedef KeyValuePair KeyValuePairT; + + + //--------------------------------------------------------------------- + // Tuning policies + //--------------------------------------------------------------------- + + /// SM11 + struct Policy110 + { + typedef AgentSpmvPolicy< + 128, + 1, + LOAD_DEFAULT, + LOAD_DEFAULT, + LOAD_DEFAULT, + LOAD_DEFAULT, + LOAD_DEFAULT, + false, + BLOCK_SCAN_WARP_SCANS> + SpmvPolicyT; + + typedef AgentSegmentFixupPolicy< + 128, + 4, + BLOCK_LOAD_VECTORIZE, + LOAD_DEFAULT, + BLOCK_SCAN_WARP_SCANS> + SegmentFixupPolicyT; + }; + + /// SM20 + struct Policy200 + { + typedef AgentSpmvPolicy< + 96, + 18, + LOAD_DEFAULT, + LOAD_DEFAULT, + LOAD_DEFAULT, + LOAD_DEFAULT, + LOAD_DEFAULT, + false, + BLOCK_SCAN_RAKING> + SpmvPolicyT; + + typedef AgentSegmentFixupPolicy< + 128, + 4, + BLOCK_LOAD_VECTORIZE, + LOAD_DEFAULT, + BLOCK_SCAN_WARP_SCANS> + SegmentFixupPolicyT; + + }; + + + + /// SM30 + struct Policy300 + { + typedef AgentSpmvPolicy< + 96, + 6, + LOAD_DEFAULT, + LOAD_DEFAULT, + LOAD_DEFAULT, + LOAD_DEFAULT, + LOAD_DEFAULT, + false, + BLOCK_SCAN_WARP_SCANS> + SpmvPolicyT; + + typedef AgentSegmentFixupPolicy< + 128, + 4, + BLOCK_LOAD_VECTORIZE, + LOAD_DEFAULT, + BLOCK_SCAN_WARP_SCANS> + SegmentFixupPolicyT; + + }; + + + /// SM35 + struct Policy350 + { + typedef AgentSpmvPolicy< + (sizeof(ValueT) > 4) ? 96 : 128, + (sizeof(ValueT) > 4) ? 4 : 7, + LOAD_LDG, + LOAD_CA, + LOAD_LDG, + LOAD_LDG, + LOAD_LDG, + (sizeof(ValueT) > 4) ? 
true : false, + BLOCK_SCAN_WARP_SCANS> + SpmvPolicyT; + + typedef AgentSegmentFixupPolicy< + 128, + 3, + BLOCK_LOAD_VECTORIZE, + LOAD_LDG, + BLOCK_SCAN_WARP_SCANS> + SegmentFixupPolicyT; + }; + + + /// SM37 + struct Policy370 + { + + typedef AgentSpmvPolicy< + (sizeof(ValueT) > 4) ? 128 : 128, + (sizeof(ValueT) > 4) ? 9 : 14, + LOAD_LDG, + LOAD_CA, + LOAD_LDG, + LOAD_LDG, + LOAD_LDG, + false, + BLOCK_SCAN_WARP_SCANS> + SpmvPolicyT; + + typedef AgentSegmentFixupPolicy< + 128, + 3, + BLOCK_LOAD_VECTORIZE, + LOAD_LDG, + BLOCK_SCAN_WARP_SCANS> + SegmentFixupPolicyT; + }; + + /// SM50 + struct Policy500 + { + typedef AgentSpmvPolicy< + (sizeof(ValueT) > 4) ? 64 : 128, + (sizeof(ValueT) > 4) ? 6 : 7, + LOAD_LDG, + LOAD_DEFAULT, + (sizeof(ValueT) > 4) ? LOAD_LDG : LOAD_DEFAULT, + (sizeof(ValueT) > 4) ? LOAD_LDG : LOAD_DEFAULT, + LOAD_LDG, + (sizeof(ValueT) > 4) ? true : false, + (sizeof(ValueT) > 4) ? BLOCK_SCAN_WARP_SCANS : BLOCK_SCAN_RAKING_MEMOIZE> + SpmvPolicyT; + + + typedef AgentSegmentFixupPolicy< + 128, + 3, + BLOCK_LOAD_VECTORIZE, + LOAD_LDG, + BLOCK_SCAN_RAKING_MEMOIZE> + SegmentFixupPolicyT; + }; + + + /// SM60 + struct Policy600 + { + typedef AgentSpmvPolicy< + (sizeof(ValueT) > 4) ? 64 : 128, + (sizeof(ValueT) > 4) ? 5 : 7, + LOAD_DEFAULT, + LOAD_DEFAULT, + LOAD_DEFAULT, + LOAD_DEFAULT, + LOAD_DEFAULT, + false, + BLOCK_SCAN_WARP_SCANS> + SpmvPolicyT; + + + typedef AgentSegmentFixupPolicy< + 128, + 3, + BLOCK_LOAD_DIRECT, + LOAD_LDG, + BLOCK_SCAN_WARP_SCANS> + SegmentFixupPolicyT; + }; + + + + //--------------------------------------------------------------------- + // Tuning policies of current PTX compiler pass + //--------------------------------------------------------------------- + +#if (CUB_PTX_ARCH >= 600) + typedef Policy600 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 500) + typedef Policy500 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 370) + typedef Policy370 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 350) + typedef Policy350 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 300) + typedef Policy300 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 200) + typedef Policy200 PtxPolicy; + +#else + typedef Policy110 PtxPolicy; + +#endif + + // "Opaque" policies (whose parameterizations aren't reflected in the type signature) + struct PtxSpmvPolicyT : PtxPolicy::SpmvPolicyT {}; + struct PtxSegmentFixupPolicy : PtxPolicy::SegmentFixupPolicyT {}; + + + //--------------------------------------------------------------------- + // Utilities + //--------------------------------------------------------------------- + + /** + * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use + */ + template + CUB_RUNTIME_FUNCTION __forceinline__ + static void InitConfigs( + int ptx_version, + KernelConfig &spmv_config, + KernelConfig &segment_fixup_config) + { + #if (CUB_PTX_ARCH > 0) + + // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy + spmv_config.template Init(); + segment_fixup_config.template Init(); + + #else + + // We're on the host, so lookup and initialize the kernel dispatch configurations with the policies that match the device's PTX version + if (ptx_version >= 600) + { + spmv_config.template Init(); + segment_fixup_config.template Init(); + } + else if (ptx_version >= 500) + { + spmv_config.template Init(); + segment_fixup_config.template Init(); + } + else if (ptx_version >= 370) + { + spmv_config.template Init(); + segment_fixup_config.template Init(); + } + else if (ptx_version >= 350) + { + spmv_config.template Init(); + 
segment_fixup_config.template Init(); + } + else if (ptx_version >= 300) + { + spmv_config.template Init(); + segment_fixup_config.template Init(); + + } + else if (ptx_version >= 200) + { + spmv_config.template Init(); + segment_fixup_config.template Init(); + } + else + { + spmv_config.template Init(); + segment_fixup_config.template Init(); + } + + #endif + } + + + /** + * Kernel kernel dispatch configuration. + */ + struct KernelConfig + { + int block_threads; + int items_per_thread; + int tile_items; + + template + CUB_RUNTIME_FUNCTION __forceinline__ + void Init() + { + block_threads = PolicyT::BLOCK_THREADS; + items_per_thread = PolicyT::ITEMS_PER_THREAD; + tile_items = block_threads * items_per_thread; + } + }; + + + //--------------------------------------------------------------------- + // Dispatch entrypoints + //--------------------------------------------------------------------- + + /** + * Internal dispatch routine for computing a device-wide reduction using the + * specified kernel functions. + * + * If the input is larger than a single tile, this method uses two-passes of + * kernel invocations. + */ + template < + typename Spmv1ColKernelT, ///< Function type of cub::DeviceSpmv1ColKernel + typename SpmvSearchKernelT, ///< Function type of cub::AgentSpmvSearchKernel + typename SpmvKernelT, ///< Function type of cub::AgentSpmvKernel + typename SegmentFixupKernelT> ///< Function type of cub::DeviceSegmentFixupKernelT + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Dispatch( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SpmvParamsT& spmv_params, ///< SpMV input parameter bundle + cudaStream_t stream, ///< [in] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous, ///< [in] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
+ Spmv1ColKernelT spmv_1col_kernel, ///< [in] Kernel function pointer to parameterization of DeviceSpmv1ColKernel + SpmvSearchKernelT spmv_search_kernel, ///< [in] Kernel function pointer to parameterization of AgentSpmvSearchKernel + SpmvKernelT spmv_kernel, ///< [in] Kernel function pointer to parameterization of AgentSpmvKernel + SegmentFixupKernelT segment_fixup_kernel, ///< [in] Kernel function pointer to parameterization of cub::DeviceSegmentFixupKernel + KernelConfig spmv_config, ///< [in] Dispatch parameters that match the policy that \p spmv_kernel was compiled for + KernelConfig segment_fixup_config) ///< [in] Dispatch parameters that match the policy that \p segment_fixup_kernel was compiled for + { +#ifndef CUB_RUNTIME_ENABLED + + // Kernel launch not supported from this device + return CubDebug(cudaErrorNotSupported ); + +#else + cudaError error = cudaSuccess; + do + { + if (spmv_params.num_cols == 1) + { + if (d_temp_storage == NULL) + { + // Return if the caller is simply requesting the size of the storage allocation + temp_storage_bytes = 1; + break; + } + + // Get search/init grid dims + int degen_col_kernel_block_size = INIT_KERNEL_THREADS; + int degen_col_kernel_grid_size = (spmv_params.num_rows + degen_col_kernel_block_size - 1) / degen_col_kernel_block_size; + + if (debug_synchronous) _CubLog("Invoking spmv_1col_kernel<<<%d, %d, 0, %lld>>>()\n", + degen_col_kernel_grid_size, degen_col_kernel_block_size, (long long) stream); + + // Invoke spmv_search_kernel + spmv_1col_kernel<<>>( + spmv_params); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + + break; + } + + // Get device ordinal + int device_ordinal; + if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; + + // Get SM count + int sm_count; + if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; + + // Get max x-dimension of grid + int max_dim_x; + if (CubDebug(error = cudaDeviceGetAttribute(&max_dim_x, cudaDevAttrMaxGridDimX, device_ordinal))) break;; + + // Total number of spmv work items + int num_merge_items = spmv_params.num_rows + spmv_params.num_nonzeros; + + // Tile sizes of kernels + int merge_tile_size = spmv_config.block_threads * spmv_config.items_per_thread; + int segment_fixup_tile_size = segment_fixup_config.block_threads * segment_fixup_config.items_per_thread; + + // Number of tiles for kernels + unsigned int num_merge_tiles = (num_merge_items + merge_tile_size - 1) / merge_tile_size; + unsigned int num_segment_fixup_tiles = (num_merge_tiles + segment_fixup_tile_size - 1) / segment_fixup_tile_size; + + // Get SM occupancy for kernels + int spmv_sm_occupancy; + if (CubDebug(error = MaxSmOccupancy( + spmv_sm_occupancy, + spmv_kernel, + spmv_config.block_threads))) break; + + int segment_fixup_sm_occupancy; + if (CubDebug(error = MaxSmOccupancy( + segment_fixup_sm_occupancy, + segment_fixup_kernel, + segment_fixup_config.block_threads))) break; + + // Get grid dimensions + dim3 spmv_grid_size( + CUB_MIN(num_merge_tiles, max_dim_x), + (num_merge_tiles + max_dim_x - 1) / max_dim_x, + 1); + + dim3 segment_fixup_grid_size( + CUB_MIN(num_segment_fixup_tiles, max_dim_x), + (num_segment_fixup_tiles + max_dim_x - 1) / max_dim_x, + 1); + + // Get the temporary storage allocation requirements + size_t allocation_sizes[3]; + if (CubDebug(error = 
ScanTileStateT::AllocationSize(num_segment_fixup_tiles, allocation_sizes[0]))) break; // bytes needed for reduce-by-key tile status descriptors + allocation_sizes[1] = num_merge_tiles * sizeof(KeyValuePairT); // bytes needed for block carry-out pairs + allocation_sizes[2] = (num_merge_tiles + 1) * sizeof(CoordinateT); // bytes needed for tile starting coordinates + + // Alias the temporary allocations from the single storage blob (or compute the necessary size of the blob) + void* allocations[3]; + if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; + if (d_temp_storage == NULL) + { + // Return if the caller is simply requesting the size of the storage allocation + break; + } + + // Construct the tile status interface + ScanTileStateT tile_state; + if (CubDebug(error = tile_state.Init(num_segment_fixup_tiles, allocations[0], allocation_sizes[0]))) break; + + // Alias the other allocations + KeyValuePairT* d_tile_carry_pairs = (KeyValuePairT*) allocations[1]; // Agent carry-out pairs + CoordinateT* d_tile_coordinates = (CoordinateT*) allocations[2]; // Agent starting coordinates + + // Get search/init grid dims + int search_block_size = INIT_KERNEL_THREADS; + int search_grid_size = (num_merge_tiles + 1 + search_block_size - 1) / search_block_size; + +#if (CUB_PTX_ARCH == 0) + // Init textures + if (CubDebug(error = spmv_params.t_vector_x.BindTexture(spmv_params.d_vector_x))) break; +#endif + + if (search_grid_size < sm_count) +// if (num_merge_tiles < spmv_sm_occupancy * sm_count) + { + // Not enough spmv tiles to saturate the device: have spmv blocks search their own staring coords + d_tile_coordinates = NULL; + } + else + { + // Use separate search kernel if we have enough spmv tiles to saturate the device + + // Log spmv_search_kernel configuration + if (debug_synchronous) _CubLog("Invoking spmv_search_kernel<<<%d, %d, 0, %lld>>>()\n", + search_grid_size, search_block_size, (long long) stream); + + // Invoke spmv_search_kernel + spmv_search_kernel<<>>( + num_merge_tiles, + d_tile_coordinates, + spmv_params); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + } + + // Log spmv_kernel configuration + if (debug_synchronous) _CubLog("Invoking spmv_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", + spmv_grid_size.x, spmv_grid_size.y, spmv_grid_size.z, spmv_config.block_threads, (long long) stream, spmv_config.items_per_thread, spmv_sm_occupancy); + + // Invoke spmv_kernel + spmv_kernel<<>>( + spmv_params, + d_tile_coordinates, + d_tile_carry_pairs, + num_merge_tiles, + tile_state, + num_segment_fixup_tiles); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + + // Run reduce-by-key fixup if necessary + if (num_merge_tiles > 1) + { + // Log segment_fixup_kernel configuration + if (debug_synchronous) _CubLog("Invoking segment_fixup_kernel<<<{%d,%d,%d}, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", + segment_fixup_grid_size.x, segment_fixup_grid_size.y, segment_fixup_grid_size.z, segment_fixup_config.block_threads, (long long) stream, segment_fixup_config.items_per_thread, segment_fixup_sm_occupancy); + + // Invoke segment_fixup_kernel + 
segment_fixup_kernel<<>>( + d_tile_carry_pairs, + spmv_params.d_vector_y, + num_merge_tiles, + num_segment_fixup_tiles, + tile_state); + + // Check for failure to launch + if (CubDebug(error = cudaPeekAtLastError())) break; + + // Sync the stream if specified to flush runtime errors + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + } + +#if (CUB_PTX_ARCH == 0) + // Free textures + if (CubDebug(error = spmv_params.t_vector_x.UnbindTexture())) break; +#endif + } + while (0); + + return error; + +#endif // CUB_RUNTIME_ENABLED + } + + + /** + * Internal dispatch routine for computing a device-wide reduction + */ + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Dispatch( + void* d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. + size_t& temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation + SpmvParamsT& spmv_params, ///< SpMV input parameter bundle + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. May cause significant slowdown. Default is \p false. + { + cudaError error = cudaSuccess; + do + { + // Get PTX version + int ptx_version; + #if (CUB_PTX_ARCH == 0) + if (CubDebug(error = PtxVersion(ptx_version))) break; + #else + ptx_version = CUB_PTX_ARCH; + #endif + + // Get kernel kernel dispatch configurations + KernelConfig spmv_config, segment_fixup_config; + InitConfigs(ptx_version, spmv_config, segment_fixup_config); + + if (CubDebug(error = Dispatch( + d_temp_storage, temp_storage_bytes, spmv_params, stream, debug_synchronous, + DeviceSpmv1ColKernel, + DeviceSpmvSearchKernel, + DeviceSpmvKernel, + DeviceSegmentFixupKernel, + spmv_config, segment_fixup_config))) break; + + } + while (0); + + return error; + } +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/grid/grid_barrier.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/grid/grid_barrier.cuh new file mode 100644 index 0000000..461fb44 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/grid/grid_barrier.cuh @@ -0,0 +1,211 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
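The dispatch routine above follows CUB's standard two-call temporary-storage idiom: a first call with d_temp_storage == NULL only writes the required byte count, and a second call with the allocated blob performs the actual search / spmv / segment-fixup launches. A minimal host-side sketch of that idiom, assuming the public cub::DeviceSpmv::CsrMV wrapper from the same CUB release (not shown in this diff excerpt) is available and that the CSR arrays already live on the device:

#include <cub/cub.cuh>

// Sketch only: RunSpmv and its parameter names are illustrative, not part of this diff.
cudaError_t RunSpmv(float* d_values, int* d_row_offsets, int* d_column_indices,
                    float* d_x, float* d_y,
                    int num_rows, int num_cols, int num_nonzeros)
{
    void*  d_temp_storage     = NULL;
    size_t temp_storage_bytes = 0;

    // Pass 1: d_temp_storage == NULL, so only temp_storage_bytes is written.
    cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes,
                           d_values, d_row_offsets, d_column_indices,
                           d_x, d_y, num_rows, num_cols, num_nonzeros);

    cudaMalloc(&d_temp_storage, temp_storage_bytes);

    // Pass 2: real work (search, spmv, and segment-fixup kernels as needed).
    cudaError_t error = cub::DeviceSpmv::CsrMV(d_temp_storage, temp_storage_bytes,
                                               d_values, d_row_offsets, d_column_indices,
                                               d_x, d_y, num_rows, num_cols, num_nonzeros);

    cudaFree(d_temp_storage);
    return error;
}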
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::GridBarrier implements a software global barrier among thread blocks within a CUDA grid + */ + +#pragma once + +#include "../util_debug.cuh" +#include "../util_namespace.cuh" +#include "../thread/thread_load.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \addtogroup GridModule + * @{ + */ + + +/** + * \brief GridBarrier implements a software global barrier among thread blocks within a CUDA grid + */ +class GridBarrier +{ +protected : + + typedef unsigned int SyncFlag; + + // Counters in global device memory + SyncFlag* d_sync; + +public: + + /** + * Constructor + */ + GridBarrier() : d_sync(NULL) {} + + + /** + * Synchronize + */ + __device__ __forceinline__ void Sync() const + { + volatile SyncFlag *d_vol_sync = d_sync; + + // Threadfence and syncthreads to make sure global writes are visible before + // thread-0 reports in with its sync counter + __threadfence(); + CTA_SYNC(); + + if (blockIdx.x == 0) + { + // Report in ourselves + if (threadIdx.x == 0) + { + d_vol_sync[blockIdx.x] = 1; + } + + CTA_SYNC(); + + // Wait for everyone else to report in + for (int peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x) + { + while (ThreadLoad(d_sync + peer_block) == 0) + { + __threadfence_block(); + } + } + + CTA_SYNC(); + + // Let everyone know it's safe to proceed + for (int peer_block = threadIdx.x; peer_block < gridDim.x; peer_block += blockDim.x) + { + d_vol_sync[peer_block] = 0; + } + } + else + { + if (threadIdx.x == 0) + { + // Report in + d_vol_sync[blockIdx.x] = 1; + + // Wait for acknowledgment + while (ThreadLoad(d_sync + blockIdx.x) == 1) + { + __threadfence_block(); + } + } + + CTA_SYNC(); + } + } +}; + + +/** + * \brief GridBarrierLifetime extends GridBarrier to provide lifetime management of the temporary device storage needed for cooperation. + * + * Uses RAII for lifetime, i.e., device resources are reclaimed when + * the destructor is called. 
+ */ +class GridBarrierLifetime : public GridBarrier +{ +protected: + + // Number of bytes backed by d_sync + size_t sync_bytes; + +public: + + /** + * Constructor + */ + GridBarrierLifetime() : GridBarrier(), sync_bytes(0) {} + + + /** + * DeviceFrees and resets the progress counters + */ + cudaError_t HostReset() + { + cudaError_t retval = cudaSuccess; + if (d_sync) + { + CubDebug(retval = cudaFree(d_sync)); + d_sync = NULL; + } + sync_bytes = 0; + return retval; + } + + + /** + * Destructor + */ + virtual ~GridBarrierLifetime() + { + HostReset(); + } + + + /** + * Sets up the progress counters for the next kernel launch (lazily + * allocating and initializing them if necessary) + */ + cudaError_t Setup(int sweep_grid_size) + { + cudaError_t retval = cudaSuccess; + do { + size_t new_sync_bytes = sweep_grid_size * sizeof(SyncFlag); + if (new_sync_bytes > sync_bytes) + { + if (d_sync) + { + if (CubDebug(retval = cudaFree(d_sync))) break; + } + + sync_bytes = new_sync_bytes; + + // Allocate and initialize to zero + if (CubDebug(retval = cudaMalloc((void**) &d_sync, sync_bytes))) break; + if (CubDebug(retval = cudaMemset(d_sync, 0, new_sync_bytes))) break; + } + } while (0); + + return retval; + } +}; + + +/** @} */ // end group GridModule + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/grid/grid_even_share.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/grid/grid_even_share.cuh new file mode 100644 index 0000000..f0b3a69 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/grid/grid_even_share.cuh @@ -0,0 +1,222 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
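As a usage reference for the two classes above, a minimal sketch (the kernel name and launch shape are illustrative): the host sizes the barrier for the grid it is about to launch via Setup(), and every block of that grid calls Sync() at the rendezvous point. The software barrier is only correct when all blocks of the grid are simultaneously resident on the device, so callers should bound grid_size accordingly.

#include <cub/grid/grid_barrier.cuh>

// Illustrative kernel: all blocks of the grid meet at the barrier between two phases.
__global__ void TwoPhaseKernel(cub::GridBarrier barrier /*, ... real arguments ... */)
{
    // ... phase 1 ...
    barrier.Sync();          // grid-wide software barrier
    // ... phase 2 ...
}

cudaError_t LaunchTwoPhase(int grid_size, int block_size)
{
    cub::GridBarrierLifetime barrier;               // owns the d_sync counters; freed in its destructor
    cudaError_t error = barrier.Setup(grid_size);   // lazily allocates one SyncFlag per block
    if (error != cudaSuccess) return error;

    TwoPhaseKernel<<<grid_size, block_size>>>(barrier);   // passed (sliced) by value as a GridBarrier
    return cudaDeviceSynchronize();
}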
+ * + ******************************************************************************/ + +/** + * \file + * cub::GridEvenShare is a descriptor utility for distributing input among CUDA thread blocks in an "even-share" fashion. Each thread block gets roughly the same number of fixed-size work units (grains). + */ + + +#pragma once + +#include "../util_namespace.cuh" +#include "../util_macro.cuh" +#include "grid_mapping.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \addtogroup GridModule + * @{ + */ + + +/** + * \brief GridEvenShare is a descriptor utility for distributing input among + * CUDA thread blocks in an "even-share" fashion. Each thread block gets roughly + * the same number of input tiles. + * + * \par Overview + * Each thread block is assigned a consecutive sequence of input tiles. To help + * preserve alignment and eliminate the overhead of guarded loads for all but the + * last thread block, to GridEvenShare assigns one of three different amounts of + * work to a given thread block: "big", "normal", or "last". The "big" workloads + * are one scheduling grain larger than "normal". The "last" work unit for the + * last thread block may be partially-full if the input is not an even multiple of + * the scheduling grain size. + * + * \par + * Before invoking a child grid, a parent thread will typically construct an + * instance of GridEvenShare. The instance can be passed to child thread blocks + * which can initialize their per-thread block offsets using \p BlockInit(). + */ +template +struct GridEvenShare +{ +private: + + OffsetT total_tiles; + int big_shares; + OffsetT big_share_items; + OffsetT normal_share_items; + OffsetT normal_base_offset; + +public: + + /// Total number of input items + OffsetT num_items; + + /// Grid size in thread blocks + int grid_size; + + /// OffsetT into input marking the beginning of the owning thread block's segment of input tiles + OffsetT block_offset; + + /// OffsetT into input of marking the end (one-past) of the owning thread block's segment of input tiles + OffsetT block_end; + + /// Stride between input tiles + OffsetT block_stride; + + + /** + * \brief Constructor. + */ + __host__ __device__ __forceinline__ GridEvenShare() : + total_tiles(0), + big_shares(0), + big_share_items(0), + normal_share_items(0), + normal_base_offset(0), + num_items(0), + grid_size(0), + block_offset(0), + block_end(0), + block_stride(0) + {} + + + /** + * \brief Dispatch initializer. To be called prior prior to kernel launch. 
+ */ + __host__ __device__ __forceinline__ void DispatchInit( + OffsetT num_items, ///< Total number of input items + int max_grid_size, ///< Maximum grid size allowable (actual grid size may be less if not warranted by the the number of input items) + int tile_items) ///< Number of data items per input tile + { + this->block_offset = num_items; // Initialize past-the-end + this->block_end = num_items; // Initialize past-the-end + this->num_items = num_items; + this->total_tiles = (num_items + tile_items - 1) / tile_items; + this->grid_size = CUB_MIN(total_tiles, max_grid_size); + OffsetT avg_tiles_per_block = total_tiles / grid_size; + this->big_shares = total_tiles - (avg_tiles_per_block * grid_size); // leftover grains go to big blocks + this->normal_share_items = avg_tiles_per_block * tile_items; + this->normal_base_offset = big_shares * tile_items; + this->big_share_items = normal_share_items + tile_items; + } + + + /** + * \brief Initializes ranges for the specified thread block index. Specialized + * for a "raking" access pattern in which each thread block is assigned a + * consecutive sequence of input tiles. + */ + template + __device__ __forceinline__ void BlockInit( + int block_id, + Int2Type /*strategy_tag*/) + { + block_stride = TILE_ITEMS; + if (block_id < big_shares) + { + // This thread block gets a big share of grains (avg_tiles_per_block + 1) + block_offset = (block_id * big_share_items); + block_end = block_offset + big_share_items; + } + else if (block_id < total_tiles) + { + // This thread block gets a normal share of grains (avg_tiles_per_block) + block_offset = normal_base_offset + (block_id * normal_share_items); + block_end = CUB_MIN(num_items, block_offset + normal_share_items); + } + // Else default past-the-end + } + + + /** + * \brief Block-initialization, specialized for a "raking" access + * pattern in which each thread block is assigned a consecutive sequence + * of input tiles. + */ + template + __device__ __forceinline__ void BlockInit( + int block_id, + Int2Type /*strategy_tag*/) + { + block_stride = grid_size * TILE_ITEMS; + block_offset = (block_id * TILE_ITEMS); + block_end = num_items; + } + + + /** + * \brief Block-initialization, specialized for "strip mining" access + * pattern in which the input tiles assigned to each thread block are + * separated by a stride equal to the the extent of the grid. + */ + template < + int TILE_ITEMS, + GridMappingStrategy STRATEGY> + __device__ __forceinline__ void BlockInit() + { + BlockInit(blockIdx.x, Int2Type()); + } + + + /** + * \brief Block-initialization, specialized for a "raking" access + * pattern in which each thread block is assigned a consecutive sequence + * of input tiles. 
+ */ + template + __device__ __forceinline__ void BlockInit( + OffsetT block_offset, ///< [in] Threadblock begin offset (inclusive) + OffsetT block_end) ///< [in] Threadblock end offset (exclusive) + { + this->block_offset = block_offset; + this->block_end = block_end; + this->block_stride = TILE_ITEMS; + } + + +}; + + + + + +/** @} */ // end group GridModule + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/grid/grid_mapping.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/grid/grid_mapping.cuh new file mode 100644 index 0000000..f0e9fde --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/grid/grid_mapping.cuh @@ -0,0 +1,113 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::GridMappingStrategy enumerates alternative strategies for mapping constant-sized tiles of device-wide data onto a grid of CUDA thread blocks. + */ + +#pragma once + +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \addtogroup GridModule + * @{ + */ + + +/****************************************************************************** + * Mapping policies + *****************************************************************************/ + + +/** + * \brief cub::GridMappingStrategy enumerates alternative strategies for mapping constant-sized tiles of device-wide data onto a grid of CUDA thread blocks. 
+ */ +enum GridMappingStrategy +{ + /** + * \brief An a "raking" access pattern in which each thread block is + * assigned a consecutive sequence of input tiles + * + * \par Overview + * The input is evenly partitioned into \p p segments, where \p p is + * constant and corresponds loosely to the number of thread blocks that may + * actively reside on the target device. Each segment is comprised of + * consecutive tiles, where a tile is a small, constant-sized unit of input + * to be processed to completion before the thread block terminates or + * obtains more work. The kernel invokes \p p thread blocks, each + * of which iteratively consumes a segment of n/p elements + * in tile-size increments. + */ + GRID_MAPPING_RAKE, + + /** + * \brief An a "strip mining" access pattern in which the input tiles assigned + * to each thread block are separated by a stride equal to the the extent of + * the grid. + * + * \par Overview + * The input is evenly partitioned into \p p sets, where \p p is + * constant and corresponds loosely to the number of thread blocks that may + * actively reside on the target device. Each set is comprised of + * data tiles separated by stride \p tiles, where a tile is a small, + * constant-sized unit of input to be processed to completion before the + * thread block terminates or obtains more work. The kernel invokes \p p + * thread blocks, each of which iteratively consumes a segment of + * n/p elements in tile-size increments. + */ + GRID_MAPPING_STRIP_MINE, + + /** + * \brief A dynamic "queue-based" strategy for assigning input tiles to thread blocks. + * + * \par Overview + * The input is treated as a queue to be dynamically consumed by a grid of + * thread blocks. Work is atomically dequeued in tiles, where a tile is a + * unit of input to be processed to completion before the thread block + * terminates or obtains more work. The grid size \p p is constant, + * loosely corresponding to the number of thread blocks that may actively + * reside on the target device. + */ + GRID_MAPPING_DYNAMIC, +}; + + +/** @} */ // end group GridModule + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/grid/grid_queue.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/grid/grid_queue.cuh new file mode 100644 index 0000000..9615b14 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/grid/grid_queue.cuh @@ -0,0 +1,220 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
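To tie GridEvenShare and GridMappingStrategy together, a minimal sketch (the kernel name and TILE_ITEMS value are illustrative): the host partitions the input with DispatchInit(), and each thread block calls BlockInit() with a mapping strategy to obtain its [block_offset, block_end) range and block_stride.

#include <cub/grid/grid_even_share.cuh>
#include <cub/grid/grid_mapping.cuh>

enum { TILE_ITEMS = 128 * 4 };   // illustrative: BLOCK_THREADS * ITEMS_PER_THREAD

__global__ void ConsumeTiles(cub::GridEvenShare<int> even_share, const int* d_in)
{
    // Raking: this block owns a consecutive run of tiles.
    // GRID_MAPPING_STRIP_MINE would instead stride blocks across the whole input.
    even_share.BlockInit<TILE_ITEMS, cub::GRID_MAPPING_RAKE>();

    for (int tile_offset = even_share.block_offset;
         tile_offset < even_share.block_end;
         tile_offset += even_share.block_stride)
    {
        // ... process d_in items in [tile_offset, min(tile_offset + TILE_ITEMS, even_share.num_items)) ...
    }
}

void LaunchEvenShare(const int* d_in, int num_items, int max_grid_size)
{
    cub::GridEvenShare<int> even_share;
    even_share.DispatchInit(num_items, max_grid_size, TILE_ITEMS);
    ConsumeTiles<<<even_share.grid_size, 128>>>(even_share, d_in);
}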
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::GridQueue is a descriptor utility for dynamic queue management. + */ + +#pragma once + +#include "../util_namespace.cuh" +#include "../util_debug.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \addtogroup GridModule + * @{ + */ + + +/** + * \brief GridQueue is a descriptor utility for dynamic queue management. + * + * \par Overview + * GridQueue descriptors provides abstractions for "filling" or + * "draining" globally-shared vectors. + * + * \par + * A "filling" GridQueue works by atomically-adding to a zero-initialized counter, + * returning a unique offset for the calling thread to write its items. + * The GridQueue maintains the total "fill-size". The fill counter must be reset + * using GridQueue::ResetFill by the host or kernel instance prior to the kernel instance that + * will be filling. + * + * \par + * Similarly, a "draining" GridQueue works by works by atomically-incrementing a + * zero-initialized counter, returning a unique offset for the calling thread to + * read its items. Threads can safely drain until the array's logical fill-size is + * exceeded. The drain counter must be reset using GridQueue::ResetDrain or + * GridQueue::FillAndResetDrain by the host or kernel instance prior to the kernel instance that + * will be filling. (For dynamic work distribution of existing data, the corresponding fill-size + * is simply the number of elements in the array.) + * + * \par + * Iterative work management can be implemented simply with a pair of flip-flopping + * work buffers, each with an associated set of fill and drain GridQueue descriptors. + * + * \tparam OffsetT Signed integer type for global offsets + */ +template +class GridQueue +{ +private: + + /// Counter indices + enum + { + FILL = 0, + DRAIN = 1, + }; + + /// Pair of counters + OffsetT *d_counters; + +public: + + /// Returns the device allocation size in bytes needed to construct a GridQueue instance + __host__ __device__ __forceinline__ + static size_t AllocationSize() + { + return sizeof(OffsetT) * 2; + } + + + /// Constructs an invalid GridQueue descriptor + __host__ __device__ __forceinline__ GridQueue() + : + d_counters(NULL) + {} + + + /// Constructs a GridQueue descriptor around the device storage allocation + __host__ __device__ __forceinline__ GridQueue( + void *d_storage) ///< Device allocation to back the GridQueue. Must be at least as big as AllocationSize(). + : + d_counters((OffsetT*) d_storage) + {} + + + /// This operation sets the fill-size and resets the drain counter, preparing the GridQueue for draining in the next kernel instance. 
To be called by the host or by a kernel prior to that which will be draining. + __host__ __device__ __forceinline__ cudaError_t FillAndResetDrain( + OffsetT fill_size, + cudaStream_t stream = 0) + { +#if (CUB_PTX_ARCH > 0) + (void)stream; + d_counters[FILL] = fill_size; + d_counters[DRAIN] = 0; + return cudaSuccess; +#else + OffsetT counters[2]; + counters[FILL] = fill_size; + counters[DRAIN] = 0; + return CubDebug(cudaMemcpyAsync(d_counters, counters, sizeof(OffsetT) * 2, cudaMemcpyHostToDevice, stream)); +#endif + } + + + /// This operation resets the drain so that it may advance to meet the existing fill-size. To be called by the host or by a kernel prior to that which will be draining. + __host__ __device__ __forceinline__ cudaError_t ResetDrain(cudaStream_t stream = 0) + { +#if (CUB_PTX_ARCH > 0) + (void)stream; + d_counters[DRAIN] = 0; + return cudaSuccess; +#else + return CubDebug(cudaMemsetAsync(d_counters + DRAIN, 0, sizeof(OffsetT), stream)); +#endif + } + + + /// This operation resets the fill counter. To be called by the host or by a kernel prior to that which will be filling. + __host__ __device__ __forceinline__ cudaError_t ResetFill(cudaStream_t stream = 0) + { +#if (CUB_PTX_ARCH > 0) + (void)stream; + d_counters[FILL] = 0; + return cudaSuccess; +#else + return CubDebug(cudaMemsetAsync(d_counters + FILL, 0, sizeof(OffsetT), stream)); +#endif + } + + + /// Returns the fill-size established by the parent or by the previous kernel. + __host__ __device__ __forceinline__ cudaError_t FillSize( + OffsetT &fill_size, + cudaStream_t stream = 0) + { +#if (CUB_PTX_ARCH > 0) + (void)stream; + fill_size = d_counters[FILL]; + return cudaSuccess; +#else + return CubDebug(cudaMemcpyAsync(&fill_size, d_counters + FILL, sizeof(OffsetT), cudaMemcpyDeviceToHost, stream)); +#endif + } + + + /// Drain \p num_items from the queue. Returns offset from which to read items. To be called from CUDA kernel. + __device__ __forceinline__ OffsetT Drain(OffsetT num_items) + { + return atomicAdd(d_counters + DRAIN, num_items); + } + + + /// Fill \p num_items into the queue. Returns offset from which to write items. To be called from CUDA kernel. + __device__ __forceinline__ OffsetT Fill(OffsetT num_items) + { + return atomicAdd(d_counters + FILL, num_items); + } +}; + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + + +/** + * Reset grid queue (call with 1 block of 1 thread) + */ +template +__global__ void FillAndResetDrainKernel( + GridQueue grid_queue, + OffsetT num_items) +{ + grid_queue.FillAndResetDrain(num_items); +} + + + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + +/** @} */ // end group GridModule + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/host/mutex.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/host/mutex.cuh new file mode 100644 index 0000000..ff7ec90 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/host/mutex.cuh @@ -0,0 +1,171 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
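A minimal sketch of the fill/drain protocol documented above (the kernel name and tile size are illustrative): the host establishes the fill-size and resets the drain counter with FillAndResetDrain(), then consumer blocks atomically dequeue fixed-size tiles with Drain() until the logical fill-size is exceeded.

#include <cub/grid/grid_queue.cuh>

enum { TILE_ITEMS = 256 };   // illustrative: one item per thread per dequeued tile

__global__ void DrainKernel(cub::GridQueue<int> queue, const int* d_in, int num_items)
{
    __shared__ int tile_offset;

    while (true)
    {
        // One thread per block atomically dequeues the next tile of work.
        if (threadIdx.x == 0)
            tile_offset = queue.Drain(TILE_ITEMS);
        __syncthreads();

        if (tile_offset >= num_items)
            break;                               // fill-size exceeded: queue drained

        int item = tile_offset + threadIdx.x;
        if (item < num_items)
        {
            // ... consume d_in[item] ...
        }
        __syncthreads();                         // keep tile_offset stable until all threads finish this tile
    }
}

void LaunchDrain(const int* d_in, int num_items, int grid_size, cudaStream_t stream)
{
    void* d_queue_storage;
    cudaMalloc(&d_queue_storage, cub::GridQueue<int>::AllocationSize());

    cub::GridQueue<int> queue(d_queue_storage);
    queue.FillAndResetDrain(num_items, stream);  // host path: async copy of the {fill, drain} counters

    DrainKernel<<<grid_size, TILE_ITEMS, 0, stream>>>(queue, d_in, num_items);

    cudaStreamSynchronize(stream);
    cudaFree(d_queue_storage);
}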
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Simple portable mutex + */ + + +#pragma once + +#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800) + #include +#else + #if defined(_WIN32) || defined(_WIN64) + #include + + #define WIN32_LEAN_AND_MEAN + #define NOMINMAX + #include + #undef WIN32_LEAN_AND_MEAN + #undef NOMINMAX + + /** + * Compiler read/write barrier + */ + #pragma intrinsic(_ReadWriteBarrier) + + #endif +#endif + +#include "../util_namespace.cuh" + + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * Simple portable mutex + * - Wraps std::mutex when compiled with C++11 or newer (supported on all platforms) + * - Uses GNU/Windows spinlock mechanisms for pre C++11 (supported on x86/x64 when compiled with cl.exe or g++) + */ +struct Mutex +{ +#if (__cplusplus > 199711L) || (defined(_MSC_VER) && _MSC_VER >= 1800) + + std::mutex mtx; + + void Lock() + { + mtx.lock(); + } + + void Unlock() + { + mtx.unlock(); + } + + void TryLock() + { + mtx.try_lock(); + } + +#else //__cplusplus > 199711L + + #if defined(_MSC_VER) + + // Microsoft VC++ + typedef long Spinlock; + + #else + + // GNU g++ + typedef int Spinlock; + + /** + * Compiler read/write barrier + */ + __forceinline__ void _ReadWriteBarrier() + { + __sync_synchronize(); + } + + /** + * Atomic exchange + */ + __forceinline__ long _InterlockedExchange(volatile int * const Target, const int Value) + { + // NOTE: __sync_lock_test_and_set would be an acquire barrier, so we force a full barrier + _ReadWriteBarrier(); + return __sync_lock_test_and_set(Target, Value); + } + + /** + * Pause instruction to prevent excess processor bus usage + */ + __forceinline__ void YieldProcessor() + { + } + + #endif // defined(_MSC_VER) + + /// Lock member + volatile Spinlock lock; + + /** + * Constructor + */ + Mutex() : lock(0) {} + + /** + * Return when the specified spinlock has been acquired + */ + __forceinline__ void Lock() + { + while (1) + { + if (!_InterlockedExchange(&lock, 1)) return; + while (lock) YieldProcessor(); + } + } + + + /** + * Release the specified spinlock + */ + __forceinline__ void Unlock() + { + _ReadWriteBarrier(); + lock = 0; + } + +#endif // __cplusplus > 199711L + +}; + + + + +} // CUB namespace +CUB_NS_POSTFIX // 
Optional outer namespace(s) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/arg_index_input_iterator.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/arg_index_input_iterator.cuh new file mode 100644 index 0000000..95a84a5 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/arg_index_input_iterator.cuh @@ -0,0 +1,259 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Random-access iterator types + */ + +#pragma once + +#include +#include + +#include "../thread/thread_load.cuh" +#include "../thread/thread_store.cuh" +#include "../util_device.cuh" +#include "../util_namespace.cuh" + +#include + +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \addtogroup UtilIterator + * @{ + */ + + +/** + * \brief A random-access input wrapper for pairing dereferenced values with their corresponding indices (forming \p KeyValuePair tuples). + * + * \par Overview + * - ArgIndexInputIteratorTwraps a random access input iterator \p itr of type \p InputIteratorT. + * Dereferencing an ArgIndexInputIteratorTat offset \p i produces a \p KeyValuePair value whose + * \p key field is \p i and whose \p value field is itr[i]. + * - Can be used with any data type. + * - Can be constructed, manipulated, and exchanged within and between host and device + * functions. Wrapped host memory can only be dereferenced on the host, and wrapped + * device memory can only be dereferenced on the device. + * - Compatible with Thrust API v1.7 or newer. 
+ * + * \par Snippet + * The code snippet below illustrates the use of \p ArgIndexInputIteratorTto + * dereference an array of doubles + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize a device array + * double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0] + * + * // Create an iterator wrapper + * cub::ArgIndexInputIterator itr(d_in); + * + * // Within device code: + * typedef typename cub::ArgIndexInputIterator::value_type Tuple; + * Tuple item_offset_pair.key = *itr; + * printf("%f @ %d\n", + * item_offset_pair.value, + * item_offset_pair.key); // 8.0 @ 0 + * + * itr = itr + 6; + * item_offset_pair.key = *itr; + * printf("%f @ %d\n", + * item_offset_pair.value, + * item_offset_pair.key); // 9.0 @ 6 + * + * \endcode + * + * \tparam InputIteratorT The value type of the wrapped input iterator + * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) + * \tparam OutputValueT The paired value type of the tuple (Default: value type of input iterator) + */ +template < + typename InputIteratorT, + typename OffsetT = ptrdiff_t, + typename OutputValueT = typename std::iterator_traits::value_type> +class ArgIndexInputIterator +{ +public: + + // Required iterator traits + typedef ArgIndexInputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another + typedef KeyValuePair value_type; ///< The type of the element the iterator can point to + typedef value_type* pointer; ///< The type of a pointer to an element the iterator can point to + typedef value_type reference; ///< The type of a reference to an element the iterator can point to + +#if (THRUST_VERSION >= 100700) + // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods + typedef typename thrust::detail::iterator_facade_category< + thrust::any_system_tag, + thrust::random_access_traversal_tag, + value_type, + reference + >::type iterator_category; ///< The iterator category +#else + typedef std::random_access_iterator_tag iterator_category; ///< The iterator category +#endif // THRUST_VERSION + +private: + + InputIteratorT itr; + difference_type offset; + +public: + + /// Constructor + __host__ __device__ __forceinline__ ArgIndexInputIterator( + InputIteratorT itr, ///< Input iterator to wrap + difference_type offset = 0) ///< OffsetT (in items) from \p itr denoting the position of the iterator + : + itr(itr), + offset(offset) + {} + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) + { + self_type retval = *this; + offset++; + return retval; + } + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() + { + offset++; + return *this; + } + + /// Indirection + __host__ __device__ __forceinline__ reference operator*() const + { + value_type retval; + retval.value = itr[offset]; + retval.key = offset; + return retval; + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const + { + self_type retval(itr, offset + n); + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) + { + offset += n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const + { + self_type retval(itr, offset - n); + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ 
self_type& operator-=(Distance n) + { + offset -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type operator-(self_type other) const + { + return offset - other.offset; + } + + /// Array subscript + template + __host__ __device__ __forceinline__ reference operator[](Distance n) const + { + self_type offset = (*this) + n; + return *offset; + } + + /// Structure dereference + __host__ __device__ __forceinline__ pointer operator->() + { + return &(*(*this)); + } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) + { + return ((itr == rhs.itr) && (offset == rhs.offset)); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) + { + return ((itr != rhs.itr) || (offset != rhs.offset)); + } + + /// Normalize + __host__ __device__ __forceinline__ void normalize() + { + itr += offset; + offset = 0; + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& /*itr*/) + { + return os; + } +}; + + + +/** @} */ // end group UtilIterator + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/cache_modified_input_iterator.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/cache_modified_input_iterator.cuh new file mode 100644 index 0000000..b4ad91e --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/cache_modified_input_iterator.cuh @@ -0,0 +1,240 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/** + * \file + * Random-access iterator types + */ + +#pragma once + +#include +#include + +#include "../thread/thread_load.cuh" +#include "../thread/thread_store.cuh" +#include "../util_device.cuh" +#include "../util_namespace.cuh" + +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + + +/** + * \addtogroup UtilIterator + * @{ + */ + + +/** + * \brief A random-access input wrapper for dereferencing array values using a PTX cache load modifier. + * + * \par Overview + * - CacheModifiedInputIteratorTis a random-access input iterator that wraps a native + * device pointer of type ValueType*. \p ValueType references are + * made by reading \p ValueType values through loads modified by \p MODIFIER. + * - Can be used to load any data type from memory using PTX cache load modifiers (e.g., "LOAD_LDG", + * "LOAD_CG", "LOAD_CA", "LOAD_CS", "LOAD_CV", etc.). + * - Can be constructed, manipulated, and exchanged within and between host and device + * functions, but can only be dereferenced within device functions. + * - Compatible with Thrust API v1.7 or newer. + * + * \par Snippet + * The code snippet below illustrates the use of \p CacheModifiedInputIteratorTto + * dereference a device array of double using the "ldg" PTX load modifier + * (i.e., load values through texture cache). + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize a device array + * double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0] + * + * // Create an iterator wrapper + * cub::CacheModifiedInputIterator itr(d_in); + * + * // Within device code: + * printf("%f\n", itr[0]); // 8.0 + * printf("%f\n", itr[1]); // 6.0 + * printf("%f\n", itr[6]); // 9.0 + * + * \endcode + * + * \tparam CacheLoadModifier The cub::CacheLoadModifier to use when accessing data + * \tparam ValueType The value type of this iterator + * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) + */ +template < + CacheLoadModifier MODIFIER, + typename ValueType, + typename OffsetT = ptrdiff_t> +class CacheModifiedInputIterator +{ +public: + + // Required iterator traits + typedef CacheModifiedInputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another + typedef ValueType value_type; ///< The type of the element the iterator can point to + typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to + typedef ValueType reference; ///< The type of a reference to an element the iterator can point to + +#if (THRUST_VERSION >= 100700) + // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods + typedef typename thrust::detail::iterator_facade_category< + thrust::device_system_tag, + thrust::random_access_traversal_tag, + value_type, + reference + >::type iterator_category; ///< The iterator category +#else + typedef std::random_access_iterator_tag iterator_category; ///< The iterator category +#endif // THRUST_VERSION + + +public: + + /// Wrapped native pointer + ValueType* ptr; + + /// Constructor + template + __host__ __device__ __forceinline__ CacheModifiedInputIterator( + QualifiedValueType* ptr) ///< Native pointer to wrap + : + ptr(const_cast::Type *>(ptr)) 
+ {} + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) + { + self_type retval = *this; + ptr++; + return retval; + } + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() + { + ptr++; + return *this; + } + + /// Indirection + __device__ __forceinline__ reference operator*() const + { + return ThreadLoad(ptr); + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const + { + self_type retval(ptr + n); + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) + { + ptr += n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const + { + self_type retval(ptr - n); + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ self_type& operator-=(Distance n) + { + ptr -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type operator-(self_type other) const + { + return ptr - other.ptr; + } + + /// Array subscript + template + __device__ __forceinline__ reference operator[](Distance n) const + { + return ThreadLoad(ptr + n); + } + + /// Structure dereference + __device__ __forceinline__ pointer operator->() + { + return &ThreadLoad(ptr); + } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) + { + return (ptr == rhs.ptr); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) + { + return (ptr != rhs.ptr); + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& /*itr*/) + { + return os; + } +}; + + + +/** @} */ // end group UtilIterator + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/cache_modified_output_iterator.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/cache_modified_output_iterator.cuh new file mode 100644 index 0000000..c3e3321 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/cache_modified_output_iterator.cuh @@ -0,0 +1,254 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Random-access iterator types + */ + +#pragma once + +#include +#include + +#include "../thread/thread_load.cuh" +#include "../thread/thread_store.cuh" +#include "../util_device.cuh" +#include "../util_namespace.cuh" + +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \addtogroup UtilIterator + * @{ + */ + + +/** + * \brief A random-access output wrapper for storing array values using a PTX cache-modifier. + * + * \par Overview + * - CacheModifiedOutputIterator is a random-access output iterator that wraps a native + * device pointer of type ValueType*. \p ValueType references are + * made by writing \p ValueType values through stores modified by \p MODIFIER. + * - Can be used to store any data type to memory using PTX cache store modifiers (e.g., "STORE_WB", + * "STORE_CG", "STORE_CS", "STORE_WT", etc.). + * - Can be constructed, manipulated, and exchanged within and between host and device + * functions, but can only be dereferenced within device functions. + * - Compatible with Thrust API v1.7 or newer. + * + * \par Snippet + * The code snippet below illustrates the use of \p CacheModifiedOutputIterator to + * dereference a device array of doubles using the "wt" PTX load modifier + * (i.e., write-through to system memory). 
+ * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize a device array + * double *d_out; // e.g., [, , , , , , ] + * + * // Create an iterator wrapper + * cub::CacheModifiedOutputIterator itr(d_out); + * + * // Within device code: + * itr[0] = 8.0; + * itr[1] = 66.0; + * itr[55] = 24.0; + * + * \endcode + * + * \par Usage Considerations + * - Can only be dereferenced within device code + * + * \tparam CacheStoreModifier The cub::CacheStoreModifier to use when accessing data + * \tparam ValueType The value type of this iterator + * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) + */ +template < + CacheStoreModifier MODIFIER, + typename ValueType, + typename OffsetT = ptrdiff_t> +class CacheModifiedOutputIterator +{ +private: + + // Proxy object + struct Reference + { + ValueType* ptr; + + /// Constructor + __host__ __device__ __forceinline__ Reference(ValueType* ptr) : ptr(ptr) {} + + /// Assignment + __device__ __forceinline__ ValueType operator =(ValueType val) + { + ThreadStore(ptr, val); + return val; + } + }; + +public: + + // Required iterator traits + typedef CacheModifiedOutputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another + typedef void value_type; ///< The type of the element the iterator can point to + typedef void pointer; ///< The type of a pointer to an element the iterator can point to + typedef Reference reference; ///< The type of a reference to an element the iterator can point to + +#if (THRUST_VERSION >= 100700) + // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods + typedef typename thrust::detail::iterator_facade_category< + thrust::device_system_tag, + thrust::random_access_traversal_tag, + value_type, + reference + >::type iterator_category; ///< The iterator category +#else + typedef std::random_access_iterator_tag iterator_category; ///< The iterator category +#endif // THRUST_VERSION + +private: + + ValueType* ptr; + +public: + + /// Constructor + template + __host__ __device__ __forceinline__ CacheModifiedOutputIterator( + QualifiedValueType* ptr) ///< Native pointer to wrap + : + ptr(const_cast::Type *>(ptr)) + {} + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) + { + self_type retval = *this; + ptr++; + return retval; + } + + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() + { + ptr++; + return *this; + } + + /// Indirection + __host__ __device__ __forceinline__ reference operator*() const + { + return Reference(ptr); + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const + { + self_type retval(ptr + n); + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) + { + ptr += n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const + { + self_type retval(ptr - n); + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ self_type& operator-=(Distance n) + { + ptr -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type operator-(self_type other) const + { + return ptr - other.ptr; + } + + /// Array subscript + template + __host__ __device__ __forceinline__ reference operator[](Distance n) const + 
{ + return Reference(ptr + n); + } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) + { + return (ptr == rhs.ptr); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) + { + return (ptr != rhs.ptr); + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& itr) + { + return os; + } +}; + + +/** @} */ // end group UtilIterator + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/constant_input_iterator.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/constant_input_iterator.cuh new file mode 100644 index 0000000..1e0a910 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/constant_input_iterator.cuh @@ -0,0 +1,235 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Random-access iterator types + */ + +#pragma once + +#include +#include + +#include "../thread/thread_load.cuh" +#include "../thread/thread_store.cuh" +#include "../util_namespace.cuh" + +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \addtogroup UtilIterator + * @{ + */ + + +/** + * \brief A random-access input generator for dereferencing a sequence of homogeneous values + * + * \par Overview + * - Read references to a ConstantInputIteratorTiterator always return the supplied constant + * of type \p ValueType. + * - Can be used with any data type. 
+ * - Can be constructed, manipulated, dereferenced, and exchanged within and between host and device + * functions. + * - Compatible with Thrust API v1.7 or newer. + * + * \par Snippet + * The code snippet below illustrates the use of \p ConstantInputIteratorTto + * dereference a sequence of homogeneous doubles. + * \par + * \code + * #include // or equivalently + * + * cub::ConstantInputIterator itr(5.0); + * + * printf("%f\n", itr[0]); // 5.0 + * printf("%f\n", itr[1]); // 5.0 + * printf("%f\n", itr[2]); // 5.0 + * printf("%f\n", itr[50]); // 5.0 + * + * \endcode + * + * \tparam ValueType The value type of this iterator + * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) + */ +template < + typename ValueType, + typename OffsetT = ptrdiff_t> +class ConstantInputIterator +{ +public: + + // Required iterator traits + typedef ConstantInputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another + typedef ValueType value_type; ///< The type of the element the iterator can point to + typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to + typedef ValueType reference; ///< The type of a reference to an element the iterator can point to + +#if (THRUST_VERSION >= 100700) + // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods + typedef typename thrust::detail::iterator_facade_category< + thrust::any_system_tag, + thrust::random_access_traversal_tag, + value_type, + reference + >::type iterator_category; ///< The iterator category +#else + typedef std::random_access_iterator_tag iterator_category; ///< The iterator category +#endif // THRUST_VERSION + +private: + + ValueType val; + OffsetT offset; +#ifdef _WIN32 + OffsetT pad[CUB_MAX(1, (16 / sizeof(OffsetT) - 1))]; // Workaround for win32 parameter-passing bug (ulonglong2 argmin DeviceReduce) +#endif + +public: + + /// Constructor + __host__ __device__ __forceinline__ ConstantInputIterator( + ValueType val, ///< Starting value for the iterator instance to report + OffsetT offset = 0) ///< Base offset + : + val(val), + offset(offset) + {} + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) + { + self_type retval = *this; + offset++; + return retval; + } + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() + { + offset++; + return *this; + } + + /// Indirection + __host__ __device__ __forceinline__ reference operator*() const + { + return val; + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const + { + self_type retval(val, offset + n); + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) + { + offset += n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const + { + self_type retval(val, offset - n); + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ self_type& operator-=(Distance n) + { + offset -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type operator-(self_type other) const + { + return offset - other.offset; + } + + /// Array subscript + template + __host__ __device__ __forceinline__ reference operator[](Distance /*n*/) const + { + return val; + } + + /// Structure dereference 
+ __host__ __device__ __forceinline__ pointer operator->() + { + return &val; + } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) + { + return (offset == rhs.offset) && ((val == rhs.val)); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) + { + return (offset != rhs.offset) || (val!= rhs.val); + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& itr) + { + os << "[" << itr.val << "," << itr.offset << "]"; + return os; + } + +}; + + +/** @} */ // end group UtilIterator + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/counting_input_iterator.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/counting_input_iterator.cuh new file mode 100644 index 0000000..7f49348 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/counting_input_iterator.cuh @@ -0,0 +1,228 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Random-access iterator types + */ + +#pragma once + +#include +#include + +#include "../thread/thread_load.cuh" +#include "../thread/thread_store.cuh" +#include "../util_device.cuh" +#include "../util_namespace.cuh" + +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \addtogroup UtilIterator + * @{ + */ + +/** + * \brief A random-access input generator for dereferencing a sequence of incrementing integer values. 
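+ *
+ * \par
+ * The value type is given as an explicit template argument, so the sequence is
+ * generated on the fly and never materialized in memory. A minimal sketch
+ * (the variable names here are illustrative only):
+ * \code
+ * #include <cub/iterator/counting_input_iterator.cuh>   // or equivalently <cub/cub.cuh>
+ *
+ * // Enumerate the integers 0, 1, 2, ... as if they were stored in an array
+ * cub::CountingInputIterator<int> row_indices(0);
+ *
+ * int r0  = row_indices[0];    // 0
+ * int r10 = row_indices[10];   // 10
+ * \endcode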
+ * + * \par Overview + * - After initializing a CountingInputIteratorTto a certain integer \p base, read references + * at \p offset will return the value \p base + \p offset. + * - Can be constructed, manipulated, dereferenced, and exchanged within and between host and device + * functions. + * - Compatible with Thrust API v1.7 or newer. + * + * \par Snippet + * The code snippet below illustrates the use of \p CountingInputIteratorTto + * dereference a sequence of incrementing integers. + * \par + * \code + * #include // or equivalently + * + * cub::CountingInputIterator itr(5); + * + * printf("%d\n", itr[0]); // 5 + * printf("%d\n", itr[1]); // 6 + * printf("%d\n", itr[2]); // 7 + * printf("%d\n", itr[50]); // 55 + * + * \endcode + * + * \tparam ValueType The value type of this iterator + * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) + */ +template < + typename ValueType, + typename OffsetT = ptrdiff_t> +class CountingInputIterator +{ +public: + + // Required iterator traits + typedef CountingInputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another + typedef ValueType value_type; ///< The type of the element the iterator can point to + typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to + typedef ValueType reference; ///< The type of a reference to an element the iterator can point to + +#if (THRUST_VERSION >= 100700) + // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods + typedef typename thrust::detail::iterator_facade_category< + thrust::any_system_tag, + thrust::random_access_traversal_tag, + value_type, + reference + >::type iterator_category; ///< The iterator category +#else + typedef std::random_access_iterator_tag iterator_category; ///< The iterator category +#endif // THRUST_VERSION + +private: + + ValueType val; + +public: + + /// Constructor + __host__ __device__ __forceinline__ CountingInputIterator( + const ValueType &val) ///< Starting value for the iterator instance to report + : + val(val) + {} + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) + { + self_type retval = *this; + val++; + return retval; + } + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() + { + val++; + return *this; + } + + /// Indirection + __host__ __device__ __forceinline__ reference operator*() const + { + return val; + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const + { + self_type retval(val + (ValueType) n); + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) + { + val += (ValueType) n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const + { + self_type retval(val - (ValueType) n); + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ self_type& operator-=(Distance n) + { + val -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type operator-(self_type other) const + { + return (difference_type) (val - other.val); + } + + /// Array subscript + template + __host__ __device__ __forceinline__ reference operator[](Distance n) const + { + return val + (ValueType) n; + } + + /// Structure dereference + __host__ 
__device__ __forceinline__ pointer operator->() + { + return &val; + } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) + { + return (val == rhs.val); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) + { + return (val != rhs.val); + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& itr) + { + os << "[" << itr.val << "]"; + return os; + } + +}; + + + +/** @} */ // end group UtilIterator + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/discard_output_iterator.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/discard_output_iterator.cuh new file mode 100644 index 0000000..28473e5 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/discard_output_iterator.cuh @@ -0,0 +1,220 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/** + * \file + * Random-access iterator types + */ + +#pragma once + +#include +#include + +#include "../util_namespace.cuh" +#include "../util_macro.cuh" + +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \addtogroup UtilIterator + * @{ + */ + + +/** + * \brief A discard iterator + */ +template +class DiscardOutputIterator +{ +public: + + // Required iterator traits + typedef DiscardOutputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another + typedef void value_type; ///< The type of the element the iterator can point to + typedef void pointer; ///< The type of a pointer to an element the iterator can point to + typedef void reference; ///< The type of a reference to an element the iterator can point to + +#if (THRUST_VERSION >= 100700) + // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods + typedef typename thrust::detail::iterator_facade_category< + thrust::any_system_tag, + thrust::random_access_traversal_tag, + value_type, + reference + >::type iterator_category; ///< The iterator category +#else + typedef std::random_access_iterator_tag iterator_category; ///< The iterator category +#endif // THRUST_VERSION + +private: + + OffsetT offset; + +#if defined(_WIN32) || !defined(_WIN64) + // Workaround for win32 parameter-passing bug (ulonglong2 argmin DeviceReduce) + OffsetT pad[CUB_MAX(1, (16 / sizeof(OffsetT) - 1))]; +#endif + +public: + + /// Constructor + __host__ __device__ __forceinline__ DiscardOutputIterator( + OffsetT offset = 0) ///< Base offset + : + offset(offset) + {} + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) + { + self_type retval = *this; + offset++; + return retval; + } + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() + { + offset++; + return *this; + } + + /// Indirection + __host__ __device__ __forceinline__ self_type& operator*() + { + // return self reference, which can be assigned to anything + return *this; + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const + { + self_type retval(offset + n); + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) + { + offset += n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const + { + self_type retval(offset - n); + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ self_type& operator-=(Distance n) + { + offset -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type operator-(self_type other) const + { + return offset - other.offset; + } + + /// Array subscript + template + __host__ __device__ __forceinline__ self_type& operator[](Distance n) + { + // return self reference, which can be assigned to anything + return *this; + } + + /// Structure dereference + __host__ __device__ __forceinline__ pointer operator->() + { + return; + } + + /// Assignment to self (no-op) + __host__ __device__ __forceinline__ void operator=(self_type const& 
other) + { + offset = other.offset; + } + + /// Assignment to anything else (no-op) + template + __host__ __device__ __forceinline__ void operator=(T const&) + {} + + /// Cast to void* operator + __host__ __device__ __forceinline__ operator void*() const { return NULL; } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) + { + return (offset == rhs.offset); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) + { + return (offset != rhs.offset); + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& itr) + { + os << "[" << itr.offset << "]"; + return os; + } + +}; + + +/** @} */ // end group UtilIterator + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/tex_obj_input_iterator.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/tex_obj_input_iterator.cuh new file mode 100644 index 0000000..b99103e --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/tex_obj_input_iterator.cuh @@ -0,0 +1,310 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/** + * \file + * Random-access iterator types + */ + +#pragma once + +#include +#include + +#include "../thread/thread_load.cuh" +#include "../thread/thread_store.cuh" +#include "../util_device.cuh" +#include "../util_debug.cuh" +#include "../util_namespace.cuh" + +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \addtogroup UtilIterator + * @{ + */ + + + +/** + * \brief A random-access input wrapper for dereferencing array values through texture cache. Uses newer Kepler-style texture objects. + * + * \par Overview + * - TexObjInputIteratorTwraps a native device pointer of type ValueType*. References + * to elements are to be loaded through texture cache. + * - Can be used to load any data type from memory through texture cache. + * - Can be manipulated and exchanged within and between host and device + * functions, can only be constructed within host functions, and can only be + * dereferenced within device functions. + * - With regard to nested/dynamic parallelism, TexObjInputIteratorTiterators may only be + * created by the host thread, but can be used by any descendant kernel. + * - Compatible with Thrust API v1.7 or newer. + * + * \par Snippet + * The code snippet below illustrates the use of \p TexRefInputIteratorTto + * dereference a device array of doubles through texture cache. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize a device array + * int num_items; // e.g., 7 + * double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0] + * + * // Create an iterator wrapper + * cub::TexObjInputIterator itr; + * itr.BindTexture(d_in, sizeof(double) * num_items); + * ... + * + * // Within device code: + * printf("%f\n", itr[0]); // 8.0 + * printf("%f\n", itr[1]); // 6.0 + * printf("%f\n", itr[6]); // 9.0 + * + * ... 
+ * itr.UnbindTexture(); + * + * \endcode + * + * \tparam T The value type of this iterator + * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) + */ +template < + typename T, + typename OffsetT = ptrdiff_t> +class TexObjInputIterator +{ +public: + + // Required iterator traits + typedef TexObjInputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another + typedef T value_type; ///< The type of the element the iterator can point to + typedef T* pointer; ///< The type of a pointer to an element the iterator can point to + typedef T reference; ///< The type of a reference to an element the iterator can point to + +#if (THRUST_VERSION >= 100700) + // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods + typedef typename thrust::detail::iterator_facade_category< + thrust::device_system_tag, + thrust::random_access_traversal_tag, + value_type, + reference + >::type iterator_category; ///< The iterator category +#else + typedef std::random_access_iterator_tag iterator_category; ///< The iterator category +#endif // THRUST_VERSION + +private: + + // Largest texture word we can use in device + typedef typename UnitWord::TextureWord TextureWord; + + // Number of texture words per T + enum { + TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord) + }; + +private: + + T* ptr; + difference_type tex_offset; + cudaTextureObject_t tex_obj; + +public: + + /// Constructor + __host__ __device__ __forceinline__ TexObjInputIterator() + : + ptr(NULL), + tex_offset(0), + tex_obj(0) + {} + + /// Use this iterator to bind \p ptr with a texture reference + template + cudaError_t BindTexture( + QualifiedT *ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment + size_t bytes = size_t(-1), ///< Number of bytes in the range + size_t tex_offset = 0) ///< OffsetT (in items) from \p ptr denoting the position of the iterator + { + this->ptr = const_cast::Type *>(ptr); + this->tex_offset = tex_offset; + + cudaChannelFormatDesc channel_desc = cudaCreateChannelDesc(); + cudaResourceDesc res_desc; + cudaTextureDesc tex_desc; + memset(&res_desc, 0, sizeof(cudaResourceDesc)); + memset(&tex_desc, 0, sizeof(cudaTextureDesc)); + res_desc.resType = cudaResourceTypeLinear; + res_desc.res.linear.devPtr = this->ptr; + res_desc.res.linear.desc = channel_desc; + res_desc.res.linear.sizeInBytes = bytes; + tex_desc.readMode = cudaReadModeElementType; + return cudaCreateTextureObject(&tex_obj, &res_desc, &tex_desc, NULL); + } + + /// Unbind this iterator from its texture reference + cudaError_t UnbindTexture() + { + return cudaDestroyTextureObject(tex_obj); + } + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) + { + self_type retval = *this; + tex_offset++; + return retval; + } + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() + { + tex_offset++; + return *this; + } + + /// Indirection + __host__ __device__ __forceinline__ reference operator*() const + { +#if (CUB_PTX_ARCH == 0) + // Simply dereference the pointer on the host + return ptr[tex_offset]; +#else + // Move array of uninitialized words, then alias and assign to return value + TextureWord words[TEXTURE_MULTIPLE]; + + #pragma unroll + for (int i = 0; i < TEXTURE_MULTIPLE; ++i) + { + words[i] = tex1Dfetch( + tex_obj, + (tex_offset * TEXTURE_MULTIPLE) + i); + } + + // Load from words + return *reinterpret_cast(words); 
+#endif + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const + { + self_type retval; + retval.ptr = ptr; + retval.tex_obj = tex_obj; + retval.tex_offset = tex_offset + n; + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) + { + tex_offset += n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const + { + self_type retval; + retval.ptr = ptr; + retval.tex_obj = tex_obj; + retval.tex_offset = tex_offset - n; + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ self_type& operator-=(Distance n) + { + tex_offset -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type operator-(self_type other) const + { + return tex_offset - other.tex_offset; + } + + /// Array subscript + template + __host__ __device__ __forceinline__ reference operator[](Distance n) const + { + self_type offset = (*this) + n; + return *offset; + } + + /// Structure dereference + __host__ __device__ __forceinline__ pointer operator->() + { + return &(*(*this)); + } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) + { + return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset) && (tex_obj == rhs.tex_obj)); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) + { + return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset) || (tex_obj != rhs.tex_obj)); + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& itr) + { + return os; + } + +}; + + + +/** @} */ // end group UtilIterator + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/tex_ref_input_iterator.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/tex_ref_input_iterator.cuh new file mode 100644 index 0000000..95d0ffb --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/tex_ref_input_iterator.cuh @@ -0,0 +1,374 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Random-access iterator types + */ + +#pragma once + +#include +#include + +#include "../thread/thread_load.cuh" +#include "../thread/thread_store.cuh" +#include "../util_device.cuh" +#include "../util_debug.cuh" +#include "../util_namespace.cuh" + +#if (CUDA_VERSION >= 5050) || defined(DOXYGEN_ACTIVE) // This iterator is compatible with CUDA 5.5 and newer + +#if (THRUST_VERSION >= 100700) // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/****************************************************************************** + * Static file-scope Tesla/Fermi-style texture references + *****************************************************************************/ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +// Anonymous namespace +namespace { + +/// Global texture reference specialized by type +template +struct IteratorTexRef +{ + /// And by unique ID + template + struct TexId + { + // Largest texture word we can use in device + typedef typename UnitWord::DeviceWord DeviceWord; + typedef typename UnitWord::TextureWord TextureWord; + + // Number of texture words per T + enum { + DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord), + TEXTURE_MULTIPLE = sizeof(T) / sizeof(TextureWord) + }; + + // Texture reference type + typedef texture TexRef; + + // Texture reference + static TexRef ref; + + /// Bind texture + static cudaError_t BindTexture(void *d_in, size_t &offset) + { + if (d_in) + { + cudaChannelFormatDesc tex_desc = cudaCreateChannelDesc(); + ref.channelDesc = tex_desc; + return (CubDebug(cudaBindTexture(&offset, ref, d_in))); + } + + return cudaSuccess; + } + + /// Unbind texture + static cudaError_t UnbindTexture() + { + return CubDebug(cudaUnbindTexture(ref)); + } + + /// Fetch element + template + static __device__ __forceinline__ T Fetch(Distance tex_offset) + { + DeviceWord temp[DEVICE_MULTIPLE]; + TextureWord *words = reinterpret_cast(temp); + + #pragma unroll + for (int i = 0; i < TEXTURE_MULTIPLE; ++i) + { + words[i] = tex1Dfetch(ref, (tex_offset * TEXTURE_MULTIPLE) + i); + } + + return reinterpret_cast(temp); + } + }; +}; + +// Texture reference definitions +template +template +typename IteratorTexRef::template TexId::TexRef IteratorTexRef::template TexId::ref = 0; + + +} // Anonymous namespace + + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + + +/** + * \addtogroup UtilIterator + * @{ + */ + + + +/** + * \brief A random-access input wrapper for dereferencing array values through texture cache. Uses older Tesla/Fermi-style texture references. + * + * \par Overview + * - TexRefInputIteratorTwraps a native device pointer of type ValueType*. References + * to elements are to be loaded through texture cache. + * - Can be used to load any data type from memory through texture cache. 
+ * - Can be manipulated and exchanged within and between host and device + * functions, can only be constructed within host functions, and can only be + * dereferenced within device functions. + * - The \p UNIQUE_ID template parameter is used to statically name the underlying texture + * reference. Only one TexRefInputIteratorTinstance can be bound at any given time for a + * specific combination of (1) data type \p T, (2) \p UNIQUE_ID, (3) host + * thread, and (4) compilation .o unit. + * - With regard to nested/dynamic parallelism, TexRefInputIteratorTiterators may only be + * created by the host thread and used by a top-level kernel (i.e. the one which is launched + * from the host). + * - Compatible with Thrust API v1.7 or newer. + * - Compatible with CUDA toolkit v5.5 or newer. + * + * \par Snippet + * The code snippet below illustrates the use of \p TexRefInputIteratorTto + * dereference a device array of doubles through texture cache. + * \par + * \code + * #include // or equivalently + * + * // Declare, allocate, and initialize a device array + * int num_items; // e.g., 7 + * double *d_in; // e.g., [8.0, 6.0, 7.0, 5.0, 3.0, 0.0, 9.0] + * + * // Create an iterator wrapper + * cub::TexRefInputIterator itr; + * itr.BindTexture(d_in, sizeof(double) * num_items); + * ... + * + * // Within device code: + * printf("%f\n", itr[0]); // 8.0 + * printf("%f\n", itr[1]); // 6.0 + * printf("%f\n", itr[6]); // 9.0 + * + * ... + * itr.UnbindTexture(); + * + * \endcode + * + * \tparam T The value type of this iterator + * \tparam UNIQUE_ID A globally-unique identifier (within the compilation unit) to name the underlying texture reference + * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) + */ +template < + typename T, + int UNIQUE_ID, + typename OffsetT = ptrdiff_t> +class TexRefInputIterator +{ +public: + + // Required iterator traits + typedef TexRefInputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another + typedef T value_type; ///< The type of the element the iterator can point to + typedef T* pointer; ///< The type of a pointer to an element the iterator can point to + typedef T reference; ///< The type of a reference to an element the iterator can point to + +#if (THRUST_VERSION >= 100700) + // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods + typedef typename thrust::detail::iterator_facade_category< + thrust::device_system_tag, + thrust::random_access_traversal_tag, + value_type, + reference + >::type iterator_category; ///< The iterator category +#else + typedef std::random_access_iterator_tag iterator_category; ///< The iterator category +#endif // THRUST_VERSION + +private: + + T* ptr; + difference_type tex_offset; + + // Texture reference wrapper (old Tesla/Fermi-style textures) + typedef typename IteratorTexRef::template TexId TexId; + +public: +/* + /// Constructor + __host__ __device__ __forceinline__ TexRefInputIterator() + : + ptr(NULL), + tex_offset(0) + {} +*/ + /// Use this iterator to bind \p ptr with a texture reference + template + cudaError_t BindTexture( + QualifiedT *ptr, ///< Native pointer to wrap that is aligned to cudaDeviceProp::textureAlignment + size_t bytes = size_t(-1), ///< Number of bytes in the range + size_t tex_offset = 0) ///< OffsetT (in items) from \p ptr denoting the position of the iterator + { + this->ptr = const_cast::Type *>(ptr); + size_t offset; + cudaError_t retval = 
TexId::BindTexture(this->ptr + tex_offset, offset); + this->tex_offset = (difference_type) (offset / sizeof(QualifiedT)); + return retval; + } + + /// Unbind this iterator from its texture reference + cudaError_t UnbindTexture() + { + return TexId::UnbindTexture(); + } + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) + { + self_type retval = *this; + tex_offset++; + return retval; + } + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() + { + tex_offset++; + return *this; + } + + /// Indirection + __host__ __device__ __forceinline__ reference operator*() const + { +#if (CUB_PTX_ARCH == 0) + // Simply dereference the pointer on the host + return ptr[tex_offset]; +#else + // Use the texture reference + return TexId::Fetch(tex_offset); +#endif + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const + { + self_type retval; + retval.ptr = ptr; + retval.tex_offset = tex_offset + n; + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) + { + tex_offset += n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const + { + self_type retval; + retval.ptr = ptr; + retval.tex_offset = tex_offset - n; + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ self_type& operator-=(Distance n) + { + tex_offset -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type operator-(self_type other) const + { + return tex_offset - other.tex_offset; + } + + /// Array subscript + template + __host__ __device__ __forceinline__ reference operator[](Distance n) const + { + self_type offset = (*this) + n; + return *offset; + } + + /// Structure dereference + __host__ __device__ __forceinline__ pointer operator->() + { + return &(*(*this)); + } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) + { + return ((ptr == rhs.ptr) && (tex_offset == rhs.tex_offset)); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) + { + return ((ptr != rhs.ptr) || (tex_offset != rhs.tex_offset)); + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& itr) + { + return os; + } + +}; + + + +/** @} */ // end group UtilIterator + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) + +#endif // CUDA_VERSION diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/transform_input_iterator.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/transform_input_iterator.cuh new file mode 100644 index 0000000..dad1f50 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/iterator/transform_input_iterator.cuh @@ -0,0 +1,252 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Random-access iterator types + */ + +#pragma once + +#include +#include + +#include "../thread/thread_load.cuh" +#include "../thread/thread_store.cuh" +#include "../util_device.cuh" +#include "../util_namespace.cuh" + +#if (THRUST_VERSION >= 100700) + // This iterator is compatible with Thrust API 1.7 and newer + #include + #include +#endif // THRUST_VERSION + + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \addtogroup UtilIterator + * @{ + */ + + +/** + * \brief A random-access input wrapper for transforming dereferenced values. + * + * \par Overview + * - TransformInputIteratorTwraps a unary conversion functor of type \p + * ConversionOp and a random-access input iterator of type InputIteratorT, + * using the former to produce references of type \p ValueType from the latter. + * - Can be used with any data type. + * - Can be constructed, manipulated, and exchanged within and between host and device + * functions. Wrapped host memory can only be dereferenced on the host, and wrapped + * device memory can only be dereferenced on the device. + * - Compatible with Thrust API v1.7 or newer. + * + * \par Snippet + * The code snippet below illustrates the use of \p TransformInputIteratorTto + * dereference an array of integers, tripling the values and converting them to doubles. + * \par + * \code + * #include // or equivalently + * + * // Functor for tripling integer values and converting to doubles + * struct TripleDoubler + * { + * __host__ __device__ __forceinline__ + * double operator()(const int &a) const { + * return double(a * 3); + * } + * }; + * + * // Declare, allocate, and initialize a device array + * int *d_in; // e.g., [8, 6, 7, 5, 3, 0, 9] + * TripleDoubler conversion_op; + * + * // Create an iterator wrapper + * cub::TransformInputIterator itr(d_in, conversion_op); + * + * // Within device code: + * printf("%f\n", itr[0]); // 24.0 + * printf("%f\n", itr[1]); // 18.0 + * printf("%f\n", itr[6]); // 27.0 + * + * \endcode + * + * \tparam ValueType The value type of this iterator + * \tparam ConversionOp Unary functor type for mapping objects of type \p InputType to type \p ValueType. Must have member ValueType operator()(const InputType &datum). 
+ * \tparam InputIteratorT The type of the wrapped input iterator + * \tparam OffsetT The difference type of this iterator (Default: \p ptrdiff_t) + * + */ +template < + typename ValueType, + typename ConversionOp, + typename InputIteratorT, + typename OffsetT = ptrdiff_t> +class TransformInputIterator +{ +public: + + // Required iterator traits + typedef TransformInputIterator self_type; ///< My own type + typedef OffsetT difference_type; ///< Type to express the result of subtracting one iterator from another + typedef ValueType value_type; ///< The type of the element the iterator can point to + typedef ValueType* pointer; ///< The type of a pointer to an element the iterator can point to + typedef ValueType reference; ///< The type of a reference to an element the iterator can point to + +#if (THRUST_VERSION >= 100700) + // Use Thrust's iterator categories so we can use these iterators in Thrust 1.7 (or newer) methods + typedef typename thrust::detail::iterator_facade_category< + thrust::any_system_tag, + thrust::random_access_traversal_tag, + value_type, + reference + >::type iterator_category; ///< The iterator category +#else + typedef std::random_access_iterator_tag iterator_category; ///< The iterator category +#endif // THRUST_VERSION + +private: + + ConversionOp conversion_op; + InputIteratorT input_itr; + +public: + + /// Constructor + __host__ __device__ __forceinline__ TransformInputIterator( + InputIteratorT input_itr, ///< Input iterator to wrap + ConversionOp conversion_op) ///< Conversion functor to wrap + : + conversion_op(conversion_op), + input_itr(input_itr) + {} + + /// Postfix increment + __host__ __device__ __forceinline__ self_type operator++(int) + { + self_type retval = *this; + input_itr++; + return retval; + } + + /// Prefix increment + __host__ __device__ __forceinline__ self_type operator++() + { + input_itr++; + return *this; + } + + /// Indirection + __host__ __device__ __forceinline__ reference operator*() const + { + return conversion_op(*input_itr); + } + + /// Addition + template + __host__ __device__ __forceinline__ self_type operator+(Distance n) const + { + self_type retval(input_itr + n, conversion_op); + return retval; + } + + /// Addition assignment + template + __host__ __device__ __forceinline__ self_type& operator+=(Distance n) + { + input_itr += n; + return *this; + } + + /// Subtraction + template + __host__ __device__ __forceinline__ self_type operator-(Distance n) const + { + self_type retval(input_itr - n, conversion_op); + return retval; + } + + /// Subtraction assignment + template + __host__ __device__ __forceinline__ self_type& operator-=(Distance n) + { + input_itr -= n; + return *this; + } + + /// Distance + __host__ __device__ __forceinline__ difference_type operator-(self_type other) const + { + return input_itr - other.input_itr; + } + + /// Array subscript + template + __host__ __device__ __forceinline__ reference operator[](Distance n) const + { + return conversion_op(input_itr[n]); + } + + /// Structure dereference + __host__ __device__ __forceinline__ pointer operator->() + { + return &conversion_op(*input_itr); + } + + /// Equal to + __host__ __device__ __forceinline__ bool operator==(const self_type& rhs) + { + return (input_itr == rhs.input_itr); + } + + /// Not equal to + __host__ __device__ __forceinline__ bool operator!=(const self_type& rhs) + { + return (input_itr != rhs.input_itr); + } + + /// ostream operator + friend std::ostream& operator<<(std::ostream& os, const self_type& itr) + { + return os; + } +}; + + + 
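+/*
+ * Minimal end-to-end sketch (illustrative only): a TransformInputIterator can be
+ * passed directly to a device-wide primitive such as cub::DeviceReduce::Sum, so
+ * the conversion is fused into the reduction pass instead of requiring a
+ * separately transformed copy of the input. The functor name Square, the buffer
+ * names, and the array contents below are assumptions for illustration; only the
+ * CUB types and calls are part of the library API.
+ *
+ *   #include <cub/iterator/transform_input_iterator.cuh>
+ *   #include <cub/device/device_reduce.cuh>
+ *
+ *   struct Square
+ *   {
+ *       __host__ __device__ __forceinline__
+ *       int operator()(const int &a) const { return a * a; }
+ *   };
+ *
+ *   int  num_items = 4;
+ *   int *d_in;    // device array holding, e.g., [1, 2, 3, 4] (allocation elided)
+ *   int *d_sum;   // device scalar that receives 1 + 4 + 9 + 16 = 30
+ *
+ *   Square op;
+ *   cub::TransformInputIterator<int, Square, int*> itr(d_in, op);
+ *
+ *   // Standard two-phase CUB pattern: query temp storage, allocate, then run
+ *   void   *d_temp_storage     = NULL;
+ *   size_t  temp_storage_bytes = 0;
+ *   cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, itr, d_sum, num_items);
+ *   cudaMalloc(&d_temp_storage, temp_storage_bytes);
+ *   cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, itr, d_sum, num_items);
+ */
+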
+/** @} */ // end group UtilIterator + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_load.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_load.cuh new file mode 100644 index 0000000..b1ca412 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_load.cuh @@ -0,0 +1,438 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Thread utilities for reading memory using PTX cache modifiers. + */ + +#pragma once + +#include + +#include + +#include "../util_ptx.cuh" +#include "../util_type.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \addtogroup UtilIo + * @{ + */ + +//----------------------------------------------------------------------------- +// Tags and constants +//----------------------------------------------------------------------------- + +/** + * \brief Enumeration of cache modifiers for memory load operations. + */ +enum CacheLoadModifier +{ + LOAD_DEFAULT, ///< Default (no modifier) + LOAD_CA, ///< Cache at all levels + LOAD_CG, ///< Cache at global level + LOAD_CS, ///< Cache streaming (likely to be accessed once) + LOAD_CV, ///< Cache as volatile (including cached system lines) + LOAD_LDG, ///< Cache as texture + LOAD_VOLATILE, ///< Volatile (any memory space) +}; + + +/** + * \name Thread I/O (cache modified) + * @{ + */ + +/** + * \brief Thread utility for reading memory using cub::CacheLoadModifier cache modifiers. Can be used to load any data type. 
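+ *
+ * \par
+ * The cache modifier is supplied as an explicit template argument, while the
+ * pointer/iterator type is inferred. A brief sketch (the pointer name below is
+ * illustrative only):
+ * \code
+ * #include <cub/thread/thread_load.cuh>   // or equivalently <cub/cub.cuh>
+ *
+ * // Within device code:
+ * int *d_in;                                                    // device-accessible input
+ * int a = cub::ThreadLoad<cub::LOAD_CG>(d_in + threadIdx.x);    // cache-global load
+ * int b = cub::ThreadLoad<cub::LOAD_LDG>(d_in + threadIdx.x);   // load via read-only texture path
+ * \endcode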
+ * + * \par Example + * \code + * #include // or equivalently + * + * // 32-bit load using cache-global modifier: + * int *d_in; + * int val = cub::ThreadLoad(d_in + threadIdx.x); + * + * // 16-bit load using default modifier + * short *d_in; + * short val = cub::ThreadLoad(d_in + threadIdx.x); + * + * // 256-bit load using cache-volatile modifier + * double4 *d_in; + * double4 val = cub::ThreadLoad(d_in + threadIdx.x); + * + * // 96-bit load using cache-streaming modifier + * struct TestFoo { bool a; short b; }; + * TestFoo *d_struct; + * TestFoo val = cub::ThreadLoad(d_in + threadIdx.x); + * \endcode + * + * \tparam MODIFIER [inferred] CacheLoadModifier enumeration + * \tparam InputIteratorT [inferred] Input iterator type \iterator + */ +template < + CacheLoadModifier MODIFIER, + typename InputIteratorT> +__device__ __forceinline__ typename std::iterator_traits::value_type ThreadLoad(InputIteratorT itr); + + +//@} end member group + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + + +/// Helper structure for templated load iteration (inductive case) +template +struct IterateThreadLoad +{ + template + static __device__ __forceinline__ void Load(T const *ptr, T *vals) + { + vals[COUNT] = ThreadLoad(ptr + COUNT); + IterateThreadLoad::template Load(ptr, vals); + } + + template + static __device__ __forceinline__ void Dereference(InputIteratorT itr, T *vals) + { + vals[COUNT] = itr[COUNT]; + IterateThreadLoad::Dereference(itr, vals); + } +}; + + +/// Helper structure for templated load iteration (termination case) +template +struct IterateThreadLoad +{ + template + static __device__ __forceinline__ void Load(T const * /*ptr*/, T * /*vals*/) {} + + template + static __device__ __forceinline__ void Dereference(InputIteratorT /*itr*/, T * /*vals*/) {} +}; + + +/** + * Define a uint4 (16B) ThreadLoad specialization for the given Cache load modifier + */ +#define _CUB_LOAD_16(cub_modifier, ptx_modifier) \ + template<> \ + __device__ __forceinline__ uint4 ThreadLoad(uint4 const *ptr) \ + { \ + uint4 retval; \ + asm volatile ("ld."#ptx_modifier".v4.u32 {%0, %1, %2, %3}, [%4];" : \ + "=r"(retval.x), \ + "=r"(retval.y), \ + "=r"(retval.z), \ + "=r"(retval.w) : \ + _CUB_ASM_PTR_(ptr)); \ + return retval; \ + } \ + template<> \ + __device__ __forceinline__ ulonglong2 ThreadLoad(ulonglong2 const *ptr) \ + { \ + ulonglong2 retval; \ + asm volatile ("ld."#ptx_modifier".v2.u64 {%0, %1}, [%2];" : \ + "=l"(retval.x), \ + "=l"(retval.y) : \ + _CUB_ASM_PTR_(ptr)); \ + return retval; \ + } + +/** + * Define a uint2 (8B) ThreadLoad specialization for the given Cache load modifier + */ +#define _CUB_LOAD_8(cub_modifier, ptx_modifier) \ + template<> \ + __device__ __forceinline__ ushort4 ThreadLoad(ushort4 const *ptr) \ + { \ + ushort4 retval; \ + asm volatile ("ld."#ptx_modifier".v4.u16 {%0, %1, %2, %3}, [%4];" : \ + "=h"(retval.x), \ + "=h"(retval.y), \ + "=h"(retval.z), \ + "=h"(retval.w) : \ + _CUB_ASM_PTR_(ptr)); \ + return retval; \ + } \ + template<> \ + __device__ __forceinline__ uint2 ThreadLoad(uint2 const *ptr) \ + { \ + uint2 retval; \ + asm volatile ("ld."#ptx_modifier".v2.u32 {%0, %1}, [%2];" : \ + "=r"(retval.x), \ + "=r"(retval.y) : \ + _CUB_ASM_PTR_(ptr)); \ + return retval; \ + } \ + template<> \ + __device__ __forceinline__ unsigned long long ThreadLoad(unsigned long long const *ptr) \ + { \ + unsigned long long retval; \ + asm volatile ("ld."#ptx_modifier".u64 %0, [%1];" : \ + "=l"(retval) : \ + _CUB_ASM_PTR_(ptr)); \ + return retval; \ + } + +/** + * Define a uint (4B) ThreadLoad 
specialization for the given Cache load modifier + */ +#define _CUB_LOAD_4(cub_modifier, ptx_modifier) \ + template<> \ + __device__ __forceinline__ unsigned int ThreadLoad(unsigned int const *ptr) \ + { \ + unsigned int retval; \ + asm volatile ("ld."#ptx_modifier".u32 %0, [%1];" : \ + "=r"(retval) : \ + _CUB_ASM_PTR_(ptr)); \ + return retval; \ + } + + +/** + * Define a unsigned short (2B) ThreadLoad specialization for the given Cache load modifier + */ +#define _CUB_LOAD_2(cub_modifier, ptx_modifier) \ + template<> \ + __device__ __forceinline__ unsigned short ThreadLoad(unsigned short const *ptr) \ + { \ + unsigned short retval; \ + asm volatile ("ld."#ptx_modifier".u16 %0, [%1];" : \ + "=h"(retval) : \ + _CUB_ASM_PTR_(ptr)); \ + return retval; \ + } + + +/** + * Define an unsigned char (1B) ThreadLoad specialization for the given Cache load modifier + */ +#define _CUB_LOAD_1(cub_modifier, ptx_modifier) \ + template<> \ + __device__ __forceinline__ unsigned char ThreadLoad(unsigned char const *ptr) \ + { \ + unsigned short retval; \ + asm volatile ( \ + "{" \ + " .reg .u8 datum;" \ + " ld."#ptx_modifier".u8 datum, [%1];" \ + " cvt.u16.u8 %0, datum;" \ + "}" : \ + "=h"(retval) : \ + _CUB_ASM_PTR_(ptr)); \ + return (unsigned char) retval; \ + } + + +/** + * Define powers-of-two ThreadLoad specializations for the given Cache load modifier + */ +#define _CUB_LOAD_ALL(cub_modifier, ptx_modifier) \ + _CUB_LOAD_16(cub_modifier, ptx_modifier) \ + _CUB_LOAD_8(cub_modifier, ptx_modifier) \ + _CUB_LOAD_4(cub_modifier, ptx_modifier) \ + _CUB_LOAD_2(cub_modifier, ptx_modifier) \ + _CUB_LOAD_1(cub_modifier, ptx_modifier) \ + + +/** + * Define powers-of-two ThreadLoad specializations for the various Cache load modifiers + */ +#if CUB_PTX_ARCH >= 200 + _CUB_LOAD_ALL(LOAD_CA, ca) + _CUB_LOAD_ALL(LOAD_CG, cg) + _CUB_LOAD_ALL(LOAD_CS, cs) + _CUB_LOAD_ALL(LOAD_CV, cv) +#else + _CUB_LOAD_ALL(LOAD_CA, global) + // Use volatile to ensure coherent reads when this PTX is JIT'd to run on newer architectures with L1 + _CUB_LOAD_ALL(LOAD_CG, volatile.global) + _CUB_LOAD_ALL(LOAD_CS, global) + _CUB_LOAD_ALL(LOAD_CV, volatile.global) +#endif + +#if CUB_PTX_ARCH >= 350 + _CUB_LOAD_ALL(LOAD_LDG, global.nc) +#else + _CUB_LOAD_ALL(LOAD_LDG, global) +#endif + + +// Macro cleanup +#undef _CUB_LOAD_ALL +#undef _CUB_LOAD_1 +#undef _CUB_LOAD_2 +#undef _CUB_LOAD_4 +#undef _CUB_LOAD_8 +#undef _CUB_LOAD_16 + + + +/** + * ThreadLoad definition for LOAD_DEFAULT modifier on iterator types + */ +template +__device__ __forceinline__ typename std::iterator_traits::value_type ThreadLoad( + InputIteratorT itr, + Int2Type /*modifier*/, + Int2Type /*is_pointer*/) +{ + return *itr; +} + + +/** + * ThreadLoad definition for LOAD_DEFAULT modifier on pointer types + */ +template +__device__ __forceinline__ T ThreadLoad( + T *ptr, + Int2Type /*modifier*/, + Int2Type /*is_pointer*/) +{ + return *ptr; +} + + +/** + * ThreadLoad definition for LOAD_VOLATILE modifier on primitive pointer types + */ +template +__device__ __forceinline__ T ThreadLoadVolatilePointer( + T *ptr, + Int2Type /*is_primitive*/) +{ + T retval = *reinterpret_cast(ptr); + return retval; +} + + +/** + * ThreadLoad definition for LOAD_VOLATILE modifier on non-primitive pointer types + */ +template +__device__ __forceinline__ T ThreadLoadVolatilePointer( + T *ptr, + Int2Type /*is_primitive*/) +{ + typedef typename UnitWord::VolatileWord VolatileWord; // Word type for memcopying + + const int VOLATILE_MULTIPLE = sizeof(T) / sizeof(VolatileWord); +/* + VolatileWord 
words[VOLATILE_MULTIPLE]; + + IterateThreadLoad<0, VOLATILE_MULTIPLE>::Dereference( + reinterpret_cast(ptr), + words); + + return *reinterpret_cast(words); +*/ + + T retval; + VolatileWord *words = reinterpret_cast(&retval); + IterateThreadLoad<0, VOLATILE_MULTIPLE>::Dereference( + reinterpret_cast(ptr), + words); + return retval; +} + + +/** + * ThreadLoad definition for LOAD_VOLATILE modifier on pointer types + */ +template +__device__ __forceinline__ T ThreadLoad( + T *ptr, + Int2Type /*modifier*/, + Int2Type /*is_pointer*/) +{ + // Apply tags for partial-specialization + return ThreadLoadVolatilePointer(ptr, Int2Type::PRIMITIVE>()); +} + + +/** + * ThreadLoad definition for generic modifiers on pointer types + */ +template +__device__ __forceinline__ T ThreadLoad( + T const *ptr, + Int2Type /*modifier*/, + Int2Type /*is_pointer*/) +{ + typedef typename UnitWord::DeviceWord DeviceWord; + + const int DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord); + + DeviceWord words[DEVICE_MULTIPLE]; + + IterateThreadLoad<0, DEVICE_MULTIPLE>::template Load( + reinterpret_cast(const_cast(ptr)), + words); + + return *reinterpret_cast(words); +} + + +/** + * ThreadLoad definition for generic modifiers + */ +template < + CacheLoadModifier MODIFIER, + typename InputIteratorT> +__device__ __forceinline__ typename std::iterator_traits::value_type ThreadLoad(InputIteratorT itr) +{ + // Apply tags for partial-specialization + return ThreadLoad( + itr, + Int2Type(), + Int2Type::VALUE>()); +} + + + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + +/** @} */ // end group UtilIo + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_operators.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_operators.cuh new file mode 100644 index 0000000..76cd800 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_operators.cuh @@ -0,0 +1,317 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
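// A minimal usage sketch for the ThreadLoad machinery added in thread_load.cuh above. The
// Doxygen example there appears to have lost its template-argument lists in this dump
// (upstream CUB 1.8.0 writes the cache modifier explicitly, e.g. cub::ThreadLoad<cub::LOAD_CG>).
// Kernel and pointer names below are illustrative only, assuming the vendored cub-1.8.0
// directory is on the include path.
#include <cub/thread/thread_load.cuh>   // or the umbrella <cub/cub.cuh>

__global__ void thread_load_sketch(const int *d_in, int *d_out)
{
    int tid = blockIdx.x * blockDim.x + threadIdx.x;

    // Cache-global load (ld.global.cg on sm_20+).
    int a = cub::ThreadLoad<cub::LOAD_CG>(d_in + tid);

    // Read-only data-cache load on sm_35+; falls back to a plain global load below that.
    int b = cub::ThreadLoad<cub::LOAD_LDG>(d_in + tid);

    // Default modifier is an ordinary dereference.
    int c = cub::ThreadLoad<cub::LOAD_DEFAULT>(d_in + tid);

    d_out[tid] = a + b + c;
}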
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Simple binary operator functor types + */ + +/****************************************************************************** + * Simple functor operators + ******************************************************************************/ + +#pragma once + +#include "../util_macro.cuh" +#include "../util_type.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \addtogroup UtilModule + * @{ + */ + +/** + * \brief Default equality functor + */ +struct Equality +{ + /// Boolean equality operator, returns (a == b) + template + __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) const + { + return a == b; + } +}; + + +/** + * \brief Default inequality functor + */ +struct Inequality +{ + /// Boolean inequality operator, returns (a != b) + template + __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) const + { + return a != b; + } +}; + + +/** + * \brief Inequality functor (wraps equality functor) + */ +template +struct InequalityWrapper +{ + /// Wrapped equality operator + EqualityOp op; + + /// Constructor + __host__ __device__ __forceinline__ + InequalityWrapper(EqualityOp op) : op(op) {} + + /// Boolean inequality operator, returns (a != b) + template + __host__ __device__ __forceinline__ bool operator()(const T &a, const T &b) + { + return !op(a, b); + } +}; + + +/** + * \brief Default sum functor + */ +struct Sum +{ + /// Boolean sum operator, returns a + b + template + __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const + { + return a + b; + } +}; + + +/** + * \brief Default max functor + */ +struct Max +{ + /// Boolean max operator, returns (a > b) ? a : b + template + __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const + { + return CUB_MAX(a, b); + } +}; + + +/** + * \brief Arg max functor (keeps the value and offset of the first occurrence of the larger item) + */ +struct ArgMax +{ + /// Boolean max operator, preferring the item having the smaller offset in case of ties + template + __host__ __device__ __forceinline__ KeyValuePair operator()( + const KeyValuePair &a, + const KeyValuePair &b) const + { +// Mooch BUG (device reduce argmax gk110 3.2 million random fp32) +// return ((b.value > a.value) || ((a.value == b.value) && (b.key < a.key))) ? b : a; + + if ((b.value > a.value) || ((a.value == b.value) && (b.key < a.key))) + return b; + return a; + } +}; + + +/** + * \brief Default min functor + */ +struct Min +{ + /// Boolean min operator, returns (a < b) ? 
a : b + template + __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const + { + return CUB_MIN(a, b); + } +}; + + +/** + * \brief Arg min functor (keeps the value and offset of the first occurrence of the smallest item) + */ +struct ArgMin +{ + /// Boolean min operator, preferring the item having the smaller offset in case of ties + template + __host__ __device__ __forceinline__ KeyValuePair operator()( + const KeyValuePair &a, + const KeyValuePair &b) const + { +// Mooch BUG (device reduce argmax gk110 3.2 million random fp32) +// return ((b.value < a.value) || ((a.value == b.value) && (b.key < a.key))) ? b : a; + + if ((b.value < a.value) || ((a.value == b.value) && (b.key < a.key))) + return b; + return a; + } +}; + + +/** + * \brief Default cast functor + */ +template +struct CastOp +{ + /// Cast operator, returns (B) a + template + __host__ __device__ __forceinline__ B operator()(const A &a) const + { + return (B) a; + } +}; + + +/** + * \brief Binary operator wrapper for switching non-commutative scan arguments + */ +template +class SwizzleScanOp +{ +private: + + /// Wrapped scan operator + ScanOp scan_op; + +public: + + /// Constructor + __host__ __device__ __forceinline__ + SwizzleScanOp(ScanOp scan_op) : scan_op(scan_op) {} + + /// Switch the scan arguments + template + __host__ __device__ __forceinline__ + T operator()(const T &a, const T &b) + { + T _a(a); + T _b(b); + + return scan_op(_b, _a); + } +}; + + +/** + * \brief Reduce-by-segment functor. + * + * Given two cub::KeyValuePair inputs \p a and \p b and a + * binary associative combining operator \p f(const T &x, const T &y), + * an instance of this functor returns a cub::KeyValuePair whose \p key + * field is a.key + b.key, and whose \p value field + * is either b.value if b.key is non-zero, or f(a.value, b.value) otherwise. + * + * ReduceBySegmentOp is an associative, non-commutative binary combining operator + * for input sequences of cub::KeyValuePair pairings. Such + * sequences are typically used to represent a segmented set of values to be reduced + * and a corresponding set of {0,1}-valued integer "head flags" demarcating the + * first value of each segment. + * + */ +template ///< Binary reduction operator to apply to values +struct ReduceBySegmentOp +{ + /// Wrapped reduction operator + ReductionOpT op; + + /// Constructor + __host__ __device__ __forceinline__ ReduceBySegmentOp() {} + + /// Constructor + __host__ __device__ __forceinline__ ReduceBySegmentOp(ReductionOpT op) : op(op) {} + + /// Scan operator + template ///< KeyValuePair pairing of T (value) and OffsetT (head flag) + __host__ __device__ __forceinline__ KeyValuePairT operator()( + const KeyValuePairT &first, ///< First partial reduction + const KeyValuePairT &second) ///< Second partial reduction + { + KeyValuePairT retval; + retval.key = first.key + second.key; + retval.value = (second.key) ? 
+ second.value : // The second partial reduction spans a segment reset, so it's value aggregate becomes the running aggregate + op(first.value, second.value); // The second partial reduction does not span a reset, so accumulate both into the running aggregate + return retval; + } +}; + + + +template ///< Binary reduction operator to apply to values +struct ReduceByKeyOp +{ + /// Wrapped reduction operator + ReductionOpT op; + + /// Constructor + __host__ __device__ __forceinline__ ReduceByKeyOp() {} + + /// Constructor + __host__ __device__ __forceinline__ ReduceByKeyOp(ReductionOpT op) : op(op) {} + + /// Scan operator + template + __host__ __device__ __forceinline__ KeyValuePairT operator()( + const KeyValuePairT &first, ///< First partial reduction + const KeyValuePairT &second) ///< Second partial reduction + { + KeyValuePairT retval = second; + + if (first.key == second.key) + retval.value = op(first.value, retval.value); + + return retval; + } +}; + + + + + + + +/** @} */ // end group UtilModule + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_reduce.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_reduce.cuh new file mode 100644 index 0000000..4c13688 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_reduce.cuh @@ -0,0 +1,152 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
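// A small host-side illustration of the ReduceBySegmentOp functor defined in
// thread_operators.cuh above. Values and names are my own; the point is that the key field
// carries a running count of segment-head flags and the value aggregate restarts whenever
// the right-hand partial begins a new segment. Compile as a .cu file with nvcc.
#include <cstdio>
#include <cub/thread/thread_operators.cuh>

int main()
{
    typedef cub::KeyValuePair<int, float> PairT;   // key = head-flag count, value = aggregate
    cub::ReduceBySegmentOp<cub::Sum> combine;

    PairT a; a.key = 0; a.value = 3.0f;            // partial from the current segment
    PairT b; b.key = 0; b.value = 4.0f;            // same segment: values accumulate
    PairT c; c.key = 1; c.value = 7.0f;            // starts a new segment: aggregate resets

    PairT ab  = combine(a, b);                     // {0, 7.0f}
    PairT abc = combine(ab, c);                    // {1, 7.0f}

    printf("ab = {%d, %.1f}, abc = {%d, %.1f}\n", ab.key, ab.value, abc.key, abc.value);
    return 0;
}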
+ * + ******************************************************************************/ + +/** + * \file + * Thread utilities for sequential reduction over statically-sized array types + */ + +#pragma once + +#include "../thread/thread_operators.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/// Internal namespace (to prevent ADL mishaps between static functions when mixing different CUB installations) +namespace internal { + +/** + * Sequential reduction over statically-sized array types + */ +template < + int LENGTH, + typename T, + typename ReductionOp> +__device__ __forceinline__ T ThreadReduce( + T* input, ///< [in] Input array + ReductionOp reduction_op, ///< [in] Binary reduction operator + T prefix, ///< [in] Prefix to seed reduction with + Int2Type /*length*/) +{ + T retval = prefix; + + #pragma unroll + for (int i = 0; i < LENGTH; ++i) + retval = reduction_op(retval, input[i]); + + return retval; +} + + +/** + * \brief Perform a sequential reduction over \p LENGTH elements of the \p input array, seeded with the specified \p prefix. The aggregate is returned. + * + * \tparam LENGTH LengthT of input array + * \tparam T [inferred] The data type to be reduced. + * \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) + */ +template < + int LENGTH, + typename T, + typename ReductionOp> +__device__ __forceinline__ T ThreadReduce( + T* input, ///< [in] Input array + ReductionOp reduction_op, ///< [in] Binary reduction operator + T prefix) ///< [in] Prefix to seed reduction with +{ + return ThreadReduce(input, reduction_op, prefix, Int2Type()); +} + + +/** + * \brief Perform a sequential reduction over \p LENGTH elements of the \p input array. The aggregate is returned. + * + * \tparam LENGTH LengthT of input array + * \tparam T [inferred] The data type to be reduced. + * \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) + */ +template < + int LENGTH, + typename T, + typename ReductionOp> +__device__ __forceinline__ T ThreadReduce( + T* input, ///< [in] Input array + ReductionOp reduction_op) ///< [in] Binary reduction operator +{ + T prefix = input[0]; + return ThreadReduce(input + 1, reduction_op, prefix); +} + + +/** + * \brief Perform a sequential reduction over the statically-sized \p input array, seeded with the specified \p prefix. The aggregate is returned. + * + * \tparam LENGTH [inferred] LengthT of \p input array + * \tparam T [inferred] The data type to be reduced. + * \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) + */ +template < + int LENGTH, + typename T, + typename ReductionOp> +__device__ __forceinline__ T ThreadReduce( + T (&input)[LENGTH], ///< [in] Input array + ReductionOp reduction_op, ///< [in] Binary reduction operator + T prefix) ///< [in] Prefix to seed reduction with +{ + return ThreadReduce(input, reduction_op, prefix, Int2Type()); +} + + +/** + * \brief Serial reduction with the specified operator + * + * \tparam LENGTH [inferred] LengthT of \p input array + * \tparam T [inferred] The data type to be reduced. 
+ * \tparam ScanOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) + */ +template < + int LENGTH, + typename T, + typename ReductionOp> +__device__ __forceinline__ T ThreadReduce( + T (&input)[LENGTH], ///< [in] Input array + ReductionOp reduction_op) ///< [in] Binary reduction operator +{ + return ThreadReduce((T*) input, reduction_op); +} + + +} // internal namespace +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_scan.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_scan.cuh new file mode 100644 index 0000000..8d67549 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_scan.cuh @@ -0,0 +1,268 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/** + * \file + * Thread utilities for sequential prefix scan over statically-sized array types + */ + +#pragma once + +#include "../thread/thread_operators.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/// Internal namespace (to prevent ADL mishaps between static functions when mixing different CUB installations) +namespace internal { + + +/** + * \addtogroup UtilModule + * @{ + */ + +/** + * \name Sequential prefix scan over statically-sized array types + * @{ + */ + +template < + int LENGTH, + typename T, + typename ScanOp> +__device__ __forceinline__ T ThreadScanExclusive( + T inclusive, + T exclusive, + T *input, ///< [in] Input array + T *output, ///< [out] Output array (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + Int2Type /*length*/) +{ + #pragma unroll + for (int i = 0; i < LENGTH; ++i) + { + inclusive = scan_op(exclusive, input[i]); + output[i] = exclusive; + exclusive = inclusive; + } + + return inclusive; +} + + + +/** + * \brief Perform a sequential exclusive prefix scan over \p LENGTH elements of the \p input array, seeded with the specified \p prefix. The aggregate is returned. + * + * \tparam LENGTH LengthT of \p input and \p output arrays + * \tparam T [inferred] The data type to be scanned. + * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) + */ +template < + int LENGTH, + typename T, + typename ScanOp> +__device__ __forceinline__ T ThreadScanExclusive( + T *input, ///< [in] Input array + T *output, ///< [out] Output array (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + T prefix, ///< [in] Prefix to seed scan with + bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. If not, the first output element is undefined. (Handy for preventing thread-0 from applying a prefix.) +{ + T inclusive = input[0]; + if (apply_prefix) + { + inclusive = scan_op(prefix, inclusive); + } + output[0] = prefix; + T exclusive = inclusive; + + return ThreadScanExclusive(inclusive, exclusive, input + 1, output + 1, scan_op, Int2Type()); +} + + +/** + * \brief Perform a sequential exclusive prefix scan over the statically-sized \p input array, seeded with the specified \p prefix. The aggregate is returned. + * + * \tparam LENGTH [inferred] LengthT of \p input and \p output arrays + * \tparam T [inferred] The data type to be scanned. + * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) + */ +template < + int LENGTH, + typename T, + typename ScanOp> +__device__ __forceinline__ T ThreadScanExclusive( + T (&input)[LENGTH], ///< [in] Input array + T (&output)[LENGTH], ///< [out] Output array (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + T prefix, ///< [in] Prefix to seed scan with + bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. (Handy for preventing thread-0 from applying a prefix.) 
+{ + return ThreadScanExclusive((T*) input, (T*) output, scan_op, prefix, apply_prefix); +} + + + + + + + + + +template < + int LENGTH, + typename T, + typename ScanOp> +__device__ __forceinline__ T ThreadScanInclusive( + T inclusive, + T *input, ///< [in] Input array + T *output, ///< [out] Output array (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + Int2Type /*length*/) +{ + #pragma unroll + for (int i = 0; i < LENGTH; ++i) + { + inclusive = scan_op(inclusive, input[i]); + output[i] = inclusive; + } + + return inclusive; +} + + +/** + * \brief Perform a sequential inclusive prefix scan over \p LENGTH elements of the \p input array. The aggregate is returned. + * + * \tparam LENGTH LengthT of \p input and \p output arrays + * \tparam T [inferred] The data type to be scanned. + * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) + */ +template < + int LENGTH, + typename T, + typename ScanOp> +__device__ __forceinline__ T ThreadScanInclusive( + T *input, ///< [in] Input array + T *output, ///< [out] Output array (may be aliased to \p input) + ScanOp scan_op) ///< [in] Binary scan operator +{ + T inclusive = input[0]; + output[0] = inclusive; + + // Continue scan + return ThreadScanInclusive(inclusive, input + 1, output + 1, scan_op, Int2Type()); +} + + +/** + * \brief Perform a sequential inclusive prefix scan over the statically-sized \p input array. The aggregate is returned. + * + * \tparam LENGTH [inferred] LengthT of \p input and \p output arrays + * \tparam T [inferred] The data type to be scanned. + * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) + */ +template < + int LENGTH, + typename T, + typename ScanOp> +__device__ __forceinline__ T ThreadScanInclusive( + T (&input)[LENGTH], ///< [in] Input array + T (&output)[LENGTH], ///< [out] Output array (may be aliased to \p input) + ScanOp scan_op) ///< [in] Binary scan operator +{ + return ThreadScanInclusive((T*) input, (T*) output, scan_op); +} + + +/** + * \brief Perform a sequential inclusive prefix scan over \p LENGTH elements of the \p input array, seeded with the specified \p prefix. The aggregate is returned. + * + * \tparam LENGTH LengthT of \p input and \p output arrays + * \tparam T [inferred] The data type to be scanned. + * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) + */ +template < + int LENGTH, + typename T, + typename ScanOp> +__device__ __forceinline__ T ThreadScanInclusive( + T *input, ///< [in] Input array + T *output, ///< [out] Output array (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + T prefix, ///< [in] Prefix to seed scan with + bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. (Handy for preventing thread-0 from applying a prefix.) +{ + T inclusive = input[0]; + if (apply_prefix) + { + inclusive = scan_op(prefix, inclusive); + } + output[0] = inclusive; + + // Continue scan + return ThreadScanInclusive(inclusive, input + 1, output + 1, scan_op, Int2Type()); +} + + +/** + * \brief Perform a sequential inclusive prefix scan over the statically-sized \p input array, seeded with the specified \p prefix. The aggregate is returned. + * + * \tparam LENGTH [inferred] LengthT of \p input and \p output arrays + * \tparam T [inferred] The data type to be scanned. 
+ * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) + */ +template < + int LENGTH, + typename T, + typename ScanOp> +__device__ __forceinline__ T ThreadScanInclusive( + T (&input)[LENGTH], ///< [in] Input array + T (&output)[LENGTH], ///< [out] Output array (may be aliased to \p input) + ScanOp scan_op, ///< [in] Binary scan operator + T prefix, ///< [in] Prefix to seed scan with + bool apply_prefix = true) ///< [in] Whether or not the calling thread should apply its prefix. (Handy for preventing thread-0 from applying a prefix.) +{ + return ThreadScanInclusive((T*) input, (T*) output, scan_op, prefix, apply_prefix); +} + + +//@} end member group + +/** @} */ // end group UtilModule + + +} // internal namespace +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_search.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_search.cuh new file mode 100644 index 0000000..3099080 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_search.cuh @@ -0,0 +1,154 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
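// A hedged sketch of the sequential scan helpers from thread_scan.cuh above. They live in
// cub::internal and are normally invoked by CUB's warp/block scans rather than directly;
// this only shows the data flow over a statically-sized register array. Names are illustrative.
#include <cub/thread/thread_scan.cuh>

__global__ void thread_scan_sketch(int *d_out)
{
    int items[4] = {1, 2, 3, 4};
    int incl[4];
    int excl[4];

    // Inclusive scan: incl = {1, 3, 6, 10}; the aggregate (10) is returned.
    int agg_incl = cub::internal::ThreadScanInclusive(items, incl, cub::Sum());

    // Exclusive scan seeded with prefix 0: excl = {0, 1, 3, 6}; also returns 10.
    int agg_excl = cub::internal::ThreadScanExclusive(items, excl, cub::Sum(), 0);

    if (blockIdx.x == 0 && threadIdx.x == 0)
    {
        d_out[0] = agg_incl;
        d_out[1] = agg_excl;
        for (int i = 0; i < 4; ++i) { d_out[2 + i] = incl[i]; d_out[6 + i] = excl[i]; }
    }
}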
+ * + ******************************************************************************/ + +/** + * \file + * Thread utilities for sequential search + */ + +#pragma once + +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * Computes the begin offsets into A and B for the specific diagonal + */ +template < + typename AIteratorT, + typename BIteratorT, + typename OffsetT, + typename CoordinateT> +__host__ __device__ __forceinline__ void MergePathSearch( + OffsetT diagonal, + AIteratorT a, + BIteratorT b, + OffsetT a_len, + OffsetT b_len, + CoordinateT& path_coordinate) +{ + /// The value type of the input iterator + typedef typename std::iterator_traits::value_type T; + + OffsetT split_min = CUB_MAX(diagonal - b_len, 0); + OffsetT split_max = CUB_MIN(diagonal, a_len); + + while (split_min < split_max) + { + OffsetT split_pivot = (split_min + split_max) >> 1; + if (a[split_pivot] <= b[diagonal - split_pivot - 1]) + { + // Move candidate split range up A, down B + split_min = split_pivot + 1; + } + else + { + // Move candidate split range up B, down A + split_max = split_pivot; + } + } + + path_coordinate.x = CUB_MIN(split_min, a_len); + path_coordinate.y = diagonal - split_min; +} + + + +/** + * \brief Returns the offset of the first value within \p input which does not compare less than \p val + */ +template < + typename InputIteratorT, + typename OffsetT, + typename T> +__device__ __forceinline__ OffsetT LowerBound( + InputIteratorT input, ///< [in] Input sequence + OffsetT num_items, ///< [in] Input sequence length + T val) ///< [in] Search key +{ + OffsetT retval = 0; + while (num_items > 0) + { + OffsetT half = num_items >> 1; + if (input[retval + half] < val) + { + retval = retval + (half + 1); + num_items = num_items - (half + 1); + } + else + { + num_items = half; + } + } + + return retval; +} + + +/** + * \brief Returns the offset of the first value within \p input which compares greater than \p val + */ +template < + typename InputIteratorT, + typename OffsetT, + typename T> +__device__ __forceinline__ OffsetT UpperBound( + InputIteratorT input, ///< [in] Input sequence + OffsetT num_items, ///< [in] Input sequence length + T val) ///< [in] Search key +{ + OffsetT retval = 0; + while (num_items > 0) + { + OffsetT half = num_items >> 1; + if (val < input[retval + half]) + { + num_items = half; + } + else + { + retval = retval + (half + 1); + num_items = num_items - (half + 1); + } + } + + return retval; +} + + + + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_store.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_store.cuh new file mode 100644 index 0000000..ec20b36 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/thread/thread_store.cuh @@ -0,0 +1,422 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Thread utilities for writing memory using PTX cache modifiers. + */ + +#pragma once + +#include + +#include "../util_ptx.cuh" +#include "../util_type.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \addtogroup UtilIo + * @{ + */ + + +//----------------------------------------------------------------------------- +// Tags and constants +//----------------------------------------------------------------------------- + +/** + * \brief Enumeration of cache modifiers for memory store operations. + */ +enum CacheStoreModifier +{ + STORE_DEFAULT, ///< Default (no modifier) + STORE_WB, ///< Cache write-back all coherent levels + STORE_CG, ///< Cache at global level + STORE_CS, ///< Cache streaming (likely to be accessed once) + STORE_WT, ///< Cache write-through (to system memory) + STORE_VOLATILE, ///< Volatile shared (any memory space) +}; + + +/** + * \name Thread I/O (cache modified) + * @{ + */ + +/** + * \brief Thread utility for writing memory using cub::CacheStoreModifier cache modifiers. Can be used to store any data type. 
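// As with ThreadLoad, the Doxygen example that follows appears to have lost its
// template-argument lists in this dump; upstream CUB 1.8.0 names the CacheStoreModifier
// explicitly. A minimal hedged sketch with illustrative names:
#include <cub/thread/thread_store.cuh>   // or the umbrella <cub/cub.cuh>

__global__ void thread_store_sketch(int *d_out, const int *d_in)
{
    int tid = blockIdx.x * blockDim.x + threadIdx.x;
    int val = d_in[tid];

    cub::ThreadStore<cub::STORE_CG>(d_out + tid, val);       // cache at global level
    cub::ThreadStore<cub::STORE_CS>(d_out + tid, val);       // streaming, likely touched once
    cub::ThreadStore<cub::STORE_DEFAULT>(d_out + tid, val);  // plain store (last write wins here)
}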
+ * + * \par Example + * \code + * #include // or equivalently + * + * // 32-bit store using cache-global modifier: + * int *d_out; + * int val; + * cub::ThreadStore(d_out + threadIdx.x, val); + * + * // 16-bit store using default modifier + * short *d_out; + * short val; + * cub::ThreadStore(d_out + threadIdx.x, val); + * + * // 256-bit store using write-through modifier + * double4 *d_out; + * double4 val; + * cub::ThreadStore(d_out + threadIdx.x, val); + * + * // 96-bit store using cache-streaming cache modifier + * struct TestFoo { bool a; short b; }; + * TestFoo *d_struct; + * TestFoo val; + * cub::ThreadStore(d_out + threadIdx.x, val); + * \endcode + * + * \tparam MODIFIER [inferred] CacheStoreModifier enumeration + * \tparam InputIteratorT [inferred] Output iterator type \iterator + * \tparam T [inferred] Data type of output value + */ +template < + CacheStoreModifier MODIFIER, + typename OutputIteratorT, + typename T> +__device__ __forceinline__ void ThreadStore(OutputIteratorT itr, T val); + + +//@} end member group + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + + +/// Helper structure for templated store iteration (inductive case) +template +struct IterateThreadStore +{ + template + static __device__ __forceinline__ void Store(T *ptr, T *vals) + { + ThreadStore(ptr + COUNT, vals[COUNT]); + IterateThreadStore::template Store(ptr, vals); + } + + template + static __device__ __forceinline__ void Dereference(OutputIteratorT ptr, T *vals) + { + ptr[COUNT] = vals[COUNT]; + IterateThreadStore::Dereference(ptr, vals); + } + +}; + +/// Helper structure for templated store iteration (termination case) +template +struct IterateThreadStore +{ + template + static __device__ __forceinline__ void Store(T * /*ptr*/, T * /*vals*/) {} + + template + static __device__ __forceinline__ void Dereference(OutputIteratorT /*ptr*/, T * /*vals*/) {} +}; + + +/** + * Define a uint4 (16B) ThreadStore specialization for the given Cache load modifier + */ +#define _CUB_STORE_16(cub_modifier, ptx_modifier) \ + template<> \ + __device__ __forceinline__ void ThreadStore(uint4* ptr, uint4 val) \ + { \ + asm volatile ("st."#ptx_modifier".v4.u32 [%0], {%1, %2, %3, %4};" : : \ + _CUB_ASM_PTR_(ptr), \ + "r"(val.x), \ + "r"(val.y), \ + "r"(val.z), \ + "r"(val.w)); \ + } \ + template<> \ + __device__ __forceinline__ void ThreadStore(ulonglong2* ptr, ulonglong2 val) \ + { \ + asm volatile ("st."#ptx_modifier".v2.u64 [%0], {%1, %2};" : : \ + _CUB_ASM_PTR_(ptr), \ + "l"(val.x), \ + "l"(val.y)); \ + } + + +/** + * Define a uint2 (8B) ThreadStore specialization for the given Cache load modifier + */ +#define _CUB_STORE_8(cub_modifier, ptx_modifier) \ + template<> \ + __device__ __forceinline__ void ThreadStore(ushort4* ptr, ushort4 val) \ + { \ + asm volatile ("st."#ptx_modifier".v4.u16 [%0], {%1, %2, %3, %4};" : : \ + _CUB_ASM_PTR_(ptr), \ + "h"(val.x), \ + "h"(val.y), \ + "h"(val.z), \ + "h"(val.w)); \ + } \ + template<> \ + __device__ __forceinline__ void ThreadStore(uint2* ptr, uint2 val) \ + { \ + asm volatile ("st."#ptx_modifier".v2.u32 [%0], {%1, %2};" : : \ + _CUB_ASM_PTR_(ptr), \ + "r"(val.x), \ + "r"(val.y)); \ + } \ + template<> \ + __device__ __forceinline__ void ThreadStore(unsigned long long* ptr, unsigned long long val) \ + { \ + asm volatile ("st."#ptx_modifier".u64 [%0], %1;" : : \ + _CUB_ASM_PTR_(ptr), \ + "l"(val)); \ + } + +/** + * Define a unsigned int (4B) ThreadStore specialization for the given Cache load modifier + */ +#define _CUB_STORE_4(cub_modifier, ptx_modifier) \ + template<> \ + 
__device__ __forceinline__ void ThreadStore(unsigned int* ptr, unsigned int val) \ + { \ + asm volatile ("st."#ptx_modifier".u32 [%0], %1;" : : \ + _CUB_ASM_PTR_(ptr), \ + "r"(val)); \ + } + + +/** + * Define a unsigned short (2B) ThreadStore specialization for the given Cache load modifier + */ +#define _CUB_STORE_2(cub_modifier, ptx_modifier) \ + template<> \ + __device__ __forceinline__ void ThreadStore(unsigned short* ptr, unsigned short val) \ + { \ + asm volatile ("st."#ptx_modifier".u16 [%0], %1;" : : \ + _CUB_ASM_PTR_(ptr), \ + "h"(val)); \ + } + + +/** + * Define a unsigned char (1B) ThreadStore specialization for the given Cache load modifier + */ +#define _CUB_STORE_1(cub_modifier, ptx_modifier) \ + template<> \ + __device__ __forceinline__ void ThreadStore(unsigned char* ptr, unsigned char val) \ + { \ + asm volatile ( \ + "{" \ + " .reg .u8 datum;" \ + " cvt.u8.u16 datum, %1;" \ + " st."#ptx_modifier".u8 [%0], datum;" \ + "}" : : \ + _CUB_ASM_PTR_(ptr), \ + "h"((unsigned short) val)); \ + } + +/** + * Define powers-of-two ThreadStore specializations for the given Cache load modifier + */ +#define _CUB_STORE_ALL(cub_modifier, ptx_modifier) \ + _CUB_STORE_16(cub_modifier, ptx_modifier) \ + _CUB_STORE_8(cub_modifier, ptx_modifier) \ + _CUB_STORE_4(cub_modifier, ptx_modifier) \ + _CUB_STORE_2(cub_modifier, ptx_modifier) \ + _CUB_STORE_1(cub_modifier, ptx_modifier) \ + + +/** + * Define ThreadStore specializations for the various Cache load modifiers + */ +#if CUB_PTX_ARCH >= 200 + _CUB_STORE_ALL(STORE_WB, wb) + _CUB_STORE_ALL(STORE_CG, cg) + _CUB_STORE_ALL(STORE_CS, cs) + _CUB_STORE_ALL(STORE_WT, wt) +#else + _CUB_STORE_ALL(STORE_WB, global) + _CUB_STORE_ALL(STORE_CG, global) + _CUB_STORE_ALL(STORE_CS, global) + _CUB_STORE_ALL(STORE_WT, volatile.global) +#endif + + +// Macro cleanup +#undef _CUB_STORE_ALL +#undef _CUB_STORE_1 +#undef _CUB_STORE_2 +#undef _CUB_STORE_4 +#undef _CUB_STORE_8 +#undef _CUB_STORE_16 + + +/** + * ThreadStore definition for STORE_DEFAULT modifier on iterator types + */ +template +__device__ __forceinline__ void ThreadStore( + OutputIteratorT itr, + T val, + Int2Type /*modifier*/, + Int2Type /*is_pointer*/) +{ + *itr = val; +} + + +/** + * ThreadStore definition for STORE_DEFAULT modifier on pointer types + */ +template +__device__ __forceinline__ void ThreadStore( + T *ptr, + T val, + Int2Type /*modifier*/, + Int2Type /*is_pointer*/) +{ + *ptr = val; +} + + +/** + * ThreadStore definition for STORE_VOLATILE modifier on primitive pointer types + */ +template +__device__ __forceinline__ void ThreadStoreVolatilePtr( + T *ptr, + T val, + Int2Type /*is_primitive*/) +{ + *reinterpret_cast(ptr) = val; +} + + +/** + * ThreadStore definition for STORE_VOLATILE modifier on non-primitive pointer types + */ +template +__device__ __forceinline__ void ThreadStoreVolatilePtr( + T *ptr, + T val, + Int2Type /*is_primitive*/) +{ + // Create a temporary using shuffle-words, then store using volatile-words + typedef typename UnitWord::VolatileWord VolatileWord; + typedef typename UnitWord::ShuffleWord ShuffleWord; + + const int VOLATILE_MULTIPLE = sizeof(T) / sizeof(VolatileWord); + const int SHUFFLE_MULTIPLE = sizeof(T) / sizeof(ShuffleWord); + + VolatileWord words[VOLATILE_MULTIPLE]; + + #pragma unroll + for (int i = 0; i < SHUFFLE_MULTIPLE; ++i) + reinterpret_cast(words)[i] = reinterpret_cast(&val)[i]; + + IterateThreadStore<0, VOLATILE_MULTIPLE>::template Dereference( + reinterpret_cast(ptr), + words); +} + + +/** + * ThreadStore definition for STORE_VOLATILE modifier on 
pointer types + */ +template +__device__ __forceinline__ void ThreadStore( + T *ptr, + T val, + Int2Type /*modifier*/, + Int2Type /*is_pointer*/) +{ + ThreadStoreVolatilePtr(ptr, val, Int2Type::PRIMITIVE>()); +} + + +/** + * ThreadStore definition for generic modifiers on pointer types + */ +template +__device__ __forceinline__ void ThreadStore( + T *ptr, + T val, + Int2Type /*modifier*/, + Int2Type /*is_pointer*/) +{ + // Create a temporary using shuffle-words, then store using device-words + typedef typename UnitWord::DeviceWord DeviceWord; + typedef typename UnitWord::ShuffleWord ShuffleWord; + + const int DEVICE_MULTIPLE = sizeof(T) / sizeof(DeviceWord); + const int SHUFFLE_MULTIPLE = sizeof(T) / sizeof(ShuffleWord); + + DeviceWord words[DEVICE_MULTIPLE]; + + #pragma unroll + for (int i = 0; i < SHUFFLE_MULTIPLE; ++i) + reinterpret_cast(words)[i] = reinterpret_cast(&val)[i]; + + IterateThreadStore<0, DEVICE_MULTIPLE>::template Store( + reinterpret_cast(ptr), + words); +} + + +/** + * ThreadStore definition for generic modifiers + */ +template +__device__ __forceinline__ void ThreadStore(OutputIteratorT itr, T val) +{ + ThreadStore( + itr, + val, + Int2Type(), + Int2Type::VALUE>()); +} + + + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + +/** @} */ // end group UtilIo + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_allocator.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_allocator.cuh new file mode 100644 index 0000000..0e6dd04 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_allocator.cuh @@ -0,0 +1,708 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
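// A hedged usage sketch for the binary-search helpers added in thread_search.cuh earlier in
// this patch: UpperBound maps a flat nonzero index back to its CSR row, a typical use in
// merge-based SpMV-style kernels. Array layout and names below are illustrative.
#include <cub/thread/thread_search.cuh>

__global__ void csr_row_of_nonzero(const int *d_row_offsets,  // num_rows + 1 entries
                                   int        num_rows,
                                   int       *d_row_of_nnz,
                                   int        num_nonzeros)
{
    int nnz = blockIdx.x * blockDim.x + threadIdx.x;
    if (nnz >= num_nonzeros) return;

    // Index of the first offset strictly greater than nnz, minus one, is the owning row.
    int row = cub::UpperBound(d_row_offsets, num_rows + 1, nnz) - 1;
    d_row_of_nnz[nnz] = row;
}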
+ * + ******************************************************************************/ + +/****************************************************************************** + * Simple caching allocator for device memory allocations. The allocator is + * thread-safe and capable of managing device allocations on multiple devices. + ******************************************************************************/ + +#pragma once + +#include "util_namespace.cuh" +#include "util_debug.cuh" + +#include +#include + +#include "host/mutex.cuh" +#include + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \addtogroup UtilMgmt + * @{ + */ + + +/****************************************************************************** + * CachingDeviceAllocator (host use) + ******************************************************************************/ + +/** + * \brief A simple caching allocator for device memory allocations. + * + * \par Overview + * The allocator is thread-safe and stream-safe and is capable of managing cached + * device allocations on multiple devices. It behaves as follows: + * + * \par + * - Allocations from the allocator are associated with an \p active_stream. Once freed, + * the allocation becomes available immediately for reuse within the \p active_stream + * with which it was associated with during allocation, and it becomes available for + * reuse within other streams when all prior work submitted to \p active_stream has completed. + * - Allocations are categorized and cached by bin size. A new allocation request of + * a given size will only consider cached allocations within the corresponding bin. + * - Bin limits progress geometrically in accordance with the growth factor + * \p bin_growth provided during construction. Unused device allocations within + * a larger bin cache are not reused for allocation requests that categorize to + * smaller bin sizes. + * - Allocation requests below (\p bin_growth ^ \p min_bin) are rounded up to + * (\p bin_growth ^ \p min_bin). + * - Allocations above (\p bin_growth ^ \p max_bin) are not rounded up to the nearest + * bin and are simply freed when they are deallocated instead of being returned + * to a bin-cache. + * - %If the total storage of cached allocations on a given device will exceed + * \p max_cached_bytes, allocations for that device are simply freed when they are + * deallocated instead of being returned to their bin-cache. 
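// A small worked illustration of the binning rules just listed, using the default
// configuration described in the next paragraph (bin_growth = 8, min_bin = 3, max_bin = 7).
// Plain host code, independent of the allocator itself.
#include <cstdio>

int main()
{
    unsigned int bin_growth = 8, min_bin = 3, max_bin = 7;

    // Bin sizes are bin_growth^bin: 8^3 = 512B up through 8^7 = 2MB.
    size_t max_bin_bytes = 0;
    for (unsigned int bin = min_bin; bin <= max_bin; ++bin)
    {
        size_t bytes = 1;
        for (unsigned int i = 0; i < bin; ++i) bytes *= bin_growth;
        printf("bin %u -> %zu bytes\n", bin, bytes);   // 512, 4096, 32768, 262144, 2097152
        max_bin_bytes = bytes;
    }

    // Default cache ceiling per device: (max bin size * 3) - 1 = 6,291,455 bytes.
    printf("max_cached_bytes = %zu\n", max_bin_bytes * 3 - 1);
    return 0;
}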
+ * + * \par + * For example, the default-constructed CachingDeviceAllocator is configured with: + * - \p bin_growth = 8 + * - \p min_bin = 3 + * - \p max_bin = 7 + * - \p max_cached_bytes = 6MB - 1B + * + * \par + * which delineates five bin-sizes: 512B, 4KB, 32KB, 256KB, and 2MB + * and sets a maximum of 6,291,455 cached bytes per device + * + */ +struct CachingDeviceAllocator +{ + + //--------------------------------------------------------------------- + // Constants + //--------------------------------------------------------------------- + + /// Out-of-bounds bin + static const unsigned int INVALID_BIN = (unsigned int) -1; + + /// Invalid size + static const size_t INVALID_SIZE = (size_t) -1; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + + /// Invalid device ordinal + static const int INVALID_DEVICE_ORDINAL = -1; + + //--------------------------------------------------------------------- + // Type definitions and helper types + //--------------------------------------------------------------------- + + /** + * Descriptor for device memory allocations + */ + struct BlockDescriptor + { + void* d_ptr; // Device pointer + size_t bytes; // Size of allocation in bytes + unsigned int bin; // Bin enumeration + int device; // device ordinal + cudaStream_t associated_stream; // Associated associated_stream + cudaEvent_t ready_event; // Signal when associated stream has run to the point at which this block was freed + + // Constructor (suitable for searching maps for a specific block, given its pointer and device) + BlockDescriptor(void *d_ptr, int device) : + d_ptr(d_ptr), + bytes(0), + bin(INVALID_BIN), + device(device), + associated_stream(0), + ready_event(0) + {} + + // Constructor (suitable for searching maps for a range of suitable blocks, given a device) + BlockDescriptor(int device) : + d_ptr(NULL), + bytes(0), + bin(INVALID_BIN), + device(device), + associated_stream(0), + ready_event(0) + {} + + // Comparison functor for comparing device pointers + static bool PtrCompare(const BlockDescriptor &a, const BlockDescriptor &b) + { + if (a.device == b.device) + return (a.d_ptr < b.d_ptr); + else + return (a.device < b.device); + } + + // Comparison functor for comparing allocation sizes + static bool SizeCompare(const BlockDescriptor &a, const BlockDescriptor &b) + { + if (a.device == b.device) + return (a.bytes < b.bytes); + else + return (a.device < b.device); + } + }; + + /// BlockDescriptor comparator function interface + typedef bool (*Compare)(const BlockDescriptor &, const BlockDescriptor &); + + class TotalBytes { + public: + size_t free; + size_t live; + TotalBytes() { free = live = 0; } + }; + + /// Set type for cached blocks (ordered by size) + typedef std::multiset CachedBlocks; + + /// Set type for live blocks (ordered by ptr) + typedef std::multiset BusyBlocks; + + /// Map type of device ordinals to the number of cached bytes cached by each device + typedef std::map GpuCachedBytes; + + + //--------------------------------------------------------------------- + // Utility functions + //--------------------------------------------------------------------- + + /** + * Integer pow function for unsigned base and exponent + */ + static unsigned int IntPow( + unsigned int base, + unsigned int exp) + { + unsigned int retval = 1; + while (exp > 0) + { + if (exp & 1) { + retval = retval * base; // multiply the result by the current base + } + base = base * base; // square the base + exp = exp >> 1; // divide the exponent in half + } + return retval; + } + + + /** + * Round 
up to the nearest power-of + */ + void NearestPowerOf( + unsigned int &power, + size_t &rounded_bytes, + unsigned int base, + size_t value) + { + power = 0; + rounded_bytes = 1; + + if (value * base < value) + { + // Overflow + power = sizeof(size_t) * 8; + rounded_bytes = size_t(0) - 1; + return; + } + + while (rounded_bytes < value) + { + rounded_bytes *= base; + power++; + } + } + + + //--------------------------------------------------------------------- + // Fields + //--------------------------------------------------------------------- + + cub::Mutex mutex; /// Mutex for thread-safety + + unsigned int bin_growth; /// Geometric growth factor for bin-sizes + unsigned int min_bin; /// Minimum bin enumeration + unsigned int max_bin; /// Maximum bin enumeration + + size_t min_bin_bytes; /// Minimum bin size + size_t max_bin_bytes; /// Maximum bin size + size_t max_cached_bytes; /// Maximum aggregate cached bytes per device + + const bool skip_cleanup; /// Whether or not to skip a call to FreeAllCached() when destructor is called. (The CUDA runtime may have already shut down for statically declared allocators) + bool debug; /// Whether or not to print (de)allocation events to stdout + + GpuCachedBytes cached_bytes; /// Map of device ordinal to aggregate cached bytes on that device + CachedBlocks cached_blocks; /// Set of cached device allocations available for reuse + BusyBlocks live_blocks; /// Set of live device allocations currently in use + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + //--------------------------------------------------------------------- + // Methods + //--------------------------------------------------------------------- + + /** + * \brief Constructor. + */ + CachingDeviceAllocator( + unsigned int bin_growth, ///< Geometric growth factor for bin-sizes + unsigned int min_bin = 1, ///< Minimum bin (default is bin_growth ^ 1) + unsigned int max_bin = INVALID_BIN, ///< Maximum bin (default is no max bin) + size_t max_cached_bytes = INVALID_SIZE, ///< Maximum aggregate cached bytes per device (default is no limit) + bool skip_cleanup = false, ///< Whether or not to skip a call to \p FreeAllCached() when the destructor is called (default is to deallocate) + bool debug = false) ///< Whether or not to print (de)allocation events to stdout (default is no stderr output) + : + bin_growth(bin_growth), + min_bin(min_bin), + max_bin(max_bin), + min_bin_bytes(IntPow(bin_growth, min_bin)), + max_bin_bytes(IntPow(bin_growth, max_bin)), + max_cached_bytes(max_cached_bytes), + skip_cleanup(skip_cleanup), + debug(debug), + cached_blocks(BlockDescriptor::SizeCompare), + live_blocks(BlockDescriptor::PtrCompare) + {} + + + /** + * \brief Default constructor. + * + * Configured with: + * \par + * - \p bin_growth = 8 + * - \p min_bin = 3 + * - \p max_bin = 7 + * - \p max_cached_bytes = (\p bin_growth ^ \p max_bin) * 3) - 1 = 6,291,455 bytes + * + * which delineates five bin-sizes: 512B, 4KB, 32KB, 256KB, and 2MB and + * sets a maximum of 6,291,455 cached bytes per device + */ + CachingDeviceAllocator( + bool skip_cleanup = false, + bool debug = false) + : + bin_growth(8), + min_bin(3), + max_bin(7), + min_bin_bytes(IntPow(bin_growth, min_bin)), + max_bin_bytes(IntPow(bin_growth, max_bin)), + max_cached_bytes((max_bin_bytes * 3) - 1), + skip_cleanup(skip_cleanup), + debug(debug), + cached_blocks(BlockDescriptor::SizeCompare), + live_blocks(BlockDescriptor::PtrCompare) + {} + + + /** + * \brief Sets the limit on the number bytes this allocator is allowed to cache per device. 
+ * + * Changing the ceiling of cached bytes does not cause any allocations (in-use or + * cached-in-reserve) to be freed. See \p FreeAllCached(). + */ + cudaError_t SetMaxCachedBytes( + size_t max_cached_bytes) + { + // Lock + mutex.Lock(); + + if (debug) _CubLog("Changing max_cached_bytes (%lld -> %lld)\n", (long long) this->max_cached_bytes, (long long) max_cached_bytes); + + this->max_cached_bytes = max_cached_bytes; + + // Unlock + mutex.Unlock(); + + return cudaSuccess; + } + + + /** + * \brief Provides a suitable allocation of device memory for the given size on the specified device. + * + * Once freed, the allocation becomes available immediately for reuse within the \p active_stream + * with which it was associated with during allocation, and it becomes available for reuse within other + * streams when all prior work submitted to \p active_stream has completed. + */ + cudaError_t DeviceAllocate( + int device, ///< [in] Device on which to place the allocation + void **d_ptr, ///< [out] Reference to pointer to the allocation + size_t bytes, ///< [in] Minimum number of bytes for the allocation + cudaStream_t active_stream = 0) ///< [in] The stream to be associated with this allocation + { + *d_ptr = NULL; + int entrypoint_device = INVALID_DEVICE_ORDINAL; + cudaError_t error = cudaSuccess; + + if (device == INVALID_DEVICE_ORDINAL) + { + if (CubDebug(error = cudaGetDevice(&entrypoint_device))) return error; + device = entrypoint_device; + } + + // Create a block descriptor for the requested allocation + bool found = false; + BlockDescriptor search_key(device); + search_key.associated_stream = active_stream; + NearestPowerOf(search_key.bin, search_key.bytes, bin_growth, bytes); + + if (search_key.bin > max_bin) + { + // Bin is greater than our maximum bin: allocate the request + // exactly and give out-of-bounds bin. It will not be cached + // for reuse when returned. + search_key.bin = INVALID_BIN; + search_key.bytes = bytes; + } + else + { + // Search for a suitable cached allocation: lock + mutex.Lock(); + + if (search_key.bin < min_bin) + { + // Bin is less than minimum bin: round up + search_key.bin = min_bin; + search_key.bytes = min_bin_bytes; + } + + // Iterate through the range of cached blocks on the same device in the same bin + CachedBlocks::iterator block_itr = cached_blocks.lower_bound(search_key); + while ((block_itr != cached_blocks.end()) + && (block_itr->device == device) + && (block_itr->bin == search_key.bin)) + { + // To prevent races with reusing blocks returned by the host but still + // in use by the device, only consider cached blocks that are + // either (from the active stream) or (from an idle stream) + if ((active_stream == block_itr->associated_stream) || + (cudaEventQuery(block_itr->ready_event) != cudaErrorNotReady)) + { + // Reuse existing cache block. Insert into live blocks. 
+ found = true; + search_key = *block_itr; + search_key.associated_stream = active_stream; + live_blocks.insert(search_key); + + // Remove from free blocks + cached_bytes[device].free -= search_key.bytes; + cached_bytes[device].live += search_key.bytes; + + if (debug) _CubLog("\tDevice %d reused cached block at %p (%lld bytes) for stream %lld (previously associated with stream %lld).\n", + device, search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) block_itr->associated_stream); + + cached_blocks.erase(block_itr); + + break; + } + block_itr++; + } + + // Done searching: unlock + mutex.Unlock(); + } + + // Allocate the block if necessary + if (!found) + { + // Set runtime's current device to specified device (entrypoint may not be set) + if (device != entrypoint_device) + { + if (CubDebug(error = cudaGetDevice(&entrypoint_device))) return error; + if (CubDebug(error = cudaSetDevice(device))) return error; + } + + // Attempt to allocate + if (CubDebug(error = cudaMalloc(&search_key.d_ptr, search_key.bytes)) == cudaErrorMemoryAllocation) + { + // The allocation attempt failed: free all cached blocks on device and retry + if (debug) _CubLog("\tDevice %d failed to allocate %lld bytes for stream %lld, retrying after freeing cached allocations", + device, (long long) search_key.bytes, (long long) search_key.associated_stream); + + error = cudaSuccess; // Reset the error we will return + cudaGetLastError(); // Reset CUDART's error + + // Lock + mutex.Lock(); + + // Iterate the range of free blocks on the same device + BlockDescriptor free_key(device); + CachedBlocks::iterator block_itr = cached_blocks.lower_bound(free_key); + + while ((block_itr != cached_blocks.end()) && (block_itr->device == device)) + { + // No need to worry about synchronization with the device: cudaFree is + // blocking and will synchronize across all kernels executing + // on the current device + + // Free device memory and destroy stream event. 
+ if (CubDebug(error = cudaFree(block_itr->d_ptr))) break; + if (CubDebug(error = cudaEventDestroy(block_itr->ready_event))) break; + + // Reduce balance and erase entry + cached_bytes[device].free -= block_itr->bytes; + + if (debug) _CubLog("\tDevice %d freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n", + device, (long long) block_itr->bytes, (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); + + cached_blocks.erase(block_itr); + + block_itr++; + } + + // Unlock + mutex.Unlock(); + + // Return under error + if (error) return error; + + // Try to allocate again + if (CubDebug(error = cudaMalloc(&search_key.d_ptr, search_key.bytes))) return error; + } + + // Create ready event + if (CubDebug(error = cudaEventCreateWithFlags(&search_key.ready_event, cudaEventDisableTiming))) + return error; + + // Insert into live blocks + mutex.Lock(); + live_blocks.insert(search_key); + cached_bytes[device].live += search_key.bytes; + mutex.Unlock(); + + if (debug) _CubLog("\tDevice %d allocated new device block at %p (%lld bytes associated with stream %lld).\n", + device, search_key.d_ptr, (long long) search_key.bytes, (long long) search_key.associated_stream); + + // Attempt to revert back to previous device if necessary + if ((entrypoint_device != INVALID_DEVICE_ORDINAL) && (entrypoint_device != device)) + { + if (CubDebug(error = cudaSetDevice(entrypoint_device))) return error; + } + } + + // Copy device pointer to output parameter + *d_ptr = search_key.d_ptr; + + if (debug) _CubLog("\t\t%lld available blocks cached (%lld bytes), %lld live blocks outstanding(%lld bytes).\n", + (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); + + return error; + } + + + /** + * \brief Provides a suitable allocation of device memory for the given size on the current device. + * + * Once freed, the allocation becomes available immediately for reuse within the \p active_stream + * with which it was associated with during allocation, and it becomes available for reuse within other + * streams when all prior work submitted to \p active_stream has completed. + */ + cudaError_t DeviceAllocate( + void **d_ptr, ///< [out] Reference to pointer to the allocation + size_t bytes, ///< [in] Minimum number of bytes for the allocation + cudaStream_t active_stream = 0) ///< [in] The stream to be associated with this allocation + { + return DeviceAllocate(INVALID_DEVICE_ORDINAL, d_ptr, bytes, active_stream); + } + + + /** + * \brief Frees a live allocation of device memory on the specified device, returning it to the allocator. + * + * Once freed, the allocation becomes available immediately for reuse within the \p active_stream + * with which it was associated with during allocation, and it becomes available for reuse within other + * streams when all prior work submitted to \p active_stream has completed. 
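+     *
+     * If \p device is INVALID_DEVICE_ORDINAL, the allocation is looked up on the
+     * device that is current at the time of the call.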
+ */ + cudaError_t DeviceFree( + int device, + void* d_ptr) + { + int entrypoint_device = INVALID_DEVICE_ORDINAL; + cudaError_t error = cudaSuccess; + + if (device == INVALID_DEVICE_ORDINAL) + { + if (CubDebug(error = cudaGetDevice(&entrypoint_device))) + return error; + device = entrypoint_device; + } + + // Lock + mutex.Lock(); + + // Find corresponding block descriptor + bool recached = false; + BlockDescriptor search_key(d_ptr, device); + BusyBlocks::iterator block_itr = live_blocks.find(search_key); + if (block_itr != live_blocks.end()) + { + // Remove from live blocks + search_key = *block_itr; + live_blocks.erase(block_itr); + cached_bytes[device].live -= search_key.bytes; + + // Keep the returned allocation if bin is valid and we won't exceed the max cached threshold + if ((search_key.bin != INVALID_BIN) && (cached_bytes[device].free + search_key.bytes <= max_cached_bytes)) + { + // Insert returned allocation into free blocks + recached = true; + cached_blocks.insert(search_key); + cached_bytes[device].free += search_key.bytes; + + if (debug) _CubLog("\tDevice %d returned %lld bytes from associated stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks outstanding. (%lld bytes)\n", + device, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) cached_blocks.size(), + (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); + } + } + + // Unlock + mutex.Unlock(); + + // First set to specified device (entrypoint may not be set) + if (device != entrypoint_device) + { + if (CubDebug(error = cudaGetDevice(&entrypoint_device))) return error; + if (CubDebug(error = cudaSetDevice(device))) return error; + } + + if (recached) + { + // Insert the ready event in the associated stream (must have current device set properly) + if (CubDebug(error = cudaEventRecord(search_key.ready_event, search_key.associated_stream))) return error; + } + else + { + // Free the allocation from the runtime and cleanup the event. + if (CubDebug(error = cudaFree(d_ptr))) return error; + if (CubDebug(error = cudaEventDestroy(search_key.ready_event))) return error; + + if (debug) _CubLog("\tDevice %d freed %lld bytes from associated stream %lld.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n", + device, (long long) search_key.bytes, (long long) search_key.associated_stream, (long long) cached_blocks.size(), (long long) cached_bytes[device].free, (long long) live_blocks.size(), (long long) cached_bytes[device].live); + } + + // Reset device + if ((entrypoint_device != INVALID_DEVICE_ORDINAL) && (entrypoint_device != device)) + { + if (CubDebug(error = cudaSetDevice(entrypoint_device))) return error; + } + + return error; + } + + + /** + * \brief Frees a live allocation of device memory on the current device, returning it to the allocator. + * + * Once freed, the allocation becomes available immediately for reuse within the \p active_stream + * with which it was associated with during allocation, and it becomes available for reuse within other + * streams when all prior work submitted to \p active_stream has completed. 
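+     *
+     * \par
+     * A minimal usage sketch (illustrative only; the buffer name and size are
+     * arbitrary placeholders):
+     * \code
+     * cub::CachingDeviceAllocator allocator;
+     *
+     * double *d_data = NULL;
+     * allocator.DeviceAllocate((void **) &d_data, 1024 * sizeof(double));
+     *
+     * // ... launch kernels that use d_data on the default stream ...
+     *
+     * allocator.DeviceFree(d_data);    // the block is returned to the allocator's cache
+     * \endcode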
+ */ + cudaError_t DeviceFree( + void* d_ptr) + { + return DeviceFree(INVALID_DEVICE_ORDINAL, d_ptr); + } + + + /** + * \brief Frees all cached device allocations on all devices + */ + cudaError_t FreeAllCached() + { + cudaError_t error = cudaSuccess; + int entrypoint_device = INVALID_DEVICE_ORDINAL; + int current_device = INVALID_DEVICE_ORDINAL; + + mutex.Lock(); + + while (!cached_blocks.empty()) + { + // Get first block + CachedBlocks::iterator begin = cached_blocks.begin(); + + // Get entry-point device ordinal if necessary + if (entrypoint_device == INVALID_DEVICE_ORDINAL) + { + if (CubDebug(error = cudaGetDevice(&entrypoint_device))) break; + } + + // Set current device ordinal if necessary + if (begin->device != current_device) + { + if (CubDebug(error = cudaSetDevice(begin->device))) break; + current_device = begin->device; + } + + // Free device memory + if (CubDebug(error = cudaFree(begin->d_ptr))) break; + if (CubDebug(error = cudaEventDestroy(begin->ready_event))) break; + + // Reduce balance and erase entry + cached_bytes[current_device].free -= begin->bytes; + + if (debug) _CubLog("\tDevice %d freed %lld bytes.\n\t\t %lld available blocks cached (%lld bytes), %lld live blocks (%lld bytes) outstanding.\n", + current_device, (long long) begin->bytes, (long long) cached_blocks.size(), (long long) cached_bytes[current_device].free, (long long) live_blocks.size(), (long long) cached_bytes[current_device].live); + + cached_blocks.erase(begin); + } + + mutex.Unlock(); + + // Attempt to revert back to entry-point device if necessary + if (entrypoint_device != INVALID_DEVICE_ORDINAL) + { + if (CubDebug(error = cudaSetDevice(entrypoint_device))) return error; + } + + return error; + } + + + /** + * \brief Destructor + */ + virtual ~CachingDeviceAllocator() + { + if (!skip_cleanup) + FreeAllCached(); + } + +}; + + + + +/** @} */ // end group UtilMgmt + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_arch.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_arch.cuh new file mode 100644 index 0000000..28d81e7 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_arch.cuh @@ -0,0 +1,151 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Static architectural properties by SM version. + */ + +#pragma once + +#include "util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +#if (__CUDACC_VER_MAJOR__ >= 9) && !defined(CUB_USE_COOPERATIVE_GROUPS) + #define CUB_USE_COOPERATIVE_GROUPS +#endif + +/// CUB_PTX_ARCH reflects the PTX version targeted by the active compiler pass (or zero during the host pass). +#ifndef CUB_PTX_ARCH + #ifndef __CUDA_ARCH__ + #define CUB_PTX_ARCH 0 + #else + #define CUB_PTX_ARCH __CUDA_ARCH__ + #endif +#endif + + +/// Whether or not the source targeted by the active compiler pass is allowed to invoke device kernels or methods from the CUDA runtime API. +#ifndef CUB_RUNTIME_FUNCTION + #if !defined(__CUDA_ARCH__) || (__CUDA_ARCH__>= 350 && defined(__CUDACC_RDC__)) + #define CUB_RUNTIME_ENABLED + #define CUB_RUNTIME_FUNCTION __host__ __device__ + #else + #define CUB_RUNTIME_FUNCTION __host__ + #endif +#endif + + +/// Number of threads per warp +#ifndef CUB_LOG_WARP_THREADS + #define CUB_LOG_WARP_THREADS(arch) \ + (5) + #define CUB_WARP_THREADS(arch) \ + (1 << CUB_LOG_WARP_THREADS(arch)) + + #define CUB_PTX_WARP_THREADS CUB_WARP_THREADS(CUB_PTX_ARCH) + #define CUB_PTX_LOG_WARP_THREADS CUB_LOG_WARP_THREADS(CUB_PTX_ARCH) +#endif + + +/// Number of smem banks +#ifndef CUB_LOG_SMEM_BANKS + #define CUB_LOG_SMEM_BANKS(arch) \ + ((arch >= 200) ? \ + (5) : \ + (4)) + #define CUB_SMEM_BANKS(arch) \ + (1 << CUB_LOG_SMEM_BANKS(arch)) + + #define CUB_PTX_LOG_SMEM_BANKS CUB_LOG_SMEM_BANKS(CUB_PTX_ARCH) + #define CUB_PTX_SMEM_BANKS CUB_SMEM_BANKS(CUB_PTX_ARCH) +#endif + + +/// Oversubscription factor +#ifndef CUB_SUBSCRIPTION_FACTOR + #define CUB_SUBSCRIPTION_FACTOR(arch) \ + ((arch >= 300) ? \ + (5) : \ + ((arch >= 200) ? \ + (3) : \ + (10))) + #define CUB_PTX_SUBSCRIPTION_FACTOR CUB_SUBSCRIPTION_FACTOR(CUB_PTX_ARCH) +#endif + + +/// Prefer padding overhead vs X-way conflicts greater than this threshold +#ifndef CUB_PREFER_CONFLICT_OVER_PADDING + #define CUB_PREFER_CONFLICT_OVER_PADDING(arch) \ + ((arch >= 300) ? \ + (1) : \ + (4)) + #define CUB_PTX_PREFER_CONFLICT_OVER_PADDING CUB_PREFER_CONFLICT_OVER_PADDING(CUB_PTX_ARCH) +#endif + + +/// Scale down the number of threads to keep same amount of scratch storage as the nominal configuration for 4B data. Minimum of two warps. +#ifndef CUB_SCALED_BLOCK_THREADS + #define CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \ + (CUB_MIN( \ + NOMINAL_4B_BLOCK_THREADS, \ + CUB_WARP_THREADS(PTX_ARCH) * CUB_MAX( \ + 2, \ + (NOMINAL_4B_BLOCK_THREADS / CUB_WARP_THREADS(PTX_ARCH)) * 4 / sizeof(T)))) +#endif + +/// Scale down number of items per thread to keep the same amount of register storage as the nominal configuration for 4B data. 
Minimum 1 item per thread +#ifndef CUB_SCALED_ITEMS_PER_THREAD + #define CUB_SCALED_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) \ + CUB_MAX( \ + 1, \ + (sizeof(T) < 4) ? \ + ((NOMINAL_4B_ITEMS_PER_THREAD * NOMINAL_4B_BLOCK_THREADS * 4) / CUB_MAX(4, sizeof(T))) / CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH) / 2 : \ + ((NOMINAL_4B_ITEMS_PER_THREAD * NOMINAL_4B_BLOCK_THREADS * 4) / CUB_MAX(4, sizeof(T))) / CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, PTX_ARCH)) +#endif + +/// Define both nominal threads-per-block and items-per-thread +#ifndef CUB_SCALED_GRANULARITIES + #define CUB_SCALED_GRANULARITIES(NOMINAL_4B_BLOCK_THREADS, NOMINAL_4B_ITEMS_PER_THREAD, T) \ + CUB_SCALED_BLOCK_THREADS(NOMINAL_4B_BLOCK_THREADS, T, 200), \ + CUB_SCALED_ITEMS_PER_THREAD(NOMINAL_4B_ITEMS_PER_THREAD, NOMINAL_4B_BLOCK_THREADS, T, 200) +#endif + + + +#endif // Do not document + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_debug.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_debug.cuh new file mode 100644 index 0000000..3ad832e --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_debug.cuh @@ -0,0 +1,145 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Error and event logging routines. + * + * The following macros definitions are supported: + * - \p CUB_LOG. Simple event messages are printed to \p stdout. 
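+ *
+ * A typical error-checking idiom built on these routines (illustrative only;
+ * \p d_ptr and \p bytes are placeholders):
+ * \code
+ * cudaError_t error = CubDebug(cudaMalloc(&d_ptr, bytes));
+ * if (error) return error;   // a message was already printed if CUB_STDERR is defined
+ * \endcode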
+ */
+
+#pragma once
+
+#include <stdio.h>
+#include "util_namespace.cuh"
+#include "util_arch.cuh"
+
+/// Optional outer namespace(s)
+CUB_NS_PREFIX
+
+/// CUB namespace
+namespace cub {
+
+
+/**
+ * \addtogroup UtilMgmt
+ * @{
+ */
+
+
+/// CUB error reporting macro (prints error messages to stderr)
+#if (defined(DEBUG) || defined(_DEBUG)) && !defined(CUB_STDERR)
+    #define CUB_STDERR
+#endif
+
+
+
+/**
+ * \brief %If \p CUB_STDERR is defined and \p error is not \p cudaSuccess, the corresponding error message is printed to \p stderr (or \p stdout in device code) along with the supplied source context.
+ *
+ * \return The CUDA error.
+ */
+__host__ __device__ __forceinline__ cudaError_t Debug(
+    cudaError_t     error,
+    const char*     filename,
+    int             line)
+{
+    (void)filename;
+    (void)line;
+#ifdef CUB_STDERR
+    if (error)
+    {
+    #if (CUB_PTX_ARCH == 0)
+        fprintf(stderr, "CUDA error %d [%s, %d]: %s\n", error, filename, line, cudaGetErrorString(error));
+        fflush(stderr);
+    #elif (CUB_PTX_ARCH >= 200)
+        printf("CUDA error %d [block (%d,%d,%d) thread (%d,%d,%d), %s, %d]\n", error, blockIdx.z, blockIdx.y, blockIdx.x, threadIdx.z, threadIdx.y, threadIdx.x, filename, line);
+    #endif
+    }
+#endif
+    return error;
+}
+
+
+/**
+ * \brief Debug macro
+ */
+#ifndef CubDebug
+    #define CubDebug(e) cub::Debug((cudaError_t) (e), __FILE__, __LINE__)
+#endif
+
+
+/**
+ * \brief Debug macro with exit
+ */
+#ifndef CubDebugExit
+    #define CubDebugExit(e) if (cub::Debug((cudaError_t) (e), __FILE__, __LINE__)) { exit(1); }
+#endif
+
+
+/**
+ * \brief Log macro for printf statements.
+ */
+#if !defined(_CubLog)
+    #if !(defined(__clang__) && defined(__CUDA__))
+        #if (CUB_PTX_ARCH == 0)
+            #define _CubLog(format, ...) printf(format,__VA_ARGS__);
+        #elif (CUB_PTX_ARCH >= 200)
+            #define _CubLog(format, ...) printf("[block (%d,%d,%d), thread (%d,%d,%d)]: " format, blockIdx.z, blockIdx.y, blockIdx.x, threadIdx.z, threadIdx.y, threadIdx.x, __VA_ARGS__);
+        #endif
+    #else
+        // XXX shameless hack for clang around variadic printf...
+        //     Compiles w/o supplying -std=c++11 but shows a warning,
+        //     so we silence it :)
+        #pragma clang diagnostic ignored "-Wc++11-extensions"
+        #pragma clang diagnostic ignored "-Wunnamed-type-template-args"
+        template <class... Args>
+        inline __host__ __device__ void va_printf(char const* format, Args const&... args)
+        {
+    #ifdef __CUDA_ARCH__
+            printf(format, blockIdx.z, blockIdx.y, blockIdx.x, threadIdx.z, threadIdx.y, threadIdx.x, args...);
+    #else
+            printf(format, args...);
+    #endif
+        }
+        #ifndef __CUDA_ARCH__
+            #define _CubLog(format, ...) va_printf(format,__VA_ARGS__);
+        #else
+            #define _CubLog(format, ...) va_printf("[block (%d,%d,%d), thread (%d,%d,%d)]: " format, __VA_ARGS__);
+        #endif
+    #endif
+#endif
+
+
+
+
+/** @} */               // end group UtilMgmt
+
+}               // CUB namespace
+CUB_NS_POSTFIX  // Optional outer namespace(s)
diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_device.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_device.cuh
new file mode 100644
index 0000000..a5f3b61
--- /dev/null
+++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_device.cuh
@@ -0,0 +1,347 @@
+/******************************************************************************
+ * Copyright (c) 2011, Duane Merrill. All rights reserved.
+ * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Properties of a given CUDA device and the corresponding PTX bundle + */ + +#pragma once + +#include "util_type.cuh" +#include "util_arch.cuh" +#include "util_debug.cuh" +#include "util_namespace.cuh" +#include "util_macro.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \addtogroup UtilMgmt + * @{ + */ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + + +/** + * Alias temporaries to externally-allocated device storage (or simply return the amount of storage needed). + */ +template +__host__ __device__ __forceinline__ +cudaError_t AliasTemporaries( + void *d_temp_storage, ///< [in] %Device-accessible allocation of temporary storage. When NULL, the required allocation size is written to \p temp_storage_bytes and no work is done. 
+ size_t &temp_storage_bytes, ///< [in,out] Size in bytes of \t d_temp_storage allocation + void* (&allocations)[ALLOCATIONS], ///< [in,out] Pointers to device allocations needed + size_t (&allocation_sizes)[ALLOCATIONS]) ///< [in] Sizes in bytes of device allocations needed +{ + const int ALIGN_BYTES = 256; + const int ALIGN_MASK = ~(ALIGN_BYTES - 1); + + // Compute exclusive prefix sum over allocation requests + size_t allocation_offsets[ALLOCATIONS]; + size_t bytes_needed = 0; + for (int i = 0; i < ALLOCATIONS; ++i) + { + size_t allocation_bytes = (allocation_sizes[i] + ALIGN_BYTES - 1) & ALIGN_MASK; + allocation_offsets[i] = bytes_needed; + bytes_needed += allocation_bytes; + } + bytes_needed += ALIGN_BYTES - 1; + + // Check if the caller is simply requesting the size of the storage allocation + if (!d_temp_storage) + { + temp_storage_bytes = bytes_needed; + return cudaSuccess; + } + + // Check if enough storage provided + if (temp_storage_bytes < bytes_needed) + { + return CubDebug(cudaErrorInvalidValue); + } + + // Alias + d_temp_storage = (void *) ((size_t(d_temp_storage) + ALIGN_BYTES - 1) & ALIGN_MASK); + for (int i = 0; i < ALLOCATIONS; ++i) + { + allocations[i] = static_cast(d_temp_storage) + allocation_offsets[i]; + } + + return cudaSuccess; +} + + +/** + * Empty kernel for querying PTX manifest metadata (e.g., version) for the current device + */ +template +__global__ void EmptyKernel(void) { } + + +#endif // DOXYGEN_SHOULD_SKIP_THIS + +/** + * \brief Retrieves the PTX version that will be used on the current device (major * 100 + minor * 10) + */ +CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t PtxVersion(int &ptx_version) +{ + struct Dummy + { + /// Type definition of the EmptyKernel kernel entry point + typedef void (*EmptyKernelPtr)(); + + /// Force EmptyKernel to be generated if this class is used + CUB_RUNTIME_FUNCTION __forceinline__ + EmptyKernelPtr Empty() + { + return EmptyKernel; + } + }; + + +#ifndef CUB_RUNTIME_ENABLED + (void)ptx_version; + + // CUDA API calls not supported from this device + return cudaErrorInvalidConfiguration; + +#elif (CUB_PTX_ARCH > 0) + + ptx_version = CUB_PTX_ARCH; + return cudaSuccess; + +#else + + cudaError_t error = cudaSuccess; + do + { + cudaFuncAttributes empty_kernel_attrs; + if (CubDebug(error = cudaFuncGetAttributes(&empty_kernel_attrs, EmptyKernel))) break; + ptx_version = empty_kernel_attrs.ptxVersion * 10; + } + while (0); + + return error; + +#endif +} + + +/** + * \brief Retrieves the SM version (major * 100 + minor * 10) + */ +CUB_RUNTIME_FUNCTION __forceinline__ cudaError_t SmVersion(int &sm_version, int device_ordinal) +{ +#ifndef CUB_RUNTIME_ENABLED + (void)sm_version; + (void)device_ordinal; + + // CUDA API calls not supported from this device + return cudaErrorInvalidConfiguration; + +#else + + cudaError_t error = cudaSuccess; + do + { + // Fill in SM version + int major, minor; + if (CubDebug(error = cudaDeviceGetAttribute(&major, cudaDevAttrComputeCapabilityMajor, device_ordinal))) break; + if (CubDebug(error = cudaDeviceGetAttribute(&minor, cudaDevAttrComputeCapabilityMinor, device_ordinal))) break; + sm_version = major * 100 + minor * 10; + } + while (0); + + return error; + +#endif +} + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +/** + * Synchronize the stream if specified + */ +CUB_RUNTIME_FUNCTION __forceinline__ +static cudaError_t SyncStream(cudaStream_t stream) +{ +#if (CUB_PTX_ARCH == 0) + return cudaStreamSynchronize(stream); +#else + (void)stream; + // Device can't yet sync on a specific 
stream + return cudaDeviceSynchronize(); +#endif +} + + +/** + * \brief Computes maximum SM occupancy in thread blocks for executing the given kernel function pointer \p kernel_ptr on the current device with \p block_threads per thread block. + * + * \par Snippet + * The code snippet below illustrates the use of the MaxSmOccupancy function. + * \par + * \code + * #include // or equivalently + * + * template + * __global__ void ExampleKernel() + * { + * // Allocate shared memory for BlockScan + * __shared__ volatile T buffer[4096]; + * + * ... + * } + * + * ... + * + * // Determine SM occupancy for ExampleKernel specialized for unsigned char + * int max_sm_occupancy; + * MaxSmOccupancy(max_sm_occupancy, ExampleKernel, 64); + * + * // max_sm_occupancy <-- 4 on SM10 + * // max_sm_occupancy <-- 8 on SM20 + * // max_sm_occupancy <-- 12 on SM35 + * + * \endcode + * + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t MaxSmOccupancy( + int &max_sm_occupancy, ///< [out] maximum number of thread blocks that can reside on a single SM + KernelPtr kernel_ptr, ///< [in] Kernel pointer for which to compute SM occupancy + int block_threads, ///< [in] Number of threads per thread block + int dynamic_smem_bytes = 0) +{ +#ifndef CUB_RUNTIME_ENABLED + (void)dynamic_smem_bytes; + (void)block_threads; + (void)kernel_ptr; + (void)max_sm_occupancy; + + // CUDA API calls not supported from this device + return CubDebug(cudaErrorInvalidConfiguration); + +#else + + return cudaOccupancyMaxActiveBlocksPerMultiprocessor ( + &max_sm_occupancy, + kernel_ptr, + block_threads, + dynamic_smem_bytes); + +#endif // CUB_RUNTIME_ENABLED +} + + +/****************************************************************************** + * Policy management + ******************************************************************************/ + +/** + * Kernel dispatch configuration + */ +struct KernelConfig +{ + int block_threads; + int items_per_thread; + int tile_size; + int sm_occupancy; + + CUB_RUNTIME_FUNCTION __forceinline__ + KernelConfig() : block_threads(0), items_per_thread(0), tile_size(0), sm_occupancy(0) {} + + template + CUB_RUNTIME_FUNCTION __forceinline__ + cudaError_t Init(KernelPtrT kernel_ptr) + { + block_threads = AgentPolicyT::BLOCK_THREADS; + items_per_thread = AgentPolicyT::ITEMS_PER_THREAD; + tile_size = block_threads * items_per_thread; + cudaError_t retval = MaxSmOccupancy(sm_occupancy, kernel_ptr, block_threads); + return retval; + } +}; + + + +/// Helper for dispatching into a policy chain +template +struct ChainedPolicy +{ + /// The policy for the active compiler pass + typedef typename If<(CUB_PTX_ARCH < PTX_VERSION), typename PrevPolicyT::ActivePolicy, PolicyT>::Type ActivePolicy; + + /// Specializes and dispatches op in accordance to the first policy in the chain of adequate PTX version + template + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Invoke(int ptx_version, FunctorT &op) + { + if (ptx_version < PTX_VERSION) { + return PrevPolicyT::Invoke(ptx_version, op); + } + return op.template Invoke(); + } +}; + +/// Helper for dispatching into a policy chain (end-of-chain specialization) +template +struct ChainedPolicy +{ + /// The policy for the active compiler pass + typedef PolicyT ActivePolicy; + + /// Specializes and dispatches op in accordance to the first policy in the chain of adequate PTX version + template + CUB_RUNTIME_FUNCTION __forceinline__ + static cudaError_t Invoke(int /*ptx_version*/, FunctorT &op) { + return op.template Invoke(); + } +}; + + + + +#endif // Do not 
document + + + + +/** @} */ // end group UtilMgmt + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_macro.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_macro.cuh new file mode 100644 index 0000000..ff86365 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_macro.cuh @@ -0,0 +1,103 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Common C/C++ macro utilities + ******************************************************************************/ + +#pragma once + +#include "util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \addtogroup UtilModule + * @{ + */ + +#ifndef CUB_ALIGN + #if defined(_WIN32) || defined(_WIN64) + /// Align struct + #define CUB_ALIGN(bytes) __declspec(align(32)) + #else + /// Align struct + #define CUB_ALIGN(bytes) __attribute__((aligned(bytes))) + #endif +#endif + +#ifndef CUB_MAX + /// Select maximum(a, b) + #define CUB_MAX(a, b) (((b) > (a)) ? (b) : (a)) +#endif + +#ifndef CUB_MIN + /// Select minimum(a, b) + #define CUB_MIN(a, b) (((b) < (a)) ? 
(b) : (a)) +#endif + +#ifndef CUB_QUOTIENT_FLOOR + /// Quotient of x/y rounded down to nearest integer + #define CUB_QUOTIENT_FLOOR(x, y) ((x) / (y)) +#endif + +#ifndef CUB_QUOTIENT_CEILING + /// Quotient of x/y rounded up to nearest integer + #define CUB_QUOTIENT_CEILING(x, y) (((x) + (y) - 1) / (y)) +#endif + +#ifndef CUB_ROUND_UP_NEAREST + /// x rounded up to the nearest multiple of y + #define CUB_ROUND_UP_NEAREST(x, y) ((((x) + (y) - 1) / (y)) * y) +#endif + +#ifndef CUB_ROUND_DOWN_NEAREST + /// x rounded down to the nearest multiple of y + #define CUB_ROUND_DOWN_NEAREST(x, y) (((x) / (y)) * y) +#endif + + +#ifndef CUB_STATIC_ASSERT + #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + #define CUB_CAT_(a, b) a ## b + #define CUB_CAT(a, b) CUB_CAT_(a, b) + #endif // DOXYGEN_SHOULD_SKIP_THIS + + /// Static assert + #define CUB_STATIC_ASSERT(cond, msg) typedef int CUB_CAT(cub_static_assert, __LINE__)[(cond) ? 1 : -1] +#endif + +/** @} */ // end group UtilModule + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_namespace.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_namespace.cuh new file mode 100644 index 0000000..c8991d0 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_namespace.cuh @@ -0,0 +1,46 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/** + * \file + * Place-holder for prefixing the cub namespace + */ + +#pragma once + +// For example: +//#define CUB_NS_PREFIX namespace thrust{ namespace detail { +//#define CUB_NS_POSTFIX } } + +#ifndef CUB_NS_PREFIX +#define CUB_NS_PREFIX +#endif + +#ifndef CUB_NS_POSTFIX +#define CUB_NS_POSTFIX +#endif diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_ptx.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_ptx.cuh new file mode 100644 index 0000000..582ca0d --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_ptx.cuh @@ -0,0 +1,758 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/** + * \file + * PTX intrinsics + */ + + +#pragma once + +#include "util_type.cuh" +#include "util_arch.cuh" +#include "util_namespace.cuh" +#include "util_debug.cuh" + + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \addtogroup UtilPtx + * @{ + */ + + +/****************************************************************************** + * PTX helper macros + ******************************************************************************/ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +/** + * Register modifier for pointer-types (for inlining PTX assembly) + */ +#if defined(_WIN64) || defined(__LP64__) + #define __CUB_LP64__ 1 + // 64-bit register modifier for inlined asm + #define _CUB_ASM_PTR_ "l" + #define _CUB_ASM_PTR_SIZE_ "u64" +#else + #define __CUB_LP64__ 0 + // 32-bit register modifier for inlined asm + #define _CUB_ASM_PTR_ "r" + #define _CUB_ASM_PTR_SIZE_ "u32" +#endif + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + +/****************************************************************************** + * Inlined PTX intrinsics + ******************************************************************************/ + +/** + * \brief Shift-right then add. Returns (\p x >> \p shift) + \p addend. + */ +__device__ __forceinline__ unsigned int SHR_ADD( + unsigned int x, + unsigned int shift, + unsigned int addend) +{ + unsigned int ret; +#if CUB_PTX_ARCH >= 200 + asm ("vshr.u32.u32.u32.clamp.add %0, %1, %2, %3;" : + "=r"(ret) : "r"(x), "r"(shift), "r"(addend)); +#else + ret = (x >> shift) + addend; +#endif + return ret; +} + + +/** + * \brief Shift-left then add. Returns (\p x << \p shift) + \p addend. + */ +__device__ __forceinline__ unsigned int SHL_ADD( + unsigned int x, + unsigned int shift, + unsigned int addend) +{ + unsigned int ret; +#if CUB_PTX_ARCH >= 200 + asm ("vshl.u32.u32.u32.clamp.add %0, %1, %2, %3;" : + "=r"(ret) : "r"(x), "r"(shift), "r"(addend)); +#else + ret = (x << shift) + addend; +#endif + return ret; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +/** + * Bitfield-extract. + */ +template +__device__ __forceinline__ unsigned int BFE( + UnsignedBits source, + unsigned int bit_start, + unsigned int num_bits, + Int2Type /*byte_len*/) +{ + unsigned int bits; +#if CUB_PTX_ARCH >= 200 + asm ("bfe.u32 %0, %1, %2, %3;" : "=r"(bits) : "r"((unsigned int) source), "r"(bit_start), "r"(num_bits)); +#else + const unsigned int MASK = (1 << num_bits) - 1; + bits = (source >> bit_start) & MASK; +#endif + return bits; +} + + +/** + * Bitfield-extract for 64-bit types. + */ +template +__device__ __forceinline__ unsigned int BFE( + UnsignedBits source, + unsigned int bit_start, + unsigned int num_bits, + Int2Type<8> /*byte_len*/) +{ + const unsigned long long MASK = (1ull << num_bits) - 1; + return (source >> bit_start) & MASK; +} + +#endif // DOXYGEN_SHOULD_SKIP_THIS + +/** + * \brief Bitfield-extract. Extracts \p num_bits from \p source starting at bit-offset \p bit_start. The input \p source may be an 8b, 16b, 32b, or 64b unsigned integer type. + */ +template +__device__ __forceinline__ unsigned int BFE( + UnsignedBits source, + unsigned int bit_start, + unsigned int num_bits) +{ + return BFE(source, bit_start, num_bits, Int2Type()); +} + + +/** + * \brief Bitfield insert. Inserts the \p num_bits least significant bits of \p y into \p x at bit-offset \p bit_start. 
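+ *
+ * \par
+ * For example (illustrative): with \p x = 0x00000000, \p y = 0xFFFFFFFF,
+ * \p bit_start = 8 and \p num_bits = 4, \p ret receives 0x00000F00.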
+ */ +__device__ __forceinline__ void BFI( + unsigned int &ret, + unsigned int x, + unsigned int y, + unsigned int bit_start, + unsigned int num_bits) +{ +#if CUB_PTX_ARCH >= 200 + asm ("bfi.b32 %0, %1, %2, %3, %4;" : + "=r"(ret) : "r"(y), "r"(x), "r"(bit_start), "r"(num_bits)); +#else + x <<= bit_start; + unsigned int MASK_X = ((1 << num_bits) - 1) << bit_start; + unsigned int MASK_Y = ~MASK_X; + ret = (y & MASK_Y) | (x & MASK_X); +#endif +} + + +/** + * \brief Three-operand add. Returns \p x + \p y + \p z. + */ +__device__ __forceinline__ unsigned int IADD3(unsigned int x, unsigned int y, unsigned int z) +{ +#if CUB_PTX_ARCH >= 200 + asm ("vadd.u32.u32.u32.add %0, %1, %2, %3;" : "=r"(x) : "r"(x), "r"(y), "r"(z)); +#else + x = x + y + z; +#endif + return x; +} + + +/** + * \brief Byte-permute. Pick four arbitrary bytes from two 32-bit registers, and reassemble them into a 32-bit destination register. For SM2.0 or later. + * + * \par + * The bytes in the two source registers \p a and \p b are numbered from 0 to 7: + * {\p b, \p a} = {{b7, b6, b5, b4}, {b3, b2, b1, b0}}. For each of the four bytes + * {b3, b2, b1, b0} selected in the return value, a 4-bit selector is defined within + * the four lower "nibbles" of \p index: {\p index } = {n7, n6, n5, n4, n3, n2, n1, n0} + * + * \par Snippet + * The code snippet below illustrates byte-permute. + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * int a = 0x03020100; + * int b = 0x07060504; + * int index = 0x00007531; + * + * int selected = PRMT(a, b, index); // 0x07050301 + * + * \endcode + * + */ +__device__ __forceinline__ int PRMT(unsigned int a, unsigned int b, unsigned int index) +{ + int ret; + asm ("prmt.b32 %0, %1, %2, %3;" : "=r"(ret) : "r"(a), "r"(b), "r"(index)); + return ret; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +/** + * Sync-threads barrier. 
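+ * Waits at named barrier 1 until \p count threads of the thread block have arrived.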
+ */ +__device__ __forceinline__ void BAR(int count) +{ + asm volatile("bar.sync 1, %0;" : : "r"(count)); +} + +/** + * CTA barrier + */ +__device__ __forceinline__ void CTA_SYNC() +{ + __syncthreads(); +} + + +/** + * CTA barrier with predicate + */ +__device__ __forceinline__ int CTA_SYNC_AND(int p) +{ + return __syncthreads_and(p); +} + + +/** + * Warp barrier + */ +__device__ __forceinline__ void WARP_SYNC(unsigned int member_mask) +{ +#ifdef CUB_USE_COOPERATIVE_GROUPS + __syncwarp(member_mask); +#endif +} + + +/** + * Warp any + */ +__device__ __forceinline__ int WARP_ANY(int predicate, unsigned int member_mask) +{ +#ifdef CUB_USE_COOPERATIVE_GROUPS + return __any_sync(member_mask, predicate); +#else + return ::__any(predicate); +#endif +} + + +/** + * Warp any + */ +__device__ __forceinline__ int WARP_ALL(int predicate, unsigned int member_mask) +{ +#ifdef CUB_USE_COOPERATIVE_GROUPS + return __all_sync(member_mask, predicate); +#else + return ::__all(predicate); +#endif +} + + +/** + * Warp ballot + */ +__device__ __forceinline__ int WARP_BALLOT(int predicate, unsigned int member_mask) +{ +#ifdef CUB_USE_COOPERATIVE_GROUPS + return __ballot_sync(member_mask, predicate); +#else + return __ballot(predicate); +#endif +} + +/** + * Warp synchronous shfl_up + */ +__device__ __forceinline__ +unsigned int SHFL_UP_SYNC(unsigned int word, int src_offset, int flags, unsigned int member_mask) +{ +#ifdef CUB_USE_COOPERATIVE_GROUPS + asm volatile("shfl.sync.up.b32 %0, %1, %2, %3, %4;" + : "=r"(word) : "r"(word), "r"(src_offset), "r"(flags), "r"(member_mask)); +#else + asm volatile("shfl.up.b32 %0, %1, %2, %3;" + : "=r"(word) : "r"(word), "r"(src_offset), "r"(flags)); +#endif + return word; +} + +/** + * Warp synchronous shfl_down + */ +__device__ __forceinline__ +unsigned int SHFL_DOWN_SYNC(unsigned int word, int src_offset, int flags, unsigned int member_mask) +{ +#ifdef CUB_USE_COOPERATIVE_GROUPS + asm volatile("shfl.sync.down.b32 %0, %1, %2, %3, %4;" + : "=r"(word) : "r"(word), "r"(src_offset), "r"(flags), "r"(member_mask)); +#else + asm volatile("shfl.down.b32 %0, %1, %2, %3;" + : "=r"(word) : "r"(word), "r"(src_offset), "r"(flags)); +#endif + return word; +} + +/** + * Warp synchronous shfl_idx + */ +__device__ __forceinline__ +unsigned int SHFL_IDX_SYNC(unsigned int word, int src_lane, int flags, unsigned int member_mask) +{ +#ifdef CUB_USE_COOPERATIVE_GROUPS + asm volatile("shfl.sync.idx.b32 %0, %1, %2, %3, %4;" + : "=r"(word) : "r"(word), "r"(src_lane), "r"(flags), "r"(member_mask)); +#else + asm volatile("shfl.idx.b32 %0, %1, %2, %3;" + : "=r"(word) : "r"(word), "r"(src_lane), "r"(flags)); +#endif + return word; +} + +/** + * Floating point multiply. (Mantissa LSB rounds towards zero.) + */ +__device__ __forceinline__ float FMUL_RZ(float a, float b) +{ + float d; + asm ("mul.rz.f32 %0, %1, %2;" : "=f"(d) : "f"(a), "f"(b)); + return d; +} + + +/** + * Floating point multiply-add. (Mantissa LSB rounds towards zero.) 
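+ * Returns (\p a * \p b) + \p c.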
+ */ +__device__ __forceinline__ float FFMA_RZ(float a, float b, float c) +{ + float d; + asm ("fma.rz.f32 %0, %1, %2, %3;" : "=f"(d) : "f"(a), "f"(b), "f"(c)); + return d; +} + +#endif // DOXYGEN_SHOULD_SKIP_THIS + +/** + * \brief Terminates the calling thread + */ +__device__ __forceinline__ void ThreadExit() { + asm volatile("exit;"); +} + + +/** + * \brief Abort execution and generate an interrupt to the host CPU + */ +__device__ __forceinline__ void ThreadTrap() { + asm volatile("trap;"); +} + + +/** + * \brief Returns the row-major linear thread identifier for a multidimensional thread block + */ +__device__ __forceinline__ int RowMajorTid(int block_dim_x, int block_dim_y, int block_dim_z) +{ + return ((block_dim_z == 1) ? 0 : (threadIdx.z * block_dim_x * block_dim_y)) + + ((block_dim_y == 1) ? 0 : (threadIdx.y * block_dim_x)) + + threadIdx.x; +} + + +/** + * \brief Returns the warp lane ID of the calling thread + */ +__device__ __forceinline__ unsigned int LaneId() +{ + unsigned int ret; + asm ("mov.u32 %0, %%laneid;" : "=r"(ret) ); + return ret; +} + + +/** + * \brief Returns the warp ID of the calling thread. Warp ID is guaranteed to be unique among warps, but may not correspond to a zero-based ranking within the thread block. + */ +__device__ __forceinline__ unsigned int WarpId() +{ + unsigned int ret; + asm ("mov.u32 %0, %%warpid;" : "=r"(ret) ); + return ret; +} + +/** + * \brief Returns the warp lane mask of all lanes less than the calling thread + */ +__device__ __forceinline__ unsigned int LaneMaskLt() +{ + unsigned int ret; + asm ("mov.u32 %0, %%lanemask_lt;" : "=r"(ret) ); + return ret; +} + +/** + * \brief Returns the warp lane mask of all lanes less than or equal to the calling thread + */ +__device__ __forceinline__ unsigned int LaneMaskLe() +{ + unsigned int ret; + asm ("mov.u32 %0, %%lanemask_le;" : "=r"(ret) ); + return ret; +} + +/** + * \brief Returns the warp lane mask of all lanes greater than the calling thread + */ +__device__ __forceinline__ unsigned int LaneMaskGt() +{ + unsigned int ret; + asm ("mov.u32 %0, %%lanemask_gt;" : "=r"(ret) ); + return ret; +} + +/** + * \brief Returns the warp lane mask of all lanes greater than or equal to the calling thread + */ +__device__ __forceinline__ unsigned int LaneMaskGe() +{ + unsigned int ret; + asm ("mov.u32 %0, %%lanemask_ge;" : "=r"(ret) ); + return ret; +} + +/** @} */ // end group UtilPtx + + + + +/** + * \brief Shuffle-up for any data type. Each warp-lanei obtains the value \p input contributed by warp-lanei-src_offset. For thread lanes \e i < src_offset, the thread's own \p input is returned to the thread. ![](shfl_up_logo.png) + * \ingroup WarpModule + * + * \tparam LOGICAL_WARP_THREADS The number of threads per "logical" warp. Must be a power-of-two <= 32. + * \tparam T [inferred] The input/output element type + * + * \par + * - Available only for SM3.0 or newer + * + * \par Snippet + * The code snippet below illustrates each thread obtaining a \p double value from the + * predecessor of its predecessor. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Obtain one input item per thread + * double thread_data = ... + * + * // Obtain item from two ranks below + * double peer_data = ShuffleUp<32>(thread_data, 2, 0, 0xffffffff); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the first warp of threads is {1.0, 2.0, 3.0, 4.0, 5.0, ..., 32.0}. + * The corresponding output \p peer_data will be {1.0, 2.0, 1.0, 2.0, 3.0, ..., 30.0}. 
+ * + */ +template < + int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp + typename T> +__device__ __forceinline__ T ShuffleUp( + T input, ///< [in] The value to broadcast + int src_offset, ///< [in] The relative down-offset of the peer to read from + int first_thread, ///< [in] Index of first lane in logical warp (typically 0) + unsigned int member_mask) ///< [in] 32-bit mask of participating warp lanes +{ + /// The 5-bit SHFL mask for logically splitting warps into sub-segments starts 8-bits up + enum { + SHFL_C = (32 - LOGICAL_WARP_THREADS) << 8 + }; + + typedef typename UnitWord::ShuffleWord ShuffleWord; + + const int WORDS = (sizeof(T) + sizeof(ShuffleWord) - 1) / sizeof(ShuffleWord); + + T output; + ShuffleWord *output_alias = reinterpret_cast(&output); + ShuffleWord *input_alias = reinterpret_cast(&input); + + unsigned int shuffle_word; + shuffle_word = SHFL_UP_SYNC((unsigned int)input_alias[0], src_offset, first_thread | SHFL_C, member_mask); + output_alias[0] = shuffle_word; + + #pragma unroll + for (int WORD = 1; WORD < WORDS; ++WORD) + { + shuffle_word = SHFL_UP_SYNC((unsigned int)input_alias[WORD], src_offset, first_thread | SHFL_C, member_mask); + output_alias[WORD] = shuffle_word; + } + + return output; +} + + +/** + * \brief Shuffle-down for any data type. Each warp-lanei obtains the value \p input contributed by warp-lanei+src_offset. For thread lanes \e i >= WARP_THREADS, the thread's own \p input is returned to the thread. ![](shfl_down_logo.png) + * \ingroup WarpModule + * + * \tparam LOGICAL_WARP_THREADS The number of threads per "logical" warp. Must be a power-of-two <= 32. + * \tparam T [inferred] The input/output element type + * + * \par + * - Available only for SM3.0 or newer + * + * \par Snippet + * The code snippet below illustrates each thread obtaining a \p double value from the + * successor of its successor. + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Obtain one input item per thread + * double thread_data = ... + * + * // Obtain item from two ranks below + * double peer_data = ShuffleDown<32>(thread_data, 2, 31, 0xffffffff); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the first warp of threads is {1.0, 2.0, 3.0, 4.0, 5.0, ..., 32.0}. + * The corresponding output \p peer_data will be {3.0, 4.0, 5.0, 6.0, 7.0, ..., 32.0}. 
+ * + */ +template < + int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp + typename T> +__device__ __forceinline__ T ShuffleDown( + T input, ///< [in] The value to broadcast + int src_offset, ///< [in] The relative up-offset of the peer to read from + int last_thread, ///< [in] Index of last thread in logical warp (typically 31 for a 32-thread warp) + unsigned int member_mask) ///< [in] 32-bit mask of participating warp lanes +{ + /// The 5-bit SHFL mask for logically splitting warps into sub-segments starts 8-bits up + enum { + SHFL_C = (32 - LOGICAL_WARP_THREADS) << 8 + }; + + typedef typename UnitWord::ShuffleWord ShuffleWord; + + const int WORDS = (sizeof(T) + sizeof(ShuffleWord) - 1) / sizeof(ShuffleWord); + + T output; + ShuffleWord *output_alias = reinterpret_cast(&output); + ShuffleWord *input_alias = reinterpret_cast(&input); + + unsigned int shuffle_word; + shuffle_word = SHFL_DOWN_SYNC((unsigned int)input_alias[0], src_offset, last_thread | SHFL_C, member_mask); + output_alias[0] = shuffle_word; + + #pragma unroll + for (int WORD = 1; WORD < WORDS; ++WORD) + { + shuffle_word = SHFL_DOWN_SYNC((unsigned int)input_alias[WORD], src_offset, last_thread | SHFL_C, member_mask); + output_alias[WORD] = shuffle_word; + } + + return output; +} + + +/** + * \brief Shuffle-broadcast for any data type. Each warp-lanei obtains the value \p input + * contributed by warp-lanesrc_lane. For \p src_lane < 0 or \p src_lane >= WARP_THREADS, + * then the thread's own \p input is returned to the thread. ![](shfl_broadcast_logo.png) + * + * \tparam LOGICAL_WARP_THREADS The number of threads per "logical" warp. Must be a power-of-two <= 32. + * \tparam T [inferred] The input/output element type + * + * \ingroup WarpModule + * + * \par + * - Available only for SM3.0 or newer + * + * \par Snippet + * The code snippet below illustrates each thread obtaining a \p double value from warp-lane0. + * + * \par + * \code + * #include // or equivalently + * + * __global__ void ExampleKernel(...) + * { + * // Obtain one input item per thread + * double thread_data = ... + * + * // Obtain item from thread 0 + * double peer_data = ShuffleIndex<32>(thread_data, 0, 0xffffffff); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the first warp of threads is {1.0, 2.0, 3.0, 4.0, 5.0, ..., 32.0}. + * The corresponding output \p peer_data will be {1.0, 1.0, 1.0, 1.0, 1.0, ..., 1.0}. 
+ * + */ +template < + int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp + typename T> +__device__ __forceinline__ T ShuffleIndex( + T input, ///< [in] The value to broadcast + int src_lane, ///< [in] Which warp lane is to do the broadcasting + unsigned int member_mask) ///< [in] 32-bit mask of participating warp lanes +{ + /// The 5-bit SHFL mask for logically splitting warps into sub-segments starts 8-bits up + enum { + SHFL_C = ((32 - LOGICAL_WARP_THREADS) << 8) | (LOGICAL_WARP_THREADS - 1) + }; + + typedef typename UnitWord::ShuffleWord ShuffleWord; + + const int WORDS = (sizeof(T) + sizeof(ShuffleWord) - 1) / sizeof(ShuffleWord); + + T output; + ShuffleWord *output_alias = reinterpret_cast(&output); + ShuffleWord *input_alias = reinterpret_cast(&input); + + unsigned int shuffle_word; + shuffle_word = SHFL_IDX_SYNC((unsigned int)input_alias[0], + src_lane, + SHFL_C, + member_mask); + + output_alias[0] = shuffle_word; + + #pragma unroll + for (int WORD = 1; WORD < WORDS; ++WORD) + { + shuffle_word = SHFL_IDX_SYNC((unsigned int)input_alias[WORD], + src_lane, + SHFL_C, + member_mask); + + output_alias[WORD] = shuffle_word; + } + + return output; +} + + + +/** + * Compute a 32b mask of threads having the same least-significant + * LABEL_BITS of \p label as the calling thread. + */ +template +inline __device__ unsigned int MatchAny(unsigned int label) +{ + unsigned int retval; + + // Extract masks of common threads for each bit + #pragma unroll + for (int BIT = 0; BIT < LABEL_BITS; ++BIT) + { + unsigned int mask; + unsigned int current_bit = 1 << BIT; + asm ("{\n" + " .reg .pred p;\n" + " and.b32 %0, %1, %2;" + " setp.eq.u32 p, %0, %2;\n" +#ifdef CUB_USE_COOPERATIVE_GROUPS + " vote.ballot.sync.b32 %0, p, 0xffffffff;\n" +#else + " vote.ballot.b32 %0, p;\n" +#endif + " @!p not.b32 %0, %0;\n" + "}\n" : "=r"(mask) : "r"(label), "r"(current_bit)); + + // Remove peers who differ + retval = (BIT == 0) ? mask : retval & mask; + } + + return retval; + +// // VOLTA match +// unsigned int retval; +// asm ("{\n" +// " match.any.sync.b32 %0, %1, 0xffffffff;\n" +// "}\n" : "=r"(retval) : "r"(label)); +// return retval; + +} + + + + + + + + + + + + + + + + + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_type.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_type.cuh new file mode 100644 index 0000000..0ba41e1 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/util_type.cuh @@ -0,0 +1,1167 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * Common type manipulation (metaprogramming) utilities + */ + +#pragma once + +#include +#include +#include + +#if (__CUDACC_VER_MAJOR__ >= 9) + #include +#endif + +#include "util_macro.cuh" +#include "util_arch.cuh" +#include "util_namespace.cuh" + + + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \addtogroup UtilModule + * @{ + */ + + + +/****************************************************************************** + * Type equality + ******************************************************************************/ + +/** + * \brief Type selection (IF ? ThenType : ElseType) + */ +template +struct If +{ + /// Conditional type result + typedef ThenType Type; // true +}; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +template +struct If +{ + typedef ElseType Type; // false +}; + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + + +/****************************************************************************** + * Conditional types + ******************************************************************************/ + +/** + * \brief Type equality test + */ +template +struct Equals +{ + enum { + VALUE = 0, + NEGATE = 1 + }; +}; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +template +struct Equals +{ + enum { + VALUE = 1, + NEGATE = 0 + }; +}; + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + +/****************************************************************************** + * Static math + ******************************************************************************/ + +/** + * \brief Statically determine log2(N), rounded up. + * + * For example: + * Log2<8>::VALUE // 3 + * Log2<3>::VALUE // 2 + */ +template +struct Log2 +{ + /// Static logarithm value + enum { VALUE = Log2> 1), COUNT + 1>::VALUE }; // Inductive case +}; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +template +struct Log2 +{ + enum {VALUE = (1 << (COUNT - 1) < N) ? // Base case + COUNT : + COUNT - 1 }; +}; + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + +/** + * \brief Statically determine if N is a power-of-two + */ +template +struct PowerOfTwo +{ + enum { VALUE = ((N & (N - 1)) == 0) }; +}; + + + +/****************************************************************************** + * Pointer vs. iterator detection + ******************************************************************************/ + +/** + * \brief Pointer vs. 
iterator + */ +template +struct IsPointer +{ + enum { VALUE = 0 }; +}; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +template +struct IsPointer +{ + enum { VALUE = 1 }; +}; + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + + +/****************************************************************************** + * Qualifier detection + ******************************************************************************/ + +/** + * \brief Volatile modifier test + */ +template +struct IsVolatile +{ + enum { VALUE = 0 }; +}; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +template +struct IsVolatile +{ + enum { VALUE = 1 }; +}; + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + +/****************************************************************************** + * Qualifier removal + ******************************************************************************/ + +/** + * \brief Removes \p const and \p volatile qualifiers from type \p Tp. + * + * For example: + * typename RemoveQualifiers::Type // int; + */ +template +struct RemoveQualifiers +{ + /// Type without \p const and \p volatile qualifiers + typedef Up Type; +}; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +template +struct RemoveQualifiers +{ + typedef Up Type; +}; + +template +struct RemoveQualifiers +{ + typedef Up Type; +}; + +template +struct RemoveQualifiers +{ + typedef Up Type; +}; + + +/****************************************************************************** + * Marker types + ******************************************************************************/ + +/** + * \brief A simple "NULL" marker type + */ +struct NullType +{ +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + + template + __host__ __device__ __forceinline__ NullType& operator =(const T&) { return *this; } + + __host__ __device__ __forceinline__ bool operator ==(const NullType&) { return true; } + + __host__ __device__ __forceinline__ bool operator !=(const NullType&) { return false; } + +#endif // DOXYGEN_SHOULD_SKIP_THIS +}; + + +/** + * \brief Allows for the treatment of an integral constant as a type at compile-time (e.g., to achieve static call dispatch based on constant integral values) + */ +template +struct Int2Type +{ + enum {VALUE = A}; +}; + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + + +/****************************************************************************** + * Size and alignment + ******************************************************************************/ + +/// Structure alignment +template +struct AlignBytes +{ + struct Pad + { + T val; + char byte; + }; + + enum + { + /// The "true CUDA" alignment of T in bytes + ALIGN_BYTES = sizeof(Pad) - sizeof(T) + }; + + /// The "truly aligned" type + typedef T Type; +}; + +// Specializations where host C++ compilers (e.g., 32-bit Windows) may disagree +// with device C++ compilers (EDG) on types passed as template parameters through +// kernel functions + +#define __CUB_ALIGN_BYTES(t, b) \ + template <> struct AlignBytes \ + { enum { ALIGN_BYTES = b }; typedef __align__(b) t Type; }; + +__CUB_ALIGN_BYTES(short4, 8) +__CUB_ALIGN_BYTES(ushort4, 8) +__CUB_ALIGN_BYTES(int2, 8) +__CUB_ALIGN_BYTES(uint2, 8) +__CUB_ALIGN_BYTES(long long, 8) +__CUB_ALIGN_BYTES(unsigned long long, 8) +__CUB_ALIGN_BYTES(float2, 8) +__CUB_ALIGN_BYTES(double, 8) +#ifdef _WIN32 + __CUB_ALIGN_BYTES(long2, 8) + __CUB_ALIGN_BYTES(ulong2, 8) +#else + __CUB_ALIGN_BYTES(long2, 16) + __CUB_ALIGN_BYTES(ulong2, 16) +#endif +__CUB_ALIGN_BYTES(int4, 16) +__CUB_ALIGN_BYTES(uint4, 16) 
+__CUB_ALIGN_BYTES(float4, 16) +__CUB_ALIGN_BYTES(long4, 16) +__CUB_ALIGN_BYTES(ulong4, 16) +__CUB_ALIGN_BYTES(longlong2, 16) +__CUB_ALIGN_BYTES(ulonglong2, 16) +__CUB_ALIGN_BYTES(double2, 16) +__CUB_ALIGN_BYTES(longlong4, 16) +__CUB_ALIGN_BYTES(ulonglong4, 16) +__CUB_ALIGN_BYTES(double4, 16) + +template struct AlignBytes : AlignBytes {}; +template struct AlignBytes : AlignBytes {}; +template struct AlignBytes : AlignBytes {}; + + +/// Unit-words of data movement +template +struct UnitWord +{ + enum { + ALIGN_BYTES = AlignBytes::ALIGN_BYTES + }; + + template + struct IsMultiple + { + enum { + UNIT_ALIGN_BYTES = AlignBytes::ALIGN_BYTES, + IS_MULTIPLE = (sizeof(T) % sizeof(Unit) == 0) && (ALIGN_BYTES % UNIT_ALIGN_BYTES == 0) + }; + }; + + /// Biggest shuffle word that T is a whole multiple of and is not larger than the alignment of T + typedef typename If::IS_MULTIPLE, + unsigned int, + typename If::IS_MULTIPLE, + unsigned short, + unsigned char>::Type>::Type ShuffleWord; + + /// Biggest volatile word that T is a whole multiple of and is not larger than the alignment of T + typedef typename If::IS_MULTIPLE, + unsigned long long, + ShuffleWord>::Type VolatileWord; + + /// Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T + typedef typename If::IS_MULTIPLE, + ulonglong2, + VolatileWord>::Type DeviceWord; + + /// Biggest texture reference word that T is a whole multiple of and is not larger than the alignment of T + typedef typename If::IS_MULTIPLE, + uint4, + typename If::IS_MULTIPLE, + uint2, + ShuffleWord>::Type>::Type TextureWord; +}; + + +// float2 specialization workaround (for SM10-SM13) +template <> +struct UnitWord +{ + typedef int ShuffleWord; +#if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130) + typedef float VolatileWord; + typedef uint2 DeviceWord; +#else + typedef unsigned long long VolatileWord; + typedef unsigned long long DeviceWord; +#endif + typedef float2 TextureWord; +}; + +// float4 specialization workaround (for SM10-SM13) +template <> +struct UnitWord +{ + typedef int ShuffleWord; +#if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130) + typedef float VolatileWord; + typedef uint4 DeviceWord; +#else + typedef unsigned long long VolatileWord; + typedef ulonglong2 DeviceWord; +#endif + typedef float4 TextureWord; +}; + + +// char2 specialization workaround (for SM10-SM13) +template <> +struct UnitWord +{ + typedef unsigned short ShuffleWord; +#if (CUB_PTX_ARCH > 0) && (CUB_PTX_ARCH <= 130) + typedef unsigned short VolatileWord; + typedef short DeviceWord; +#else + typedef unsigned short VolatileWord; + typedef unsigned short DeviceWord; +#endif + typedef unsigned short TextureWord; +}; + + +template struct UnitWord : UnitWord {}; +template struct UnitWord : UnitWord {}; +template struct UnitWord : UnitWord {}; + + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + + +/****************************************************************************** + * Vector type inference utilities. + ******************************************************************************/ + +/** + * \brief Exposes a member typedef \p Type that names the corresponding CUDA vector type if one exists. Otherwise \p Type refers to the CubVector structure itself, which will wrap the corresponding \p x, \p y, etc. vector fields. 
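+ *
+ * For example, \p CubVector<int, 2>::Type names the built-in \p int2, while a CubVector
+ * of a type with no built-in vector counterpart uses the generic wrapper defined below.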
+ */ +template struct CubVector; + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + +enum +{ + /// The maximum number of elements in CUDA vector types + MAX_VEC_ELEMENTS = 4, +}; + + +/** + * Generic vector-1 type + */ +template +struct CubVector +{ + T x; + + typedef T BaseType; + typedef CubVector Type; +}; + +/** + * Generic vector-2 type + */ +template +struct CubVector +{ + T x; + T y; + + typedef T BaseType; + typedef CubVector Type; +}; + +/** + * Generic vector-3 type + */ +template +struct CubVector +{ + T x; + T y; + T z; + + typedef T BaseType; + typedef CubVector Type; +}; + +/** + * Generic vector-4 type + */ +template +struct CubVector +{ + T x; + T y; + T z; + T w; + + typedef T BaseType; + typedef CubVector Type; +}; + + +/** + * Macro for expanding partially-specialized built-in vector types + */ +#define CUB_DEFINE_VECTOR_TYPE(base_type,short_type) \ + \ + template<> struct CubVector : short_type##1 \ + { \ + typedef base_type BaseType; \ + typedef short_type##1 Type; \ + __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ + CubVector retval; \ + retval.x = x + other.x; \ + return retval; \ + } \ + __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ + CubVector retval; \ + retval.x = x - other.x; \ + return retval; \ + } \ + }; \ + \ + template<> struct CubVector : short_type##2 \ + { \ + typedef base_type BaseType; \ + typedef short_type##2 Type; \ + __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ + CubVector retval; \ + retval.x = x + other.x; \ + retval.y = y + other.y; \ + return retval; \ + } \ + __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ + CubVector retval; \ + retval.x = x - other.x; \ + retval.y = y - other.y; \ + return retval; \ + } \ + }; \ + \ + template<> struct CubVector : short_type##3 \ + { \ + typedef base_type BaseType; \ + typedef short_type##3 Type; \ + __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ + CubVector retval; \ + retval.x = x + other.x; \ + retval.y = y + other.y; \ + retval.z = z + other.z; \ + return retval; \ + } \ + __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ + CubVector retval; \ + retval.x = x - other.x; \ + retval.y = y - other.y; \ + retval.z = z - other.z; \ + return retval; \ + } \ + }; \ + \ + template<> struct CubVector : short_type##4 \ + { \ + typedef base_type BaseType; \ + typedef short_type##4 Type; \ + __host__ __device__ __forceinline__ CubVector operator+(const CubVector &other) const { \ + CubVector retval; \ + retval.x = x + other.x; \ + retval.y = y + other.y; \ + retval.z = z + other.z; \ + retval.w = w + other.w; \ + return retval; \ + } \ + __host__ __device__ __forceinline__ CubVector operator-(const CubVector &other) const { \ + CubVector retval; \ + retval.x = x - other.x; \ + retval.y = y - other.y; \ + retval.z = z - other.z; \ + retval.w = w - other.w; \ + return retval; \ + } \ + }; + + + +// Expand CUDA vector types for built-in primitives +CUB_DEFINE_VECTOR_TYPE(char, char) +CUB_DEFINE_VECTOR_TYPE(signed char, char) +CUB_DEFINE_VECTOR_TYPE(short, short) +CUB_DEFINE_VECTOR_TYPE(int, int) +CUB_DEFINE_VECTOR_TYPE(long, long) +CUB_DEFINE_VECTOR_TYPE(long long, longlong) +CUB_DEFINE_VECTOR_TYPE(unsigned char, uchar) +CUB_DEFINE_VECTOR_TYPE(unsigned short, ushort) +CUB_DEFINE_VECTOR_TYPE(unsigned int, uint) +CUB_DEFINE_VECTOR_TYPE(unsigned long, ulong) 
+CUB_DEFINE_VECTOR_TYPE(unsigned long long, ulonglong) +CUB_DEFINE_VECTOR_TYPE(float, float) +CUB_DEFINE_VECTOR_TYPE(double, double) +CUB_DEFINE_VECTOR_TYPE(bool, uchar) + +// Undefine macros +#undef CUB_DEFINE_VECTOR_TYPE + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + + +/****************************************************************************** + * Wrapper types + ******************************************************************************/ + +/** + * \brief A storage-backing wrapper that allows types with non-trivial constructors to be aliased in unions + */ +template +struct Uninitialized +{ + /// Biggest memory-access word that T is a whole multiple of and is not larger than the alignment of T + typedef typename UnitWord::DeviceWord DeviceWord; + + enum + { + WORDS = sizeof(T) / sizeof(DeviceWord) + }; + + /// Backing storage + DeviceWord storage[WORDS]; + + /// Alias + __host__ __device__ __forceinline__ T& Alias() + { + return reinterpret_cast(*this); + } +}; + + +/** + * \brief A key identifier paired with a corresponding value + */ +template < + typename _Key, + typename _Value +#if defined(_WIN32) && !defined(_WIN64) + , bool KeyIsLT = (AlignBytes<_Key>::ALIGN_BYTES < AlignBytes<_Value>::ALIGN_BYTES) + , bool ValIsLT = (AlignBytes<_Value>::ALIGN_BYTES < AlignBytes<_Key>::ALIGN_BYTES) +#endif // #if defined(_WIN32) && !defined(_WIN64) + > +struct KeyValuePair +{ + typedef _Key Key; ///< Key data type + typedef _Value Value; ///< Value data type + + Key key; ///< Item key + Value value; ///< Item value + + /// Constructor + __host__ __device__ __forceinline__ + KeyValuePair() {} + + /// Constructor + __host__ __device__ __forceinline__ + KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {} + + /// Inequality operator + __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b) + { + return (value != b.value) || (key != b.key); + } +}; + +#if defined(_WIN32) && !defined(_WIN64) + +/** + * Win32 won't do 16B alignment. 
This can present two problems for + * should-be-16B-aligned (but actually 8B aligned) built-in and intrinsics members: + * 1) If a smaller-aligned item were to be listed first, the host compiler places the + * should-be-16B item at too early an offset (and disagrees with device compiler) + * 2) Or, if a smaller-aligned item lists second, the host compiler gets the size + * of the struct wrong (and disagrees with device compiler) + * + * So we put the larger-should-be-aligned item first, and explicitly pad the + * end of the struct + */ + +/// Smaller key specialization +template +struct KeyValuePair +{ + typedef K Key; + typedef V Value; + + typedef char Pad[AlignBytes::ALIGN_BYTES - AlignBytes::ALIGN_BYTES]; + + Value value; // Value has larger would-be alignment and goes first + Key key; + Pad pad; + + /// Constructor + __host__ __device__ __forceinline__ + KeyValuePair() {} + + /// Constructor + __host__ __device__ __forceinline__ + KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {} + + /// Inequality operator + __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b) + { + return (value != b.value) || (key != b.key); + } +}; + + +/// Smaller value specialization +template +struct KeyValuePair +{ + typedef K Key; + typedef V Value; + + typedef char Pad[AlignBytes::ALIGN_BYTES - AlignBytes::ALIGN_BYTES]; + + Key key; // Key has larger would-be alignment and goes first + Value value; + Pad pad; + + /// Constructor + __host__ __device__ __forceinline__ + KeyValuePair() {} + + /// Constructor + __host__ __device__ __forceinline__ + KeyValuePair(Key const& key, Value const& value) : key(key), value(value) {} + + /// Inequality operator + __host__ __device__ __forceinline__ bool operator !=(const KeyValuePair &b) + { + return (value != b.value) || (key != b.key); + } +}; + +#endif // #if defined(_WIN32) && !defined(_WIN64) + + +#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + + +/** + * \brief A wrapper for passing simple static arrays as kernel parameters + */ +template +struct ArrayWrapper +{ + + /// Statically-sized array of type \p T + T array[COUNT]; + + /// Constructor + __host__ __device__ __forceinline__ ArrayWrapper() {} +}; + +#endif // DOXYGEN_SHOULD_SKIP_THIS + +/** + * \brief Double-buffer storage wrapper for multi-pass stream transformations that require more than one storage array for streaming intermediate results back and forth. + * + * Many multi-pass computations require a pair of "ping-pong" storage + * buffers (e.g., one for reading from and the other for writing to, and then + * vice-versa for the subsequent pass). This structure wraps a set of device + * buffers and a "selector" member to track which is "current". 
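+ *
+ * \par Snippet
+ * A minimal illustrative sketch (the pointer names \p d_key_buf and \p d_key_alt_buf
+ * are placeholders, and the multi-pass primitive is assumed to flip \p selector as it
+ * ping-pongs between the two buffers):
+ * \par
+ * \code
+ * cub::DoubleBuffer<int> d_keys(d_key_buf, d_key_alt_buf);
+ *
+ * // ... run a multi-pass primitive that may ping-pong and update d_keys.selector ...
+ *
+ * int *d_result = d_keys.Current();   // buffer currently holding the valid results
+ * \endcode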
+ */ +template +struct DoubleBuffer +{ + /// Pair of device buffer pointers + T *d_buffers[2]; + + /// Selector into \p d_buffers (i.e., the active/valid buffer) + int selector; + + /// \brief Constructor + __host__ __device__ __forceinline__ DoubleBuffer() + { + selector = 0; + d_buffers[0] = NULL; + d_buffers[1] = NULL; + } + + /// \brief Constructor + __host__ __device__ __forceinline__ DoubleBuffer( + T *d_current, ///< The currently valid buffer + T *d_alternate) ///< Alternate storage buffer of the same size as \p d_current + { + selector = 0; + d_buffers[0] = d_current; + d_buffers[1] = d_alternate; + } + + /// \brief Return pointer to the currently valid buffer + __host__ __device__ __forceinline__ T* Current() { return d_buffers[selector]; } + + /// \brief Return pointer to the currently invalid buffer + __host__ __device__ __forceinline__ T* Alternate() { return d_buffers[selector ^ 1]; } + +}; + + + +/****************************************************************************** + * Typedef-detection + ******************************************************************************/ + + +/** + * \brief Defines a structure \p detector_name that is templated on type \p T. The \p detector_name struct exposes a constant member \p VALUE indicating whether or not parameter \p T exposes a nested type \p nested_type_name + */ +#define CUB_DEFINE_DETECT_NESTED_TYPE(detector_name, nested_type_name) \ + template \ + struct detector_name \ + { \ + template \ + static char& test(typename C::nested_type_name*); \ + template \ + static int& test(...); \ + enum \ + { \ + VALUE = sizeof(test(0)) < sizeof(int) \ + }; \ + }; + + + +/****************************************************************************** + * Simple enable-if (similar to Boost) + ******************************************************************************/ + +/** + * \brief Simple enable-if (similar to Boost) + */ +template +struct EnableIf +{ + /// Enable-if type for SFINAE dummy variables + typedef T Type; +}; + + +template +struct EnableIf {}; + + + +/****************************************************************************** + * Typedef-detection + ******************************************************************************/ + +/** + * \brief Determine whether or not BinaryOp's functor is of the form bool operator()(const T& a, const T&b) or bool operator()(const T& a, const T&b, unsigned int idx) + */ +template +struct BinaryOpHasIdxParam +{ +private: +/* + template struct SFINAE1 {}; + template struct SFINAE2 {}; + template struct SFINAE3 {}; + template struct SFINAE4 {}; +*/ + template struct SFINAE5 {}; + template struct SFINAE6 {}; + template struct SFINAE7 {}; + template struct SFINAE8 {}; +/* + template static char Test(SFINAE1 *); + template static char Test(SFINAE2 *); + template static char Test(SFINAE3 *); + template static char Test(SFINAE4 *); +*/ + template __host__ __device__ static char Test(SFINAE5 *); + template __host__ __device__ static char Test(SFINAE6 *); + template __host__ __device__ static char Test(SFINAE7 *); + template __host__ __device__ static char Test(SFINAE8 *); + + template static int Test(...); + +public: + + /// Whether the functor BinaryOp has a third unsigned int index param + static const bool HAS_PARAM = sizeof(Test(NULL)) == sizeof(char); +}; + + + + +/****************************************************************************** + * Simple type traits utilities. 
+ * + * For example: + * Traits::CATEGORY // SIGNED_INTEGER + * Traits::NULL_TYPE // true + * Traits::CATEGORY // NOT_A_NUMBER + * Traits::PRIMITIVE; // false + * + ******************************************************************************/ + +/** + * \brief Basic type traits categories + */ +enum Category +{ + NOT_A_NUMBER, + SIGNED_INTEGER, + UNSIGNED_INTEGER, + FLOATING_POINT +}; + + +/** + * \brief Basic type traits + */ +template +struct BaseTraits +{ + /// Category + static const Category CATEGORY = _CATEGORY; + enum + { + PRIMITIVE = _PRIMITIVE, + NULL_TYPE = _NULL_TYPE, + }; +}; + + +/** + * Basic type traits (unsigned primitive specialization) + */ +template +struct BaseTraits +{ + typedef _UnsignedBits UnsignedBits; + + static const Category CATEGORY = UNSIGNED_INTEGER; + static const UnsignedBits LOWEST_KEY = UnsignedBits(0); + static const UnsignedBits MAX_KEY = UnsignedBits(-1); + + enum + { + PRIMITIVE = true, + NULL_TYPE = false, + }; + + + static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) + { + return key; + } + + static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) + { + return key; + } + + static __host__ __device__ __forceinline__ T Max() + { + UnsignedBits retval = MAX_KEY; + return reinterpret_cast(retval); + } + + static __host__ __device__ __forceinline__ T Lowest() + { + UnsignedBits retval = LOWEST_KEY; + return reinterpret_cast(retval); + } +}; + + +/** + * Basic type traits (signed primitive specialization) + */ +template +struct BaseTraits +{ + typedef _UnsignedBits UnsignedBits; + + static const Category CATEGORY = SIGNED_INTEGER; + static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1); + static const UnsignedBits LOWEST_KEY = HIGH_BIT; + static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT; + + enum + { + PRIMITIVE = true, + NULL_TYPE = false, + }; + + static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) + { + return key ^ HIGH_BIT; + }; + + static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) + { + return key ^ HIGH_BIT; + }; + + static __host__ __device__ __forceinline__ T Max() + { + UnsignedBits retval = MAX_KEY; + return reinterpret_cast(retval); + } + + static __host__ __device__ __forceinline__ T Lowest() + { + UnsignedBits retval = LOWEST_KEY; + return reinterpret_cast(retval); + } +}; + +template +struct FpLimits; + +template <> +struct FpLimits +{ + static __host__ __device__ __forceinline__ float Max() { + return FLT_MAX; + } + + static __host__ __device__ __forceinline__ float Lowest() { + return FLT_MAX * float(-1); + } +}; + +template <> +struct FpLimits +{ + static __host__ __device__ __forceinline__ double Max() { + return DBL_MAX; + } + + static __host__ __device__ __forceinline__ double Lowest() { + return DBL_MAX * double(-1); + } +}; + + +#if (__CUDACC_VER_MAJOR__ >= 9) +template <> +struct FpLimits<__half> +{ + static __host__ __device__ __forceinline__ __half Max() { + unsigned short max_word = 0x7BFF; + return reinterpret_cast<__half&>(max_word); + } + + static __host__ __device__ __forceinline__ __half Lowest() { + unsigned short lowest_word = 0xFBFF; + return reinterpret_cast<__half&>(lowest_word); + } +}; +#endif + + +/** + * Basic type traits (fp primitive specialization) + */ +template +struct BaseTraits +{ + typedef _UnsignedBits UnsignedBits; + + static const Category CATEGORY = FLOATING_POINT; + static const UnsignedBits HIGH_BIT = UnsignedBits(1) << ((sizeof(UnsignedBits) * 8) - 1); + 
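+    // Key twiddling gives floating-point keys an order-preserving unsigned encoding:
+    // TwiddleIn (below) flips every bit of a negative value and only the sign bit of a
+    // non-negative value. For example, with 32-bit floats:
+    //   +1.0f (0x3F800000) -> 0xBF800000
+    //   -1.0f (0xBF800000) -> 0x407FFFFF
+    // so -1.0f orders before +1.0f when the twiddled keys are compared as unsigned
+    // integers; TwiddleOut reverses the mapping.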
static const UnsignedBits LOWEST_KEY = UnsignedBits(-1); + static const UnsignedBits MAX_KEY = UnsignedBits(-1) ^ HIGH_BIT; + + enum + { + PRIMITIVE = true, + NULL_TYPE = false, + }; + + static __device__ __forceinline__ UnsignedBits TwiddleIn(UnsignedBits key) + { + UnsignedBits mask = (key & HIGH_BIT) ? UnsignedBits(-1) : HIGH_BIT; + return key ^ mask; + }; + + static __device__ __forceinline__ UnsignedBits TwiddleOut(UnsignedBits key) + { + UnsignedBits mask = (key & HIGH_BIT) ? HIGH_BIT : UnsignedBits(-1); + return key ^ mask; + }; + + static __host__ __device__ __forceinline__ T Max() { + return FpLimits::Max(); + } + + static __host__ __device__ __forceinline__ T Lowest() { + return FpLimits::Lowest(); + } +}; + + +/** + * \brief Numeric type traits + */ +template struct NumericTraits : BaseTraits {}; + +template <> struct NumericTraits : BaseTraits {}; + +template <> struct NumericTraits : BaseTraits<(std::numeric_limits::is_signed) ? SIGNED_INTEGER : UNSIGNED_INTEGER, true, false, unsigned char, char> {}; +template <> struct NumericTraits : BaseTraits {}; +template <> struct NumericTraits : BaseTraits {}; +template <> struct NumericTraits : BaseTraits {}; +template <> struct NumericTraits : BaseTraits {}; +template <> struct NumericTraits : BaseTraits {}; + +template <> struct NumericTraits : BaseTraits {}; +template <> struct NumericTraits : BaseTraits {}; +template <> struct NumericTraits : BaseTraits {}; +template <> struct NumericTraits : BaseTraits {}; +template <> struct NumericTraits : BaseTraits {}; + +template <> struct NumericTraits : BaseTraits {}; +template <> struct NumericTraits : BaseTraits {}; +#if (__CUDACC_VER_MAJOR__ >= 9) + template <> struct NumericTraits<__half> : BaseTraits {}; +#endif + +template <> struct NumericTraits : BaseTraits::VolatileWord, bool> {}; + + + +/** + * \brief Type traits + */ +template +struct Traits : NumericTraits::Type> {}; + + +#endif // DOXYGEN_SHOULD_SKIP_THIS + + +/** @} */ // end group UtilModule + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/specializations/warp_reduce_shfl.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/specializations/warp_reduce_shfl.cuh new file mode 100644 index 0000000..bbbf37e --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/specializations/warp_reduce_shfl.cuh @@ -0,0 +1,541 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::WarpReduceShfl provides SHFL-based variants of parallel reduction of items partitioned across a CUDA thread warp. + */ + +#pragma once + +#include "../../thread/thread_operators.cuh" +#include "../../util_ptx.cuh" +#include "../../util_type.cuh" +#include "../../util_macro.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \brief WarpReduceShfl provides SHFL-based variants of parallel reduction of items partitioned across a CUDA thread warp. + * + * LOGICAL_WARP_THREADS must be a power-of-two + */ +template < + typename T, ///< Data type being reduced + int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp + int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective +struct WarpReduceShfl +{ + //--------------------------------------------------------------------- + // Constants and type definitions + //--------------------------------------------------------------------- + + enum + { + /// Whether the logical warp size and the PTX warp size coincide + IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), + + /// The number of warp reduction steps + STEPS = Log2::VALUE, + + /// Number of logical warps in a PTX warp + LOGICAL_WARPS = CUB_WARP_THREADS(PTX_ARCH) / LOGICAL_WARP_THREADS, + + /// The 5-bit SHFL mask for logically splitting warps into sub-segments starts 8-bits up + SHFL_C = (CUB_WARP_THREADS(PTX_ARCH) - LOGICAL_WARP_THREADS) << 8 + + }; + + template + struct IsInteger + { + enum { + ///Whether the data type is a small (32b or less) integer for which we can use a single SFHL instruction per exchange + IS_SMALL_UNSIGNED = (Traits::CATEGORY == UNSIGNED_INTEGER) && (sizeof(S) <= sizeof(unsigned int)) + }; + }; + + + /// Shared memory storage layout type + typedef NullType TempStorage; + + + //--------------------------------------------------------------------- + // Thread fields + //--------------------------------------------------------------------- + + /// Lane index in logical warp + unsigned int lane_id; + + /// Logical warp index in 32-thread physical warp + unsigned int warp_id; + + /// 32-thread physical warp member mask of logical warp + unsigned int member_mask; + + + //--------------------------------------------------------------------- + // Construction + //--------------------------------------------------------------------- + + /// Constructor + __device__ __forceinline__ WarpReduceShfl( + TempStorage &/*temp_storage*/) + { + lane_id = LaneId(); + warp_id = 0; + member_mask = 0xffffffffu >> (CUB_WARP_THREADS(PTX_ARCH) - LOGICAL_WARP_THREADS); + 
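+        // For example, with LOGICAL_WARP_THREADS == 16 on a 32-thread physical warp,
+        // the line above yields member_mask == 0x0000ffff; the adjustment below then
+        // shifts it to 0xffff0000 for lanes in the upper logical warp and remaps
+        // lane_id/warp_id into that 16-thread sub-warp.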
+ if (!IS_ARCH_WARP) + { + warp_id = lane_id / LOGICAL_WARP_THREADS; + lane_id = lane_id % LOGICAL_WARP_THREADS; + member_mask = member_mask << (warp_id * LOGICAL_WARP_THREADS); + } + } + + + //--------------------------------------------------------------------- + // Reduction steps + //--------------------------------------------------------------------- + + /// Reduction (specialized for summation across uint32 types) + __device__ __forceinline__ unsigned int ReduceStep( + unsigned int input, ///< [in] Calling thread's input item. + cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator + int last_lane, ///< [in] Index of last lane in segment + int offset) ///< [in] Up-offset to pull from + { + unsigned int output; + int shfl_c = last_lane | SHFL_C; // Shuffle control (mask and last_lane) + + // Use predicate set from SHFL to guard against invalid peers +#ifdef CUB_USE_COOPERATIVE_GROUPS + asm volatile( + "{" + " .reg .u32 r0;" + " .reg .pred p;" + " shfl.sync.down.b32 r0|p, %1, %2, %3, %5;" + " @p add.u32 r0, r0, %4;" + " mov.u32 %0, r0;" + "}" + : "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input), "r"(member_mask)); +#else + asm volatile( + "{" + " .reg .u32 r0;" + " .reg .pred p;" + " shfl.down.b32 r0|p, %1, %2, %3;" + " @p add.u32 r0, r0, %4;" + " mov.u32 %0, r0;" + "}" + : "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input)); +#endif + + return output; + } + + + /// Reduction (specialized for summation across fp32 types) + __device__ __forceinline__ float ReduceStep( + float input, ///< [in] Calling thread's input item. + cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator + int last_lane, ///< [in] Index of last lane in segment + int offset) ///< [in] Up-offset to pull from + { + float output; + int shfl_c = last_lane | SHFL_C; // Shuffle control (mask and last_lane) + + // Use predicate set from SHFL to guard against invalid peers +#ifdef CUB_USE_COOPERATIVE_GROUPS + asm volatile( + "{" + " .reg .f32 r0;" + " .reg .pred p;" + " shfl.sync.down.b32 r0|p, %1, %2, %3, %5;" + " @p add.f32 r0, r0, %4;" + " mov.f32 %0, r0;" + "}" + : "=f"(output) : "f"(input), "r"(offset), "r"(shfl_c), "f"(input), "r"(member_mask)); +#else + asm volatile( + "{" + " .reg .f32 r0;" + " .reg .pred p;" + " shfl.down.b32 r0|p, %1, %2, %3;" + " @p add.f32 r0, r0, %4;" + " mov.f32 %0, r0;" + "}" + : "=f"(output) : "f"(input), "r"(offset), "r"(shfl_c), "f"(input)); +#endif + + return output; + } + + + /// Reduction (specialized for summation across unsigned long long types) + __device__ __forceinline__ unsigned long long ReduceStep( + unsigned long long input, ///< [in] Calling thread's input item. 
+ cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator + int last_lane, ///< [in] Index of last lane in segment + int offset) ///< [in] Up-offset to pull from + { + unsigned long long output; + int shfl_c = last_lane | SHFL_C; // Shuffle control (mask and last_lane) + +#ifdef CUB_USE_COOPERATIVE_GROUPS + asm volatile( + "{" + " .reg .u32 lo;" + " .reg .u32 hi;" + " .reg .pred p;" + " mov.b64 {lo, hi}, %1;" + " shfl.sync.down.b32 lo|p, lo, %2, %3, %4;" + " shfl.sync.down.b32 hi|p, hi, %2, %3, %4;" + " mov.b64 %0, {lo, hi};" + " @p add.u64 %0, %0, %1;" + "}" + : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "r"(member_mask)); +#else + asm volatile( + "{" + " .reg .u32 lo;" + " .reg .u32 hi;" + " .reg .pred p;" + " mov.b64 {lo, hi}, %1;" + " shfl.down.b32 lo|p, lo, %2, %3;" + " shfl.down.b32 hi|p, hi, %2, %3;" + " mov.b64 %0, {lo, hi};" + " @p add.u64 %0, %0, %1;" + "}" + : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c)); +#endif + + return output; + } + + + /// Reduction (specialized for summation across long long types) + __device__ __forceinline__ long long ReduceStep( + long long input, ///< [in] Calling thread's input item. + cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator + int last_lane, ///< [in] Index of last lane in segment + int offset) ///< [in] Up-offset to pull from + { + long long output; + int shfl_c = last_lane | SHFL_C; // Shuffle control (mask and last_lane) + + // Use predicate set from SHFL to guard against invalid peers +#ifdef CUB_USE_COOPERATIVE_GROUPS + asm volatile( + "{" + " .reg .u32 lo;" + " .reg .u32 hi;" + " .reg .pred p;" + " mov.b64 {lo, hi}, %1;" + " shfl.sync.down.b32 lo|p, lo, %2, %3, %4;" + " shfl.sync.down.b32 hi|p, hi, %2, %3, %4;" + " mov.b64 %0, {lo, hi};" + " @p add.s64 %0, %0, %1;" + "}" + : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "r"(member_mask)); +#else + asm volatile( + "{" + " .reg .u32 lo;" + " .reg .u32 hi;" + " .reg .pred p;" + " mov.b64 {lo, hi}, %1;" + " shfl.down.b32 lo|p, lo, %2, %3;" + " shfl.down.b32 hi|p, hi, %2, %3;" + " mov.b64 %0, {lo, hi};" + " @p add.s64 %0, %0, %1;" + "}" + : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c)); +#endif + + return output; + } + + + /// Reduction (specialized for summation across double types) + __device__ __forceinline__ double ReduceStep( + double input, ///< [in] Calling thread's input item. 
+ cub::Sum /*reduction_op*/, ///< [in] Binary reduction operator + int last_lane, ///< [in] Index of last lane in segment + int offset) ///< [in] Up-offset to pull from + { + double output; + int shfl_c = last_lane | SHFL_C; // Shuffle control (mask and last_lane) + + // Use predicate set from SHFL to guard against invalid peers +#ifdef CUB_USE_COOPERATIVE_GROUPS + asm volatile( + "{" + " .reg .u32 lo;" + " .reg .u32 hi;" + " .reg .pred p;" + " .reg .f64 r0;" + " mov.b64 %0, %1;" + " mov.b64 {lo, hi}, %1;" + " shfl.sync.down.b32 lo|p, lo, %2, %3, %4;" + " shfl.sync.down.b32 hi|p, hi, %2, %3, %4;" + " mov.b64 r0, {lo, hi};" + " @p add.f64 %0, %0, r0;" + "}" + : "=d"(output) : "d"(input), "r"(offset), "r"(shfl_c), "r"(member_mask)); +#else + asm volatile( + "{" + " .reg .u32 lo;" + " .reg .u32 hi;" + " .reg .pred p;" + " .reg .f64 r0;" + " mov.b64 %0, %1;" + " mov.b64 {lo, hi}, %1;" + " shfl.down.b32 lo|p, lo, %2, %3;" + " shfl.down.b32 hi|p, hi, %2, %3;" + " mov.b64 r0, {lo, hi};" + " @p add.f64 %0, %0, r0;" + "}" + : "=d"(output) : "d"(input), "r"(offset), "r"(shfl_c)); +#endif + + return output; + } + + + /// Reduction (specialized for swizzled ReduceByKeyOp across KeyValuePair types) + template + __device__ __forceinline__ KeyValuePair ReduceStep( + KeyValuePair input, ///< [in] Calling thread's input item. + SwizzleScanOp > /*reduction_op*/, ///< [in] Binary reduction operator + int last_lane, ///< [in] Index of last lane in segment + int offset) ///< [in] Up-offset to pull from + { + KeyValuePair output; + + KeyT other_key = ShuffleDown(input.key, offset, last_lane, member_mask); + + output.key = input.key; + output.value = ReduceStep( + input.value, + cub::Sum(), + last_lane, + offset, + Int2Type::IS_SMALL_UNSIGNED>()); + + if (input.key != other_key) + output.value = input.value; + + return output; + } + + + + /// Reduction (specialized for swizzled ReduceBySegmentOp across KeyValuePair types) + template + __device__ __forceinline__ KeyValuePair ReduceStep( + KeyValuePair input, ///< [in] Calling thread's input item. + SwizzleScanOp > /*reduction_op*/, ///< [in] Binary reduction operator + int last_lane, ///< [in] Index of last lane in segment + int offset) ///< [in] Up-offset to pull from + { + KeyValuePair output; + + output.value = ReduceStep(input.value, cub::Sum(), last_lane, offset, Int2Type::IS_SMALL_UNSIGNED>()); + output.key = ReduceStep(input.key, cub::Sum(), last_lane, offset, Int2Type::IS_SMALL_UNSIGNED>()); + + if (input.key > 0) + output.value = input.value; + + return output; + } + + + /// Reduction step (generic) + template + __device__ __forceinline__ _T ReduceStep( + _T input, ///< [in] Calling thread's input item. + ReductionOp reduction_op, ///< [in] Binary reduction operator + int last_lane, ///< [in] Index of last lane in segment + int offset) ///< [in] Up-offset to pull from + { + _T output = input; + + _T temp = ShuffleDown(output, offset, last_lane, member_mask); + + // Perform reduction op if valid + if (offset + lane_id <= last_lane) + output = reduction_op(input, temp); + + return output; + } + + + /// Reduction step (specialized for small unsigned integers size 32b or less) + template + __device__ __forceinline__ _T ReduceStep( + _T input, ///< [in] Calling thread's input item. 
+ ReductionOp reduction_op, ///< [in] Binary reduction operator + int last_lane, ///< [in] Index of last lane in segment + int offset, ///< [in] Up-offset to pull from + Int2Type /*is_small_unsigned*/) ///< [in] Marker type indicating whether T is a small unsigned integer + { + return ReduceStep(input, reduction_op, last_lane, offset); + } + + + /// Reduction step (specialized for types other than small unsigned integers size 32b or less) + template + __device__ __forceinline__ _T ReduceStep( + _T input, ///< [in] Calling thread's input item. + ReductionOp reduction_op, ///< [in] Binary reduction operator + int last_lane, ///< [in] Index of last lane in segment + int offset, ///< [in] Up-offset to pull from + Int2Type /*is_small_unsigned*/) ///< [in] Marker type indicating whether T is a small unsigned integer + { + return ReduceStep(input, reduction_op, last_lane, offset); + } + + + //--------------------------------------------------------------------- + // Templated inclusive scan iteration + //--------------------------------------------------------------------- + + template + __device__ __forceinline__ void ReduceStep( + T& input, ///< [in] Calling thread's input item. + ReductionOp reduction_op, ///< [in] Binary reduction operator + int last_lane, ///< [in] Index of last lane in segment + Int2Type /*step*/) + { + input = ReduceStep(input, reduction_op, last_lane, 1 << STEP, Int2Type::IS_SMALL_UNSIGNED>()); + + ReduceStep(input, reduction_op, last_lane, Int2Type()); + } + + template + __device__ __forceinline__ void ReduceStep( + T& /*input*/, ///< [in] Calling thread's input item. + ReductionOp /*reduction_op*/, ///< [in] Binary reduction operator + int /*last_lane*/, ///< [in] Index of last lane in segment + Int2Type /*step*/) + {} + + + //--------------------------------------------------------------------- + // Reduction operations + //--------------------------------------------------------------------- + + /// Reduction + template < + bool ALL_LANES_VALID, ///< Whether all lanes in each warp are contributing a valid fold of items + typename ReductionOp> + __device__ __forceinline__ T Reduce( + T input, ///< [in] Calling thread's input + int valid_items, ///< [in] Total number of valid items across the logical warp + ReductionOp reduction_op) ///< [in] Binary reduction operator + { + int last_lane = (ALL_LANES_VALID) ? + LOGICAL_WARP_THREADS - 1 : + valid_items - 1; + + T output = input; + +// // Iterate reduction steps +// #pragma unroll +// for (int STEP = 0; STEP < STEPS; STEP++) +// { +// output = ReduceStep(output, reduction_op, last_lane, 1 << STEP, Int2Type::IS_SMALL_UNSIGNED>()); +// } + + // Template-iterate reduction steps + ReduceStep(output, reduction_op, last_lane, Int2Type<0>()); + + return output; + } + + + /// Segmented reduction + template < + bool HEAD_SEGMENTED, ///< Whether flags indicate a segment-head or a segment-tail + typename FlagT, + typename ReductionOp> + __device__ __forceinline__ T SegmentedReduce( + T input, ///< [in] Calling thread's input + FlagT flag, ///< [in] Whether or not the current lane is a segment head/tail + ReductionOp reduction_op) ///< [in] Binary reduction operator + { + // Get the start flags for each thread in the warp. 
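+        // The steps below convert head flags to tail flags, discard flags below this
+        // lane, and locate the nearest remaining flag. For example, with HEAD_SEGMENTED
+        // head flags at lanes 0 and 5 of a 32-thread logical warp, lanes 0..4 compute
+        // last_lane == 4 and lanes 5..31 compute last_lane == 31.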
+ int warp_flags = WARP_BALLOT(flag, member_mask); + + // Convert to tail-segmented + if (HEAD_SEGMENTED) + warp_flags >>= 1; + + // Mask out the bits below the current thread + warp_flags &= LaneMaskGe(); + + // Mask of physical lanes outside the logical warp and convert to logical lanemask + if (!IS_ARCH_WARP) + { + warp_flags = (warp_flags & member_mask) >> (warp_id * LOGICAL_WARP_THREADS); + } + + // Mask in the last lane of logical warp + warp_flags |= 1u << (LOGICAL_WARP_THREADS - 1); + + // Find the next set flag + int last_lane = __clz(__brev(warp_flags)); + + T output = input; + +// // Iterate reduction steps +// #pragma unroll +// for (int STEP = 0; STEP < STEPS; STEP++) +// { +// output = ReduceStep(output, reduction_op, last_lane, 1 << STEP, Int2Type::IS_SMALL_UNSIGNED>()); +// } + + // Template-iterate reduction steps + ReduceStep(output, reduction_op, last_lane, Int2Type<0>()); + + return output; + } +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/specializations/warp_reduce_smem.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/specializations/warp_reduce_smem.cuh new file mode 100644 index 0000000..7baa573 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/specializations/warp_reduce_smem.cuh @@ -0,0 +1,372 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::WarpReduceSmem provides smem-based variants of parallel reduction of items partitioned across a CUDA thread warp. 
+ */ + +#pragma once + +#include "../../thread/thread_operators.cuh" +#include "../../thread/thread_load.cuh" +#include "../../thread/thread_store.cuh" +#include "../../util_type.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \brief WarpReduceSmem provides smem-based variants of parallel reduction of items partitioned across a CUDA thread warp. + */ +template < + typename T, ///< Data type being reduced + int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp + int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective +struct WarpReduceSmem +{ + /****************************************************************************** + * Constants and type definitions + ******************************************************************************/ + + enum + { + /// Whether the logical warp size and the PTX warp size coincide + IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), + + /// Whether the logical warp size is a power-of-two + IS_POW_OF_TWO = PowerOfTwo::VALUE, + + /// The number of warp scan steps + STEPS = Log2::VALUE, + + /// The number of threads in half a warp + HALF_WARP_THREADS = 1 << (STEPS - 1), + + /// The number of shared memory elements per warp + WARP_SMEM_ELEMENTS = LOGICAL_WARP_THREADS + HALF_WARP_THREADS, + + /// FlagT status (when not using ballot) + UNSET = 0x0, // Is initially unset + SET = 0x1, // Is initially set + SEEN = 0x2, // Has seen another head flag from a successor peer + }; + + /// Shared memory flag type + typedef unsigned char SmemFlag; + + /// Shared memory storage layout type (1.5 warps-worth of elements for each warp) + struct _TempStorage + { + T reduce[WARP_SMEM_ELEMENTS]; + SmemFlag flags[WARP_SMEM_ELEMENTS]; + }; + + // Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + _TempStorage &temp_storage; + unsigned int lane_id; + unsigned int member_mask; + + + /****************************************************************************** + * Construction + ******************************************************************************/ + + /// Constructor + __device__ __forceinline__ WarpReduceSmem( + TempStorage &temp_storage) + : + temp_storage(temp_storage.Alias()), + + lane_id(IS_ARCH_WARP ? + LaneId() : + LaneId() % LOGICAL_WARP_THREADS), + + member_mask((0xffffffff >> (32 - LOGICAL_WARP_THREADS)) << ((IS_ARCH_WARP || !IS_POW_OF_TWO ) ? 
+ 0 : // arch-width and non-power-of-two subwarps cannot be tiled with the arch-warp + ((LaneId() / LOGICAL_WARP_THREADS) * LOGICAL_WARP_THREADS))) + {} + + /****************************************************************************** + * Utility methods + ******************************************************************************/ + + //--------------------------------------------------------------------- + // Regular reduction + //--------------------------------------------------------------------- + + /** + * Reduction step + */ + template < + bool ALL_LANES_VALID, ///< Whether all lanes in each warp are contributing a valid fold of items + typename ReductionOp, + int STEP> + __device__ __forceinline__ T ReduceStep( + T input, ///< [in] Calling thread's input + int valid_items, ///< [in] Total number of valid items across the logical warp + ReductionOp reduction_op, ///< [in] Reduction operator + Int2Type /*step*/) + { + const int OFFSET = 1 << STEP; + + // Share input through buffer + ThreadStore(&temp_storage.reduce[lane_id], input); + + WARP_SYNC(member_mask); + + // Update input if peer_addend is in range + if ((ALL_LANES_VALID && IS_POW_OF_TWO) || ((lane_id + OFFSET) < valid_items)) + { + T peer_addend = ThreadLoad(&temp_storage.reduce[lane_id + OFFSET]); + input = reduction_op(input, peer_addend); + } + + WARP_SYNC(member_mask); + + return ReduceStep(input, valid_items, reduction_op, Int2Type()); + } + + + /** + * Reduction step (terminate) + */ + template < + bool ALL_LANES_VALID, ///< Whether all lanes in each warp are contributing a valid fold of items + typename ReductionOp> + __device__ __forceinline__ T ReduceStep( + T input, ///< [in] Calling thread's input + int valid_items, ///< [in] Total number of valid items across the logical warp + ReductionOp /*reduction_op*/, ///< [in] Reduction operator + Int2Type /*step*/) + { + return input; + } + + + //--------------------------------------------------------------------- + // Segmented reduction + //--------------------------------------------------------------------- + + + /** + * Ballot-based segmented reduce + */ + template < + bool HEAD_SEGMENTED, ///< Whether flags indicate a segment-head or a segment-tail + typename FlagT, + typename ReductionOp> + __device__ __forceinline__ T SegmentedReduce( + T input, ///< [in] Calling thread's input + FlagT flag, ///< [in] Whether or not the current lane is a segment head/tail + ReductionOp reduction_op, ///< [in] Reduction operator + Int2Type /*has_ballot*/) ///< [in] Marker type for whether the target arch has ballot functionality + { + // Get the start flags for each thread in the warp. + int warp_flags = WARP_BALLOT(flag, member_mask); + + if (!HEAD_SEGMENTED) + warp_flags <<= 1; + + // Keep bits above the current thread. 
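+        // After masking with LaneMaskGt(), the lowest remaining flag bit (recovered via
+        // __clz(__brev(...)) below as next_flag) is the first lane of the next segment,
+        // so this lane only accumulates peers in the half-open range [lane_id, next_flag).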
+ warp_flags &= LaneMaskGt(); + + // Accommodate packing of multiple logical warps in a single physical warp + if (!IS_ARCH_WARP) + { + warp_flags >>= (LaneId() / LOGICAL_WARP_THREADS) * LOGICAL_WARP_THREADS; + } + + // Find next flag + int next_flag = __clz(__brev(warp_flags)); + + // Clip the next segment at the warp boundary if necessary + if (LOGICAL_WARP_THREADS != 32) + next_flag = CUB_MIN(next_flag, LOGICAL_WARP_THREADS); + + #pragma unroll + for (int STEP = 0; STEP < STEPS; STEP++) + { + const int OFFSET = 1 << STEP; + + // Share input into buffer + ThreadStore(&temp_storage.reduce[lane_id], input); + + WARP_SYNC(member_mask); + + // Update input if peer_addend is in range + if (OFFSET + lane_id < next_flag) + { + T peer_addend = ThreadLoad(&temp_storage.reduce[lane_id + OFFSET]); + input = reduction_op(input, peer_addend); + } + + WARP_SYNC(member_mask); + } + + return input; + } + + + /** + * Smem-based segmented reduce + */ + template < + bool HEAD_SEGMENTED, ///< Whether flags indicate a segment-head or a segment-tail + typename FlagT, + typename ReductionOp> + __device__ __forceinline__ T SegmentedReduce( + T input, ///< [in] Calling thread's input + FlagT flag, ///< [in] Whether or not the current lane is a segment head/tail + ReductionOp reduction_op, ///< [in] Reduction operator + Int2Type /*has_ballot*/) ///< [in] Marker type for whether the target arch has ballot functionality + { + enum + { + UNSET = 0x0, // Is initially unset + SET = 0x1, // Is initially set + SEEN = 0x2, // Has seen another head flag from a successor peer + }; + + // Alias flags onto shared data storage + volatile SmemFlag *flag_storage = temp_storage.flags; + + SmemFlag flag_status = (flag) ? SET : UNSET; + + for (int STEP = 0; STEP < STEPS; STEP++) + { + const int OFFSET = 1 << STEP; + + // Share input through buffer + ThreadStore(&temp_storage.reduce[lane_id], input); + + WARP_SYNC(member_mask); + + // Get peer from buffer + T peer_addend = ThreadLoad(&temp_storage.reduce[lane_id + OFFSET]); + + WARP_SYNC(member_mask); + + // Share flag through buffer + flag_storage[lane_id] = flag_status; + + // Get peer flag from buffer + SmemFlag peer_flag_status = flag_storage[lane_id + OFFSET]; + + // Update input if peer was in range + if (lane_id < LOGICAL_WARP_THREADS - OFFSET) + { + if (HEAD_SEGMENTED) + { + // Head-segmented + if ((flag_status & SEEN) == 0) + { + // Has not seen a more distant head flag + if (peer_flag_status & SET) + { + // Has now seen a head flag + flag_status |= SEEN; + } + else + { + // Peer is not a head flag: grab its count + input = reduction_op(input, peer_addend); + } + + // Update seen status to include that of peer + flag_status |= (peer_flag_status & SEEN); + } + } + else + { + // Tail-segmented. 
Simply propagate flag status + if (!flag_status) + { + input = reduction_op(input, peer_addend); + flag_status |= peer_flag_status; + } + + } + } + } + + return input; + } + + + /****************************************************************************** + * Interface + ******************************************************************************/ + + /** + * Reduction + */ + template < + bool ALL_LANES_VALID, ///< Whether all lanes in each warp are contributing a valid fold of items + typename ReductionOp> + __device__ __forceinline__ T Reduce( + T input, ///< [in] Calling thread's input + int valid_items, ///< [in] Total number of valid items across the logical warp + ReductionOp reduction_op) ///< [in] Reduction operator + { + return ReduceStep(input, valid_items, reduction_op, Int2Type<0>()); + } + + + /** + * Segmented reduction + */ + template < + bool HEAD_SEGMENTED, ///< Whether flags indicate a segment-head or a segment-tail + typename FlagT, + typename ReductionOp> + __device__ __forceinline__ T SegmentedReduce( + T input, ///< [in] Calling thread's input + FlagT flag, ///< [in] Whether or not the current lane is a segment head/tail + ReductionOp reduction_op) ///< [in] Reduction operator + { + return SegmentedReduce(input, flag, reduction_op, Int2Type<(PTX_ARCH >= 200)>()); + } + + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/specializations/warp_scan_shfl.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/specializations/warp_scan_shfl.cuh new file mode 100644 index 0000000..7f4e1c9 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/specializations/warp_scan_shfl.cuh @@ -0,0 +1,632 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/** + * \file + * cub::WarpScanShfl provides SHFL-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. + */ + +#pragma once + +#include "../../thread/thread_operators.cuh" +#include "../../util_type.cuh" +#include "../../util_ptx.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \brief WarpScanShfl provides SHFL-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. + * + * LOGICAL_WARP_THREADS must be a power-of-two + */ +template < + typename T, ///< Data type being scanned + int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp + int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective +struct WarpScanShfl +{ + //--------------------------------------------------------------------- + // Constants and type definitions + //--------------------------------------------------------------------- + + enum + { + /// Whether the logical warp size and the PTX warp size coincide + IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), + + /// The number of warp scan steps + STEPS = Log2::VALUE, + + /// The 5-bit SHFL mask for logically splitting warps into sub-segments starts 8-bits up + SHFL_C = (CUB_WARP_THREADS(PTX_ARCH) - LOGICAL_WARP_THREADS) << 8 + }; + + template + struct IntegerTraits + { + enum { + ///Whether the data type is a small (32b or less) integer for which we can use a single SFHL instruction per exchange + IS_SMALL_UNSIGNED = (Traits::CATEGORY == UNSIGNED_INTEGER) && (sizeof(S) <= sizeof(unsigned int)) + }; + }; + + /// Shared memory storage layout type + struct TempStorage {}; + + + //--------------------------------------------------------------------- + // Thread fields + //--------------------------------------------------------------------- + + /// Lane index in logical warp + unsigned int lane_id; + + /// Logical warp index in 32-thread physical warp + unsigned int warp_id; + + /// 32-thread physical warp member mask of logical warp + unsigned int member_mask; + + //--------------------------------------------------------------------- + // Construction + //--------------------------------------------------------------------- + + /// Constructor + __device__ __forceinline__ WarpScanShfl( + TempStorage &/*temp_storage*/) + { + lane_id = LaneId(); + warp_id = 0; + member_mask = 0xffffffffu >> (CUB_WARP_THREADS(PTX_ARCH) - LOGICAL_WARP_THREADS); + + if (!IS_ARCH_WARP) + { + warp_id = lane_id / LOGICAL_WARP_THREADS; + lane_id = lane_id % LOGICAL_WARP_THREADS; + member_mask = member_mask << (warp_id * LOGICAL_WARP_THREADS); + } + } + + + //--------------------------------------------------------------------- + // Inclusive scan steps + //--------------------------------------------------------------------- + + /// Inclusive prefix scan step (specialized for summation across int32 types) + __device__ __forceinline__ int InclusiveScanStep( + int input, ///< [in] Calling thread's input item. 
+ cub::Sum /*scan_op*/, ///< [in] Binary scan operator + int first_lane, ///< [in] Index of first lane in segment + int offset) ///< [in] Up-offset to pull from + { + int output; + int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) + + // Use predicate set from SHFL to guard against invalid peers +#ifdef CUB_USE_COOPERATIVE_GROUPS + asm volatile( + "{" + " .reg .s32 r0;" + " .reg .pred p;" + " shfl.sync.up.b32 r0|p, %1, %2, %3, %5;" + " @p add.s32 r0, r0, %4;" + " mov.s32 %0, r0;" + "}" + : "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input), "r"(member_mask)); +#else + asm volatile( + "{" + " .reg .s32 r0;" + " .reg .pred p;" + " shfl.up.b32 r0|p, %1, %2, %3;" + " @p add.s32 r0, r0, %4;" + " mov.s32 %0, r0;" + "}" + : "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input)); +#endif + + return output; + } + + /// Inclusive prefix scan step (specialized for summation across uint32 types) + __device__ __forceinline__ unsigned int InclusiveScanStep( + unsigned int input, ///< [in] Calling thread's input item. + cub::Sum /*scan_op*/, ///< [in] Binary scan operator + int first_lane, ///< [in] Index of first lane in segment + int offset) ///< [in] Up-offset to pull from + { + unsigned int output; + int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) + + // Use predicate set from SHFL to guard against invalid peers +#ifdef CUB_USE_COOPERATIVE_GROUPS + asm volatile( + "{" + " .reg .u32 r0;" + " .reg .pred p;" + " shfl.sync.up.b32 r0|p, %1, %2, %3, %5;" + " @p add.u32 r0, r0, %4;" + " mov.u32 %0, r0;" + "}" + : "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input), "r"(member_mask)); +#else + asm volatile( + "{" + " .reg .u32 r0;" + " .reg .pred p;" + " shfl.up.b32 r0|p, %1, %2, %3;" + " @p add.u32 r0, r0, %4;" + " mov.u32 %0, r0;" + "}" + : "=r"(output) : "r"(input), "r"(offset), "r"(shfl_c), "r"(input)); +#endif + + return output; + } + + + /// Inclusive prefix scan step (specialized for summation across fp32 types) + __device__ __forceinline__ float InclusiveScanStep( + float input, ///< [in] Calling thread's input item. + cub::Sum /*scan_op*/, ///< [in] Binary scan operator + int first_lane, ///< [in] Index of first lane in segment + int offset) ///< [in] Up-offset to pull from + { + float output; + int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) + + // Use predicate set from SHFL to guard against invalid peers +#ifdef CUB_USE_COOPERATIVE_GROUPS + asm volatile( + "{" + " .reg .f32 r0;" + " .reg .pred p;" + " shfl.sync.up.b32 r0|p, %1, %2, %3, %5;" + " @p add.f32 r0, r0, %4;" + " mov.f32 %0, r0;" + "}" + : "=f"(output) : "f"(input), "r"(offset), "r"(shfl_c), "f"(input), "r"(member_mask)); +#else + asm volatile( + "{" + " .reg .f32 r0;" + " .reg .pred p;" + " shfl.up.b32 r0|p, %1, %2, %3;" + " @p add.f32 r0, r0, %4;" + " mov.f32 %0, r0;" + "}" + : "=f"(output) : "f"(input), "r"(offset), "r"(shfl_c), "f"(input)); +#endif + + return output; + } + + + /// Inclusive prefix scan step (specialized for summation across unsigned long long types) + __device__ __forceinline__ unsigned long long InclusiveScanStep( + unsigned long long input, ///< [in] Calling thread's input item. 
+ cub::Sum /*scan_op*/, ///< [in] Binary scan operator + int first_lane, ///< [in] Index of first lane in segment + int offset) ///< [in] Up-offset to pull from + { + unsigned long long output; + int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) + + // Use predicate set from SHFL to guard against invalid peers +#ifdef CUB_USE_COOPERATIVE_GROUPS + asm volatile( + "{" + " .reg .u64 r0;" + " .reg .u32 lo;" + " .reg .u32 hi;" + " .reg .pred p;" + " mov.b64 {lo, hi}, %1;" + " shfl.sync.up.b32 lo|p, lo, %2, %3, %5;" + " shfl.sync.up.b32 hi|p, hi, %2, %3, %5;" + " mov.b64 r0, {lo, hi};" + " @p add.u64 r0, r0, %4;" + " mov.u64 %0, r0;" + "}" + : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "l"(input), "r"(member_mask)); +#else + asm volatile( + "{" + " .reg .u64 r0;" + " .reg .u32 lo;" + " .reg .u32 hi;" + " .reg .pred p;" + " mov.b64 {lo, hi}, %1;" + " shfl.up.b32 lo|p, lo, %2, %3;" + " shfl.up.b32 hi|p, hi, %2, %3;" + " mov.b64 r0, {lo, hi};" + " @p add.u64 r0, r0, %4;" + " mov.u64 %0, r0;" + "}" + : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "l"(input)); +#endif + + return output; + } + + + /// Inclusive prefix scan step (specialized for summation across long long types) + __device__ __forceinline__ long long InclusiveScanStep( + long long input, ///< [in] Calling thread's input item. + cub::Sum /*scan_op*/, ///< [in] Binary scan operator + int first_lane, ///< [in] Index of first lane in segment + int offset) ///< [in] Up-offset to pull from + { + long long output; + int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) + + // Use predicate set from SHFL to guard against invalid peers +#ifdef CUB_USE_COOPERATIVE_GROUPS + asm volatile( + "{" + " .reg .s64 r0;" + " .reg .u32 lo;" + " .reg .u32 hi;" + " .reg .pred p;" + " mov.b64 {lo, hi}, %1;" + " shfl.sync.up.b32 lo|p, lo, %2, %3, %5;" + " shfl.sync.up.b32 hi|p, hi, %2, %3, %5;" + " mov.b64 r0, {lo, hi};" + " @p add.s64 r0, r0, %4;" + " mov.s64 %0, r0;" + "}" + : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "l"(input), "r"(member_mask)); +#else + asm volatile( + "{" + " .reg .s64 r0;" + " .reg .u32 lo;" + " .reg .u32 hi;" + " .reg .pred p;" + " mov.b64 {lo, hi}, %1;" + " shfl.up.b32 lo|p, lo, %2, %3;" + " shfl.up.b32 hi|p, hi, %2, %3;" + " mov.b64 r0, {lo, hi};" + " @p add.s64 r0, r0, %4;" + " mov.s64 %0, r0;" + "}" + : "=l"(output) : "l"(input), "r"(offset), "r"(shfl_c), "l"(input)); +#endif + + return output; + } + + + /// Inclusive prefix scan step (specialized for summation across fp64 types) + __device__ __forceinline__ double InclusiveScanStep( + double input, ///< [in] Calling thread's input item. 
+ cub::Sum /*scan_op*/, ///< [in] Binary scan operator + int first_lane, ///< [in] Index of first lane in segment + int offset) ///< [in] Up-offset to pull from + { + double output; + int shfl_c = first_lane | SHFL_C; // Shuffle control (mask and first-lane) + + // Use predicate set from SHFL to guard against invalid peers +#ifdef CUB_USE_COOPERATIVE_GROUPS + asm volatile( + "{" + " .reg .u32 lo;" + " .reg .u32 hi;" + " .reg .pred p;" + " .reg .f64 r0;" + " mov.b64 %0, %1;" + " mov.b64 {lo, hi}, %1;" + " shfl.sync.up.b32 lo|p, lo, %2, %3, %4;" + " shfl.sync.up.b32 hi|p, hi, %2, %3, %4;" + " mov.b64 r0, {lo, hi};" + " @p add.f64 %0, %0, r0;" + "}" + : "=d"(output) : "d"(input), "r"(offset), "r"(shfl_c), "r"(member_mask)); +#else + asm volatile( + "{" + " .reg .u32 lo;" + " .reg .u32 hi;" + " .reg .pred p;" + " .reg .f64 r0;" + " mov.b64 %0, %1;" + " mov.b64 {lo, hi}, %1;" + " shfl.up.b32 lo|p, lo, %2, %3;" + " shfl.up.b32 hi|p, hi, %2, %3;" + " mov.b64 r0, {lo, hi};" + " @p add.f64 %0, %0, r0;" + "}" + : "=d"(output) : "d"(input), "r"(offset), "r"(shfl_c)); +#endif + + return output; + } + + +/* + /// Inclusive prefix scan (specialized for ReduceBySegmentOp across KeyValuePair types) + template + __device__ __forceinline__ KeyValuePairInclusiveScanStep( + KeyValuePair input, ///< [in] Calling thread's input item. + ReduceBySegmentOp scan_op, ///< [in] Binary scan operator + int first_lane, ///< [in] Index of first lane in segment + int offset) ///< [in] Up-offset to pull from + { + KeyValuePair output; + + output.value = InclusiveScanStep(input.value, cub::Sum(), first_lane, offset, Int2Type::IS_SMALL_UNSIGNED>()); + output.key = InclusiveScanStep(input.key, cub::Sum(), first_lane, offset, Int2Type::IS_SMALL_UNSIGNED>()); + + if (input.key > 0) + output.value = input.value; + + return output; + } +*/ + + /// Inclusive prefix scan step (generic) + template + __device__ __forceinline__ _T InclusiveScanStep( + _T input, ///< [in] Calling thread's input item. + ScanOpT scan_op, ///< [in] Binary scan operator + int first_lane, ///< [in] Index of first lane in segment + int offset) ///< [in] Up-offset to pull from + { + _T temp = ShuffleUp(input, offset, first_lane, member_mask); + + // Perform scan op if from a valid peer + _T output = scan_op(temp, input); + if (static_cast(lane_id) < first_lane + offset) + output = input; + + return output; + } + + + /// Inclusive prefix scan step (specialized for small integers size 32b or less) + template + __device__ __forceinline__ _T InclusiveScanStep( + _T input, ///< [in] Calling thread's input item. + ScanOpT scan_op, ///< [in] Binary scan operator + int first_lane, ///< [in] Index of first lane in segment + int offset, ///< [in] Up-offset to pull from + Int2Type /*is_small_unsigned*/) ///< [in] Marker type indicating whether T is a small integer + { + return InclusiveScanStep(input, scan_op, first_lane, offset); + } + + + /// Inclusive prefix scan step (specialized for types other than small integers size 32b or less) + template + __device__ __forceinline__ _T InclusiveScanStep( + _T input, ///< [in] Calling thread's input item. 
+ ScanOpT scan_op, ///< [in] Binary scan operator + int first_lane, ///< [in] Index of first lane in segment + int offset, ///< [in] Up-offset to pull from + Int2Type /*is_small_unsigned*/) ///< [in] Marker type indicating whether T is a small integer + { + return InclusiveScanStep(input, scan_op, first_lane, offset); + } + + + /****************************************************************************** + * Interface + ******************************************************************************/ + + //--------------------------------------------------------------------- + // Broadcast + //--------------------------------------------------------------------- + + /// Broadcast + __device__ __forceinline__ T Broadcast( + T input, ///< [in] The value to broadcast + int src_lane) ///< [in] Which warp lane is to do the broadcasting + { + return ShuffleIndex(input, src_lane, member_mask); + } + + + //--------------------------------------------------------------------- + // Inclusive operations + //--------------------------------------------------------------------- + + /// Inclusive scan + template + __device__ __forceinline__ void InclusiveScan( + _T input, ///< [in] Calling thread's input item. + _T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. + ScanOpT scan_op) ///< [in] Binary scan operator + { + inclusive_output = input; + + // Iterate scan steps + int segment_first_lane = 0; + + // Iterate scan steps + #pragma unroll + for (int STEP = 0; STEP < STEPS; STEP++) + { + inclusive_output = InclusiveScanStep( + inclusive_output, + scan_op, + segment_first_lane, + (1 << STEP), + Int2Type::IS_SMALL_UNSIGNED>()); + } + + } + + /// Inclusive scan, specialized for reduce-value-by-key + template + __device__ __forceinline__ void InclusiveScan( + KeyValuePair input, ///< [in] Calling thread's input item. + KeyValuePair &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. + ReduceByKeyOp scan_op) ///< [in] Binary scan operator + { + inclusive_output = input; + + KeyT pred_key = ShuffleUp(inclusive_output.key, 1, 0, member_mask); + + unsigned int ballot = WARP_BALLOT((pred_key != inclusive_output.key), member_mask); + + // Mask away all lanes greater than ours + ballot = ballot & LaneMaskLe(); + + // Find index of first set bit + int segment_first_lane = CUB_MAX(0, 31 - __clz(ballot)); + + // Iterate scan steps + #pragma unroll + for (int STEP = 0; STEP < STEPS; STEP++) + { + inclusive_output.value = InclusiveScanStep( + inclusive_output.value, + scan_op.op, + segment_first_lane, + (1 << STEP), + Int2Type::IS_SMALL_UNSIGNED>()); + } + } + + + /// Inclusive scan with aggregate + template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item. + T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. + ScanOpT scan_op, ///< [in] Binary scan operator + T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. 
+ { + InclusiveScan(input, inclusive_output, scan_op); + + // Grab aggregate from last warp lane + warp_aggregate = ShuffleIndex(inclusive_output, LOGICAL_WARP_THREADS - 1, member_mask); + } + + + //--------------------------------------------------------------------- + // Get exclusive from inclusive + //--------------------------------------------------------------------- + + /// Update inclusive and exclusive using input and inclusive + template + __device__ __forceinline__ void Update( + T /*input*/, ///< [in] + T &inclusive, ///< [in, out] + T &exclusive, ///< [out] + ScanOpT /*scan_op*/, ///< [in] + IsIntegerT /*is_integer*/) ///< [in] + { + // initial value unknown + exclusive = ShuffleUp(inclusive, 1, 0, member_mask); + } + + /// Update inclusive and exclusive using input and inclusive (specialized for summation of integer types) + __device__ __forceinline__ void Update( + T input, + T &inclusive, + T &exclusive, + cub::Sum /*scan_op*/, + Int2Type /*is_integer*/) + { + // initial value presumed 0 + exclusive = inclusive - input; + } + + /// Update inclusive and exclusive using initial value using input, inclusive, and initial value + template + __device__ __forceinline__ void Update ( + T /*input*/, + T &inclusive, + T &exclusive, + ScanOpT scan_op, + T initial_value, + IsIntegerT /*is_integer*/) + { + inclusive = scan_op(initial_value, inclusive); + exclusive = ShuffleUp(inclusive, 1, 0, member_mask); + + if (lane_id == 0) + exclusive = initial_value; + } + + /// Update inclusive and exclusive using initial value using input and inclusive (specialized for summation of integer types) + __device__ __forceinline__ void Update ( + T input, + T &inclusive, + T &exclusive, + cub::Sum scan_op, + T initial_value, + Int2Type /*is_integer*/) + { + inclusive = scan_op(initial_value, inclusive); + exclusive = inclusive - input; + } + + + /// Update inclusive, exclusive, and warp aggregate using input and inclusive + template + __device__ __forceinline__ void Update ( + T input, + T &inclusive, + T &exclusive, + T &warp_aggregate, + ScanOpT scan_op, + IsIntegerT is_integer) + { + warp_aggregate = ShuffleIndex(inclusive, LOGICAL_WARP_THREADS - 1, member_mask); + Update(input, inclusive, exclusive, scan_op, is_integer); + } + + /// Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value + template + __device__ __forceinline__ void Update ( + T input, + T &inclusive, + T &exclusive, + T &warp_aggregate, + ScanOpT scan_op, + T initial_value, + IsIntegerT is_integer) + { + warp_aggregate = ShuffleIndex(inclusive, LOGICAL_WARP_THREADS - 1, member_mask); + Update(input, inclusive, exclusive, scan_op, initial_value, is_integer); + } + + + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/specializations/warp_scan_smem.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/specializations/warp_scan_smem.cuh new file mode 100644 index 0000000..3237fcb --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/specializations/warp_scan_smem.cuh @@ -0,0 +1,397 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
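
The warp_scan_shfl specialization that just ended reduces, for the common case, to a Hillis-Steele scan over shuffle-up plus a choice of how Update() derives the exclusive result: subtract the input for integer sums, or shuffle the inclusive value up one lane otherwise. Below is a stripped-down standalone sketch of that structure using plain CUDA intrinsics rather than the inline PTX above; it assumes a single full 32-thread warp, the sum operator, and hypothetical buffer names (in, inclusive_out, exclusive_out).

    // Shuffle-based inclusive warp prefix sum plus both exclusive derivations.
    // Launch with one block of 32 threads; not the CUB implementation itself.
    __global__ void warp_scan_shfl_sketch(const int *in, int *inclusive_out, int *exclusive_out)
    {
        const unsigned FULL_MASK = 0xffffffffu;
        int lane = threadIdx.x & 31;                  // approximates LaneId() for this sketch

        int inclusive = in[lane];
        #pragma unroll
        for (int offset = 1; offset < 32; offset <<= 1)
        {
            int peer = __shfl_up_sync(FULL_MASK, inclusive, offset);
            if (lane >= offset)                       // guard against out-of-range peers
                inclusive += peer;                    // scan_op(peer, inclusive)
        }

        // Integer summation: exclusive = inclusive - input (initial value presumed 0).
        int exclusive = inclusive - in[lane];
        // A generic operator would instead shuffle the inclusive value up one lane,
        // leaving lane 0's exclusive result undefined:
        //   exclusive = __shfl_up_sync(FULL_MASK, inclusive, 1);

        inclusive_out[lane] = inclusive;
        exclusive_out[lane] = exclusive;
    }
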
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * cub::WarpScanSmem provides smem-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. + */ + +#pragma once + +#include "../../thread/thread_operators.cuh" +#include "../../thread/thread_load.cuh" +#include "../../thread/thread_store.cuh" +#include "../../util_type.cuh" +#include "../../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \brief WarpScanSmem provides smem-based variants of parallel prefix scan of items partitioned across a CUDA thread warp. 
+ */ +template < + typename T, ///< Data type being scanned + int LOGICAL_WARP_THREADS, ///< Number of threads per logical warp + int PTX_ARCH> ///< The PTX compute capability for which to to specialize this collective +struct WarpScanSmem +{ + /****************************************************************************** + * Constants and type definitions + ******************************************************************************/ + + enum + { + /// Whether the logical warp size and the PTX warp size coincide + IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), + + /// Whether the logical warp size is a power-of-two + IS_POW_OF_TWO = PowerOfTwo::VALUE, + + /// The number of warp scan steps + STEPS = Log2::VALUE, + + /// The number of threads in half a warp + HALF_WARP_THREADS = 1 << (STEPS - 1), + + /// The number of shared memory elements per warp + WARP_SMEM_ELEMENTS = LOGICAL_WARP_THREADS + HALF_WARP_THREADS, + }; + + /// Storage cell type (workaround for SM1x compiler bugs with custom-ops like Max() on signed chars) + typedef typename If<((Equals::VALUE || Equals::VALUE) && (PTX_ARCH < 200)), int, T>::Type CellT; + + /// Shared memory storage layout type (1.5 warps-worth of elements for each warp) + typedef CellT _TempStorage[WARP_SMEM_ELEMENTS]; + + // Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + _TempStorage &temp_storage; + unsigned int lane_id; + unsigned int member_mask; + + + /****************************************************************************** + * Construction + ******************************************************************************/ + + /// Constructor + __device__ __forceinline__ WarpScanSmem( + TempStorage &temp_storage) + : + temp_storage(temp_storage.Alias()), + + lane_id(IS_ARCH_WARP ? + LaneId() : + LaneId() % LOGICAL_WARP_THREADS), + + member_mask((0xffffffff >> (32 - LOGICAL_WARP_THREADS)) << ((IS_ARCH_WARP || !IS_POW_OF_TWO ) ? 
+ 0 : // arch-width and non-power-of-two subwarps cannot be tiled with the arch-warp + ((LaneId() / LOGICAL_WARP_THREADS) * LOGICAL_WARP_THREADS))) + {} + + + /****************************************************************************** + * Utility methods + ******************************************************************************/ + + /// Basic inclusive scan iteration (template unrolled, inductive-case specialization) + template < + bool HAS_IDENTITY, + int STEP, + typename ScanOp> + __device__ __forceinline__ void ScanStep( + T &partial, + ScanOp scan_op, + Int2Type /*step*/) + { + const int OFFSET = 1 << STEP; + + // Share partial into buffer + ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) partial); + + WARP_SYNC(member_mask); + + // Update partial if addend is in range + if (HAS_IDENTITY || (lane_id >= OFFSET)) + { + T addend = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - OFFSET]); + partial = scan_op(addend, partial); + } + WARP_SYNC(member_mask); + + ScanStep(partial, scan_op, Int2Type()); + } + + + /// Basic inclusive scan iteration(template unrolled, base-case specialization) + template < + bool HAS_IDENTITY, + typename ScanOp> + __device__ __forceinline__ void ScanStep( + T &/*partial*/, + ScanOp /*scan_op*/, + Int2Type /*step*/) + {} + + + /// Inclusive prefix scan (specialized for summation across primitive types) + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item. + T &output, ///< [out] Calling thread's output item. May be aliased with \p input. + Sum scan_op, ///< [in] Binary scan operator + Int2Type /*is_primitive*/) ///< [in] Marker type indicating whether T is primitive type + { + T identity = 0; + ThreadStore(&temp_storage[lane_id], (CellT) identity); + + WARP_SYNC(member_mask); + + // Iterate scan steps + output = input; + ScanStep(output, scan_op, Int2Type<0>()); + } + + + /// Inclusive prefix scan + template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item. + T &output, ///< [out] Calling thread's output item. May be aliased with \p input. + ScanOp scan_op, ///< [in] Binary scan operator + Int2Type /*is_primitive*/) ///< [in] Marker type indicating whether T is primitive type + { + // Iterate scan steps + output = input; + ScanStep(output, scan_op, Int2Type<0>()); + } + + + /****************************************************************************** + * Interface + ******************************************************************************/ + + //--------------------------------------------------------------------- + // Broadcast + //--------------------------------------------------------------------- + + /// Broadcast + __device__ __forceinline__ T Broadcast( + T input, ///< [in] The value to broadcast + unsigned int src_lane) ///< [in] Which warp lane is to do the broadcasting + { + if (lane_id == src_lane) + { + ThreadStore(temp_storage, (CellT) input); + } + + WARP_SYNC(member_mask); + + return (T)ThreadLoad(temp_storage); + } + + + //--------------------------------------------------------------------- + // Inclusive operations + //--------------------------------------------------------------------- + + /// Inclusive scan + template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item. + T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. 
+ ScanOp scan_op) ///< [in] Binary scan operator + { + InclusiveScan(input, inclusive_output, scan_op, Int2Type::PRIMITIVE>()); + } + + + /// Inclusive scan with aggregate + template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item. + T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. + ScanOp scan_op, ///< [in] Binary scan operator + T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. + { + InclusiveScan(input, inclusive_output, scan_op); + + // Retrieve aggregate + ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive_output); + + WARP_SYNC(member_mask); + + warp_aggregate = (T) ThreadLoad(&temp_storage[WARP_SMEM_ELEMENTS - 1]); + + WARP_SYNC(member_mask); + } + + + //--------------------------------------------------------------------- + // Get exclusive from inclusive + //--------------------------------------------------------------------- + + /// Update inclusive and exclusive using input and inclusive + template + __device__ __forceinline__ void Update( + T /*input*/, ///< [in] + T &inclusive, ///< [in, out] + T &exclusive, ///< [out] + ScanOpT /*scan_op*/, ///< [in] + IsIntegerT /*is_integer*/) ///< [in] + { + // initial value unknown + ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive); + + WARP_SYNC(member_mask); + + exclusive = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - 1]); + } + + /// Update inclusive and exclusive using input and inclusive (specialized for summation of integer types) + __device__ __forceinline__ void Update( + T input, + T &inclusive, + T &exclusive, + cub::Sum /*scan_op*/, + Int2Type /*is_integer*/) + { + // initial value presumed 0 + exclusive = inclusive - input; + } + + /// Update inclusive and exclusive using initial value using input, inclusive, and initial value + template + __device__ __forceinline__ void Update ( + T /*input*/, + T &inclusive, + T &exclusive, + ScanOpT scan_op, + T initial_value, + IsIntegerT /*is_integer*/) + { + inclusive = scan_op(initial_value, inclusive); + ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive); + + WARP_SYNC(member_mask); + + exclusive = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - 1]); + if (lane_id == 0) + exclusive = initial_value; + } + + /// Update inclusive and exclusive using initial value using input and inclusive (specialized for summation of integer types) + __device__ __forceinline__ void Update ( + T input, + T &inclusive, + T &exclusive, + cub::Sum scan_op, + T initial_value, + Int2Type /*is_integer*/) + { + inclusive = scan_op(initial_value, inclusive); + exclusive = inclusive - input; + } + + + /// Update inclusive, exclusive, and warp aggregate using input and inclusive + template + __device__ __forceinline__ void Update ( + T /*input*/, + T &inclusive, + T &exclusive, + T &warp_aggregate, + ScanOpT /*scan_op*/, + IsIntegerT /*is_integer*/) + { + // Initial value presumed to be unknown or identity (either way our padding is correct) + ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive); + + WARP_SYNC(member_mask); + + exclusive = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - 1]); + warp_aggregate = (T) ThreadLoad(&temp_storage[WARP_SMEM_ELEMENTS - 1]); + } + + /// Update inclusive, exclusive, and warp aggregate using input and inclusive (specialized for summation of integer types) + __device__ __forceinline__ void Update ( + T input, + T &inclusive, + T 
&exclusive, + T &warp_aggregate, + cub::Sum /*scan_o*/, + Int2Type /*is_integer*/) + { + // Initial value presumed to be unknown or identity (either way our padding is correct) + ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive); + + WARP_SYNC(member_mask); + + warp_aggregate = (T) ThreadLoad(&temp_storage[WARP_SMEM_ELEMENTS - 1]); + exclusive = inclusive - input; + } + + /// Update inclusive, exclusive, and warp aggregate using input, inclusive, and initial value + template + __device__ __forceinline__ void Update ( + T /*input*/, + T &inclusive, + T &exclusive, + T &warp_aggregate, + ScanOpT scan_op, + T initial_value, + IsIntegerT /*is_integer*/) + { + // Broadcast warp aggregate + ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id], (CellT) inclusive); + + WARP_SYNC(member_mask); + + warp_aggregate = (T) ThreadLoad(&temp_storage[WARP_SMEM_ELEMENTS - 1]); + + WARP_SYNC(member_mask); + + // Update inclusive with initial value + inclusive = scan_op(initial_value, inclusive); + + // Get exclusive from exclusive + ThreadStore(&temp_storage[HALF_WARP_THREADS + lane_id - 1], (CellT) inclusive); + + WARP_SYNC(member_mask); + + exclusive = (T) ThreadLoad(&temp_storage[HALF_WARP_THREADS + lane_id - 2]); + + if (lane_id == 0) + exclusive = initial_value; + } + + +}; + + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/warp_reduce.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/warp_reduce.cuh new file mode 100644 index 0000000..189896b --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/warp_reduce.cuh @@ -0,0 +1,612 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
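
WarpScanSmem above sizes its buffer at WARP_SMEM_ELEMENTS = LOGICAL_WARP_THREADS + HALF_WARP_THREADS so that, for summation, half a warp of identity padding replaces the per-step bounds check. Below is a standalone sketch of that padded Hillis-Steele scan for one 32-thread warp, launched as a single 32-thread block; it is illustrative only, and buf, in, and out are assumed names.

    // Padded shared-memory inclusive warp prefix sum, mirroring ScanStep above.
    __global__ void warp_scan_smem_sketch(const int *in, int *out)
    {
        __shared__ int buf[16 + 32];              // HALF_WARP_THREADS padding + one warp of partials
        const int lane = threadIdx.x;             // this sketch launches exactly 32 threads

        buf[lane] = 0;                            // identity padding (covers buf[0..15])
        __syncwarp();

        int partial = in[lane];
        #pragma unroll
        for (int offset = 1; offset < 32; offset <<= 1)
        {
            buf[16 + lane] = partial;             // share partial into the buffer
            __syncwarp();
            partial += buf[16 + lane - offset];   // padding makes this read safe for every lane
            __syncwarp();
        }
        out[lane] = partial;                      // inclusive prefix sum
    }
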
+ * + ******************************************************************************/ + +/** + * \file + * The cub::WarpReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across a CUDA thread warp. + */ + +#pragma once + +#include "specializations/warp_reduce_shfl.cuh" +#include "specializations/warp_reduce_smem.cuh" +#include "../thread/thread_operators.cuh" +#include "../util_arch.cuh" +#include "../util_type.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + + +/** + * \addtogroup WarpModule + * @{ + */ + +/** + * \brief The WarpReduce class provides [collective](index.html#sec0) methods for computing a parallel reduction of items partitioned across a CUDA thread warp. ![](warp_reduce_logo.png) + * + * \tparam T The reduction input/output element type + * \tparam LOGICAL_WARP_THREADS [optional] The number of threads per "logical" warp (may be less than the number of hardware warp threads). Default is the warp size of the targeted CUDA compute-capability (e.g., 32 threads for SM20). + * \tparam PTX_ARCH [optional] \ptxversion + * + * \par Overview + * - A reduction (or fold) + * uses a binary combining operator to compute a single aggregate from a list of input elements. + * - Supports "logical" warps smaller than the physical warp size (e.g., logical warps of 8 threads) + * - The number of entrant threads must be an multiple of \p LOGICAL_WARP_THREADS + * + * \par Performance Considerations + * - Uses special instructions when applicable (e.g., warp \p SHFL instructions) + * - Uses synchronization-free communication between warp lanes when applicable + * - Incurs zero bank conflicts for most types + * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: + * - Summation (vs. generic reduction) + * - The architecture's warp size is a whole multiple of \p LOGICAL_WARP_THREADS + * + * \par Simple Examples + * \warpcollective{WarpReduce} + * \par + * The code snippet below illustrates four concurrent warp sum reductions within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpReduce for type int + * typedef cub::WarpReduce WarpReduce; + * + * // Allocate WarpReduce shared memory for 4 warps + * __shared__ typename WarpReduce::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Return the warp-wide sums to each lane0 (threads 0, 32, 64, and 96) + * int warp_id = threadIdx.x / 32; + * int aggregate = WarpReduce(temp_storage[warp_id]).Sum(thread_data); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {0, 1, 2, 3, ..., 127}. + * The corresponding output \p aggregate in threads 0, 32, 64, and 96 will \p 496, \p 1520, + * \p 2544, and \p 3568, respectively (and is undefined in other threads). + * + * \par + * The code snippet below illustrates a single warp sum reduction within a block of + * 128 threads. + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpReduce for type int + * typedef cub::WarpReduce WarpReduce; + * + * // Allocate WarpReduce shared memory for one warp + * __shared__ typename WarpReduce::TempStorage temp_storage; + * ... 
+ * + * // Only the first warp performs a reduction + * if (threadIdx.x < 32) + * { + * // Obtain one input item per thread + * int thread_data = ... + * + * // Return the warp-wide sum to lane0 + * int aggregate = WarpReduce(temp_storage).Sum(thread_data); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the warp of threads is {0, 1, 2, 3, ..., 31}. + * The corresponding output \p aggregate in thread0 will be \p 496 (and is undefined in other threads). + * + */ +template < + typename T, + int LOGICAL_WARP_THREADS = CUB_PTX_WARP_THREADS, + int PTX_ARCH = CUB_PTX_ARCH> +class WarpReduce +{ +private: + + /****************************************************************************** + * Constants and type definitions + ******************************************************************************/ + + enum + { + /// Whether the logical warp size and the PTX warp size coincide + IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), + + /// Whether the logical warp size is a power-of-two + IS_POW_OF_TWO = PowerOfTwo::VALUE, + }; + +public: + + #ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document + + /// Internal specialization. Use SHFL-based reduction if (architecture is >= SM30) and (LOGICAL_WARP_THREADS is a power-of-two) + typedef typename If<(PTX_ARCH >= 300) && (IS_POW_OF_TWO), + WarpReduceShfl, + WarpReduceSmem >::Type InternalWarpReduce; + + #endif // DOXYGEN_SHOULD_SKIP_THIS + + +private: + + /// Shared memory storage layout type for WarpReduce + typedef typename InternalWarpReduce::TempStorage _TempStorage; + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + /// Shared storage reference + _TempStorage &temp_storage; + + + /****************************************************************************** + * Utility methods + ******************************************************************************/ + +public: + + /// \smemstorage{WarpReduce} + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /******************************************************************//** + * \name Collective constructors + *********************************************************************/ + //@{ + + + /** + * \brief Collective constructor using the specified memory allocation as temporary storage. Logical warp and lane identifiers are constructed from threadIdx.x. + */ + __device__ __forceinline__ WarpReduce( + TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage + : + temp_storage(temp_storage.Alias()) + {} + + + //@} end member group + /******************************************************************//** + * \name Summation reductions + *********************************************************************/ + //@{ + + + /** + * \brief Computes a warp-wide sum in the calling warp. The output is valid in warp lane0. + * + * \smemreuse + * + * \par Snippet + * The code snippet below illustrates four concurrent warp sum reductions within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpReduce for type int + * typedef cub::WarpReduce WarpReduce; + * + * // Allocate WarpReduce shared memory for 4 warps + * __shared__ typename WarpReduce::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... 
+ * + * // Return the warp-wide sums to each lane0 + * int warp_id = threadIdx.x / 32; + * int aggregate = WarpReduce(temp_storage[warp_id]).Sum(thread_data); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {0, 1, 2, 3, ..., 127}. + * The corresponding output \p aggregate in threads 0, 32, 64, and 96 will \p 496, \p 1520, + * \p 2544, and \p 3568, respectively (and is undefined in other threads). + * + */ + __device__ __forceinline__ T Sum( + T input) ///< [in] Calling thread's input + { + return InternalWarpReduce(temp_storage).template Reduce(input, LOGICAL_WARP_THREADS, cub::Sum()); + } + + /** + * \brief Computes a partially-full warp-wide sum in the calling warp. The output is valid in warp lane0. + * + * All threads across the calling warp must agree on the same value for \p valid_items. Otherwise the result is undefined. + * + * \smemreuse + * + * \par Snippet + * The code snippet below illustrates a sum reduction within a single, partially-full + * block of 32 threads (one warp). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(int *d_data, int valid_items) + * { + * // Specialize WarpReduce for type int + * typedef cub::WarpReduce WarpReduce; + * + * // Allocate WarpReduce shared memory for one warp + * __shared__ typename WarpReduce::TempStorage temp_storage; + * + * // Obtain one input item per thread if in range + * int thread_data; + * if (threadIdx.x < valid_items) + * thread_data = d_data[threadIdx.x]; + * + * // Return the warp-wide sums to each lane0 + * int aggregate = WarpReduce(temp_storage).Sum( + * thread_data, valid_items); + * + * \endcode + * \par + * Suppose the input \p d_data is {0, 1, 2, 3, 4, ... and \p valid_items + * is \p 4. The corresponding output \p aggregate in thread0 is \p 6 (and is + * undefined in other threads). + * + */ + __device__ __forceinline__ T Sum( + T input, ///< [in] Calling thread's input + int valid_items) ///< [in] Total number of valid items in the calling thread's logical warp (may be less than \p LOGICAL_WARP_THREADS) + { + // Determine if we don't need bounds checking + return InternalWarpReduce(temp_storage).template Reduce(input, valid_items, cub::Sum()); + } + + + /** + * \brief Computes a segmented sum in the calling warp where segments are defined by head-flags. The sum of each segment is returned to the first lane in that segment (which always includes lane0). + * + * \smemreuse + * + * \par Snippet + * The code snippet below illustrates a head-segmented warp sum + * reduction within a block of 32 threads (one warp). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpReduce for type int + * typedef cub::WarpReduce WarpReduce; + * + * // Allocate WarpReduce shared memory for one warp + * __shared__ typename WarpReduce::TempStorage temp_storage; + * + * // Obtain one input item and flag per thread + * int thread_data = ... + * int head_flag = ... + * + * // Return the warp-wide sums to each lane0 + * int aggregate = WarpReduce(temp_storage).HeadSegmentedSum( + * thread_data, head_flag); + * + * \endcode + * \par + * Suppose the set of input \p thread_data and \p head_flag across the block of threads + * is {0, 1, 2, 3, ..., 31 and is {1, 0, 0, 0, 1, 0, 0, 0, ..., 1, 0, 0, 0, + * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be + * \p 6, \p 22, \p 38, etc. (and is undefined in other threads). 
+ * + * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) + * + */ + template < + typename FlagT> + __device__ __forceinline__ T HeadSegmentedSum( + T input, ///< [in] Calling thread's input + FlagT head_flag) ///< [in] Head flag denoting whether or not \p input is the start of a new segment + { + return HeadSegmentedReduce(input, head_flag, cub::Sum()); + } + + + /** + * \brief Computes a segmented sum in the calling warp where segments are defined by tail-flags. The sum of each segment is returned to the first lane in that segment (which always includes lane0). + * + * \smemreuse + * + * \par Snippet + * The code snippet below illustrates a tail-segmented warp sum + * reduction within a block of 32 threads (one warp). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpReduce for type int + * typedef cub::WarpReduce WarpReduce; + * + * // Allocate WarpReduce shared memory for one warp + * __shared__ typename WarpReduce::TempStorage temp_storage; + * + * // Obtain one input item and flag per thread + * int thread_data = ... + * int tail_flag = ... + * + * // Return the warp-wide sums to each lane0 + * int aggregate = WarpReduce(temp_storage).TailSegmentedSum( + * thread_data, tail_flag); + * + * \endcode + * \par + * Suppose the set of input \p thread_data and \p tail_flag across the block of threads + * is {0, 1, 2, 3, ..., 31 and is {0, 0, 0, 1, 0, 0, 0, 1, ..., 0, 0, 0, 1, + * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be + * \p 6, \p 22, \p 38, etc. (and is undefined in other threads). + * + * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) + */ + template < + typename FlagT> + __device__ __forceinline__ T TailSegmentedSum( + T input, ///< [in] Calling thread's input + FlagT tail_flag) ///< [in] Head flag denoting whether or not \p input is the start of a new segment + { + return TailSegmentedReduce(input, tail_flag, cub::Sum()); + } + + + + //@} end member group + /******************************************************************//** + * \name Generic reductions + *********************************************************************/ + //@{ + + /** + * \brief Computes a warp-wide reduction in the calling warp using the specified binary reduction functor. The output is valid in warp lane0. + * + * Supports non-commutative reduction operators + * + * \smemreuse + * + * \par Snippet + * The code snippet below illustrates four concurrent warp max reductions within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpReduce for type int + * typedef cub::WarpReduce WarpReduce; + * + * // Allocate WarpReduce shared memory for 4 warps + * __shared__ typename WarpReduce::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Return the warp-wide reductions to each lane0 + * int warp_id = threadIdx.x / 32; + * int aggregate = WarpReduce(temp_storage[warp_id]).Reduce( + * thread_data, cub::Max()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {0, 1, 2, 3, ..., 127}. + * The corresponding output \p aggregate in threads 0, 32, 64, and 96 will \p 31, \p 63, + * \p 95, and \p 127, respectively (and is undefined in other threads). 
+ * + * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ T Reduce( + T input, ///< [in] Calling thread's input + ReductionOp reduction_op) ///< [in] Binary reduction operator + { + return InternalWarpReduce(temp_storage).template Reduce(input, LOGICAL_WARP_THREADS, reduction_op); + } + + /** + * \brief Computes a partially-full warp-wide reduction in the calling warp using the specified binary reduction functor. The output is valid in warp lane0. + * + * All threads across the calling warp must agree on the same value for \p valid_items. Otherwise the result is undefined. + * + * Supports non-commutative reduction operators + * + * \smemreuse + * + * \par Snippet + * The code snippet below illustrates a max reduction within a single, partially-full + * block of 32 threads (one warp). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(int *d_data, int valid_items) + * { + * // Specialize WarpReduce for type int + * typedef cub::WarpReduce WarpReduce; + * + * // Allocate WarpReduce shared memory for one warp + * __shared__ typename WarpReduce::TempStorage temp_storage; + * + * // Obtain one input item per thread if in range + * int thread_data; + * if (threadIdx.x < valid_items) + * thread_data = d_data[threadIdx.x]; + * + * // Return the warp-wide reductions to each lane0 + * int aggregate = WarpReduce(temp_storage).Reduce( + * thread_data, cub::Max(), valid_items); + * + * \endcode + * \par + * Suppose the input \p d_data is {0, 1, 2, 3, 4, ... and \p valid_items + * is \p 4. The corresponding output \p aggregate in thread0 is \p 3 (and is + * undefined in other threads). + * + * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ T Reduce( + T input, ///< [in] Calling thread's input + ReductionOp reduction_op, ///< [in] Binary reduction operator + int valid_items) ///< [in] Total number of valid items in the calling thread's logical warp (may be less than \p LOGICAL_WARP_THREADS) + { + return InternalWarpReduce(temp_storage).template Reduce(input, valid_items, reduction_op); + } + + + /** + * \brief Computes a segmented reduction in the calling warp where segments are defined by head-flags. The reduction of each segment is returned to the first lane in that segment (which always includes lane0). + * + * Supports non-commutative reduction operators + * + * \smemreuse + * + * \par Snippet + * The code snippet below illustrates a head-segmented warp max + * reduction within a block of 32 threads (one warp). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpReduce for type int + * typedef cub::WarpReduce WarpReduce; + * + * // Allocate WarpReduce shared memory for one warp + * __shared__ typename WarpReduce::TempStorage temp_storage; + * + * // Obtain one input item and flag per thread + * int thread_data = ... + * int head_flag = ... + * + * // Return the warp-wide reductions to each lane0 + * int aggregate = WarpReduce(temp_storage).HeadSegmentedReduce( + * thread_data, head_flag, cub::Max()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data and \p head_flag across the block of threads + * is {0, 1, 2, 3, ..., 31 and is {1, 0, 0, 0, 1, 0, 0, 0, ..., 1, 0, 0, 0, + * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be + * \p 3, \p 7, \p 11, etc. 
(and is undefined in other threads). + * + * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) + */ + template < + typename ReductionOp, + typename FlagT> + __device__ __forceinline__ T HeadSegmentedReduce( + T input, ///< [in] Calling thread's input + FlagT head_flag, ///< [in] Head flag denoting whether or not \p input is the start of a new segment + ReductionOp reduction_op) ///< [in] Reduction operator + { + return InternalWarpReduce(temp_storage).template SegmentedReduce(input, head_flag, reduction_op); + } + + + /** + * \brief Computes a segmented reduction in the calling warp where segments are defined by tail-flags. The reduction of each segment is returned to the first lane in that segment (which always includes lane0). + * + * Supports non-commutative reduction operators + * + * \smemreuse + * + * \par Snippet + * The code snippet below illustrates a tail-segmented warp max + * reduction within a block of 32 threads (one warp). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpReduce for type int + * typedef cub::WarpReduce WarpReduce; + * + * // Allocate WarpReduce shared memory for one warp + * __shared__ typename WarpReduce::TempStorage temp_storage; + * + * // Obtain one input item and flag per thread + * int thread_data = ... + * int tail_flag = ... + * + * // Return the warp-wide reductions to each lane0 + * int aggregate = WarpReduce(temp_storage).TailSegmentedReduce( + * thread_data, tail_flag, cub::Max()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data and \p tail_flag across the block of threads + * is {0, 1, 2, 3, ..., 31 and is {0, 0, 0, 1, 0, 0, 0, 1, ..., 0, 0, 0, 1, + * respectively. The corresponding output \p aggregate in threads 0, 4, 8, etc. will be + * \p 3, \p 7, \p 11, etc. (and is undefined in other threads). + * + * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) + */ + template < + typename ReductionOp, + typename FlagT> + __device__ __forceinline__ T TailSegmentedReduce( + T input, ///< [in] Calling thread's input + FlagT tail_flag, ///< [in] Tail flag denoting whether or not \p input is the end of the current segment + ReductionOp reduction_op) ///< [in] Reduction operator + { + return InternalWarpReduce(temp_storage).template SegmentedReduce(input, tail_flag, reduction_op); + } + + + + //@} end member group +}; + +/** @} */ // end group WarpModule + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/warp_scan.cuh b/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/warp_scan.cuh new file mode 100644 index 0000000..c7af0d3 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/cub/warp/warp_scan.cuh @@ -0,0 +1,936 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
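
The usage snippets embedded in warp_reduce.cuh's comments lost their angle-bracketed template arguments and include targets during extraction (for example, cub::WarpReduce should read cub::WarpReduce<int>). Below is a compilable version of the four-warps-per-block example, assuming the vendored cub-1.8.0 headers are on the include path; the kernel and pointer names are placeholders.

    #include <cub/cub.cuh>

    __global__ void WarpReduceExample(const int *d_in, int *d_warp_sums, int *d_warp_maxes)
    {
        // Specialize WarpReduce for int with the default 32-thread logical warp
        typedef cub::WarpReduce<int> WarpReduce;

        // One TempStorage per warp in a 128-thread block
        __shared__ typename WarpReduce::TempStorage temp_storage[4];

        int thread_data = d_in[blockIdx.x * blockDim.x + threadIdx.x];
        int warp_id     = threadIdx.x / 32;

        // Warp-wide sum; valid only in each warp's lane 0
        int warp_sum = WarpReduce(temp_storage[warp_id]).Sum(thread_data);

        __syncwarp();   // storage may be reused after a sync (matters for the smem specialization)

        // Warp-wide max via the generic reduction with cub::Max(); also valid only in lane 0
        int warp_max = WarpReduce(temp_storage[warp_id]).Reduce(thread_data, cub::Max());

        if (threadIdx.x % 32 == 0)
        {
            d_warp_sums[blockIdx.x * 4 + warp_id]  = warp_sum;
            d_warp_maxes[blockIdx.x * 4 + warp_id] = warp_max;
        }
    }
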
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/** + * \file + * The cub::WarpScan class provides [collective](index.html#sec0) methods for computing a parallel prefix scan of items partitioned across a CUDA thread warp. + */ + +#pragma once + +#include "specializations/warp_scan_shfl.cuh" +#include "specializations/warp_scan_smem.cuh" +#include "../thread/thread_operators.cuh" +#include "../util_arch.cuh" +#include "../util_type.cuh" +#include "../util_namespace.cuh" + +/// Optional outer namespace(s) +CUB_NS_PREFIX + +/// CUB namespace +namespace cub { + +/** + * \addtogroup WarpModule + * @{ + */ + +/** + * \brief The WarpScan class provides [collective](index.html#sec0) methods for computing a parallel prefix scan of items partitioned across a CUDA thread warp. ![](warp_scan_logo.png) + * + * \tparam T The scan input/output element type + * \tparam LOGICAL_WARP_THREADS [optional] The number of threads per "logical" warp (may be less than the number of hardware warp threads). Default is the warp size associated with the CUDA Compute Capability targeted by the compiler (e.g., 32 threads for SM20). + * \tparam PTX_ARCH [optional] \ptxversion + * + * \par Overview + * - Given a list of input elements and a binary reduction operator, a [prefix scan](http://en.wikipedia.org/wiki/Prefix_sum) + * produces an output list where each element is computed to be the reduction + * of the elements occurring earlier in the input list. Prefix sum + * connotes a prefix scan with the addition operator. The term \em inclusive indicates + * that the ith output reduction incorporates the ith input. + * The term \em exclusive indicates the ith input is not incorporated into + * the ith output reduction. + * - Supports non-commutative scan operators + * - Supports "logical" warps smaller than the physical warp size (e.g., a logical warp of 8 threads) + * - The number of entrant threads must be an multiple of \p LOGICAL_WARP_THREADS + * + * \par Performance Considerations + * - Uses special instructions when applicable (e.g., warp \p SHFL) + * - Uses synchronization-free communication between warp lanes when applicable + * - Incurs zero bank conflicts for most types + * - Computation is slightly more efficient (i.e., having lower instruction overhead) for: + * - Summation (vs. 
generic scan) + * - The architecture's warp size is a whole multiple of \p LOGICAL_WARP_THREADS + * + * \par Simple Examples + * \warpcollective{WarpScan} + * \par + * The code snippet below illustrates four concurrent warp prefix sums within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for 4 warps + * __shared__ typename WarpScan::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Compute warp-wide prefix sums + * int warp_id = threadIdx.x / 32; + * WarpScan(temp_storage[warp_id]).ExclusiveSum(thread_data, thread_data); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. + * The corresponding output \p thread_data in each of the four warps of threads will be + * 0, 1, 2, 3, ..., 31}. + * + * \par + * The code snippet below illustrates a single warp prefix sum within a block of + * 128 threads. + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for one warp + * __shared__ typename WarpScan::TempStorage temp_storage; + * ... + * + * // Only the first warp performs a prefix sum + * if (threadIdx.x < 32) + * { + * // Obtain one input item per thread + * int thread_data = ... + * + * // Compute warp-wide prefix sums + * WarpScan(temp_storage).ExclusiveSum(thread_data, thread_data); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the warp of threads is {1, 1, 1, 1, ...}. + * The corresponding output \p thread_data will be {0, 1, 2, 3, ..., 31}. + * + */ +template < + typename T, + int LOGICAL_WARP_THREADS = CUB_PTX_WARP_THREADS, + int PTX_ARCH = CUB_PTX_ARCH> +class WarpScan +{ +private: + + /****************************************************************************** + * Constants and type definitions + ******************************************************************************/ + + enum + { + /// Whether the logical warp size and the PTX warp size coincide + IS_ARCH_WARP = (LOGICAL_WARP_THREADS == CUB_WARP_THREADS(PTX_ARCH)), + + /// Whether the logical warp size is a power-of-two + IS_POW_OF_TWO = ((LOGICAL_WARP_THREADS & (LOGICAL_WARP_THREADS - 1)) == 0), + + /// Whether the data type is an integer (which has fully-associative addition) + IS_INTEGER = ((Traits::CATEGORY == SIGNED_INTEGER) || (Traits::CATEGORY == UNSIGNED_INTEGER)) + }; + + /// Internal specialization. 
Use SHFL-based scan if (architecture is >= SM30) and (LOGICAL_WARP_THREADS is a power-of-two) + typedef typename If<(PTX_ARCH >= 300) && (IS_POW_OF_TWO), + WarpScanShfl, + WarpScanSmem >::Type InternalWarpScan; + + /// Shared memory storage layout type for WarpScan + typedef typename InternalWarpScan::TempStorage _TempStorage; + + + /****************************************************************************** + * Thread fields + ******************************************************************************/ + + /// Shared storage reference + _TempStorage &temp_storage; + unsigned int lane_id; + + + + /****************************************************************************** + * Public types + ******************************************************************************/ + +public: + + /// \smemstorage{WarpScan} + struct TempStorage : Uninitialized<_TempStorage> {}; + + + /******************************************************************//** + * \name Collective constructors + *********************************************************************/ + //@{ + + /** + * \brief Collective constructor using the specified memory allocation as temporary storage. Logical warp and lane identifiers are constructed from threadIdx.x. + */ + __device__ __forceinline__ WarpScan( + TempStorage &temp_storage) ///< [in] Reference to memory allocation having layout type TempStorage + : + temp_storage(temp_storage.Alias()), + lane_id(IS_ARCH_WARP ? + LaneId() : + LaneId() % LOGICAL_WARP_THREADS) + {} + + + //@} end member group + /******************************************************************//** + * \name Inclusive prefix sums + *********************************************************************/ + //@{ + + + /** + * \brief Computes an inclusive prefix sum across the calling warp. + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates four concurrent warp-wide inclusive prefix sums within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for 4 warps + * __shared__ typename WarpScan::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Compute inclusive warp-wide prefix sums + * int warp_id = threadIdx.x / 32; + * WarpScan(temp_storage[warp_id]).InclusiveSum(thread_data, thread_data); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. + * The corresponding output \p thread_data in each of the four warps of threads will be + * 1, 2, 3, ..., 32}. + */ + __device__ __forceinline__ void InclusiveSum( + T input, ///< [in] Calling thread's input item. + T &inclusive_output) ///< [out] Calling thread's output item. May be aliased with \p input. + { + InclusiveScan(input, inclusive_output, cub::Sum()); + } + + + /** + * \brief Computes an inclusive prefix sum across the calling warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates four concurrent warp-wide inclusive prefix sums within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) 
+ * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for 4 warps + * __shared__ typename WarpScan::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Compute inclusive warp-wide prefix sums + * int warp_aggregate; + * int warp_id = threadIdx.x / 32; + * WarpScan(temp_storage[warp_id]).InclusiveSum(thread_data, thread_data, warp_aggregate); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. + * The corresponding output \p thread_data in each of the four warps of threads will be + * 1, 2, 3, ..., 32}. Furthermore, \p warp_aggregate for all threads in all warps will be \p 32. + */ + __device__ __forceinline__ void InclusiveSum( + T input, ///< [in] Calling thread's input item. + T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. + T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. + { + InclusiveScan(input, inclusive_output, cub::Sum(), warp_aggregate); + } + + + //@} end member group + /******************************************************************//** + * \name Exclusive prefix sums + *********************************************************************/ + //@{ + + + /** + * \brief Computes an exclusive prefix sum across the calling warp. The value of 0 is applied as the initial value, and is assigned to \p exclusive_output in thread0. + * + * \par + * - \identityzero + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates four concurrent warp-wide exclusive prefix sums within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for 4 warps + * __shared__ typename WarpScan::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Compute exclusive warp-wide prefix sums + * int warp_id = threadIdx.x / 32; + * WarpScan(temp_storage[warp_id]).ExclusiveSum(thread_data, thread_data); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. + * The corresponding output \p thread_data in each of the four warps of threads will be + * 0, 1, 2, ..., 31}. + * + */ + __device__ __forceinline__ void ExclusiveSum( + T input, ///< [in] Calling thread's input item. + T &exclusive_output) ///< [out] Calling thread's output item. May be aliased with \p input. + { + T initial_value = 0; + ExclusiveScan(input, exclusive_output, initial_value, cub::Sum()); + } + + + /** + * \brief Computes an exclusive prefix sum across the calling warp. The value of 0 is applied as the initial value, and is assigned to \p exclusive_output in thread0. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. + * + * \par + * - \identityzero + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates four concurrent warp-wide exclusive prefix sums within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) 
+ * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for 4 warps + * __shared__ typename WarpScan::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Compute exclusive warp-wide prefix sums + * int warp_aggregate; + * int warp_id = threadIdx.x / 32; + * WarpScan(temp_storage[warp_id]).ExclusiveSum(thread_data, thread_data, warp_aggregate); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {1, 1, 1, 1, ...}. + * The corresponding output \p thread_data in each of the four warps of threads will be + * 0, 1, 2, ..., 31}. Furthermore, \p warp_aggregate for all threads in all warps will be \p 32. + */ + __device__ __forceinline__ void ExclusiveSum( + T input, ///< [in] Calling thread's input item. + T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. + T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. + { + T initial_value = 0; + ExclusiveScan(input, exclusive_output, initial_value, cub::Sum(), warp_aggregate); + } + + + //@} end member group + /******************************************************************//** + * \name Inclusive prefix scans + *********************************************************************/ + //@{ + + /** + * \brief Computes an inclusive prefix scan using the specified binary scan functor across the calling warp. + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates four concurrent warp-wide inclusive prefix max scans within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for 4 warps + * __shared__ typename WarpScan::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Compute inclusive warp-wide prefix max scans + * int warp_id = threadIdx.x / 32; + * WarpScan(temp_storage[warp_id]).InclusiveScan(thread_data, thread_data, cub::Max()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. + * The corresponding output \p thread_data in the first warp would be + * 0, 0, 2, 2, ..., 30, 30, the output for the second warp would be 32, 32, 34, 34, ..., 62, 62, etc. + * + * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item. + T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. + ScanOp scan_op) ///< [in] Binary scan operator + { + InternalWarpScan(temp_storage).InclusiveScan(input, inclusive_output, scan_op); + } + + + /** + * \brief Computes an inclusive prefix scan using the specified binary scan functor across the calling warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates four concurrent warp-wide inclusive prefix max scans within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) 
+ * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for 4 warps + * __shared__ typename WarpScan::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Compute inclusive warp-wide prefix max scans + * int warp_aggregate; + * int warp_id = threadIdx.x / 32; + * WarpScan(temp_storage[warp_id]).InclusiveScan( + * thread_data, thread_data, cub::Max(), warp_aggregate); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. + * The corresponding output \p thread_data in the first warp would be + * 0, 0, 2, 2, ..., 30, 30, the output for the second warp would be 32, 32, 34, 34, ..., 62, 62, etc. + * Furthermore, \p warp_aggregate would be assigned \p 30 for threads in the first warp, \p 62 for threads + * in the second warp, etc. + * + * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void InclusiveScan( + T input, ///< [in] Calling thread's input item. + T &inclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. + ScanOp scan_op, ///< [in] Binary scan operator + T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. + { + InternalWarpScan(temp_storage).InclusiveScan(input, inclusive_output, scan_op, warp_aggregate); + } + + + //@} end member group + /******************************************************************//** + * \name Exclusive prefix scans + *********************************************************************/ + //@{ + + /** + * \brief Computes an exclusive prefix scan using the specified binary scan functor across the calling warp. Because no initial value is supplied, the \p output computed for warp-lane0 is undefined. + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for 4 warps + * __shared__ typename WarpScan::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Compute exclusive warp-wide prefix max scans + * int warp_id = threadIdx.x / 32; + * WarpScan(temp_storage[warp_id]).ExclusiveScan(thread_data, thread_data, cub::Max()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. + * The corresponding output \p thread_data in the first warp would be + * ?, 0, 0, 2, ..., 28, 30, the output for the second warp would be ?, 32, 32, 34, ..., 60, 62, etc. + * (The output \p thread_data in warp lane0 is undefined.) + * + * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item. + T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. 
+ ScanOp scan_op) ///< [in] Binary scan operator + { + InternalWarpScan internal(temp_storage); + + T inclusive_output; + internal.InclusiveScan(input, inclusive_output, scan_op); + + internal.Update( + input, + inclusive_output, + exclusive_output, + scan_op, + Int2Type()); + } + + + /** + * \brief Computes an exclusive prefix scan using the specified binary scan functor across the calling warp. + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for 4 warps + * __shared__ typename WarpScan::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Compute exclusive warp-wide prefix max scans + * int warp_id = threadIdx.x / 32; + * WarpScan(temp_storage[warp_id]).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. + * The corresponding output \p thread_data in the first warp would be + * INT_MIN, 0, 0, 2, ..., 28, 30, the output for the second warp would be 30, 32, 32, 34, ..., 60, 62, etc. + * + * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item. + T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. + T initial_value, ///< [in] Initial value to seed the exclusive scan + ScanOp scan_op) ///< [in] Binary scan operator + { + InternalWarpScan internal(temp_storage); + + T inclusive_output; + internal.InclusiveScan(input, inclusive_output, scan_op); + + internal.Update( + input, + inclusive_output, + exclusive_output, + scan_op, + initial_value, + Int2Type()); + } + + + /** + * \brief Computes an exclusive prefix scan using the specified binary scan functor across the calling warp. Because no initial value is supplied, the \p output computed for warp-lane0 is undefined. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for 4 warps + * __shared__ typename WarpScan::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Compute exclusive warp-wide prefix max scans + * int warp_aggregate; + * int warp_id = threadIdx.x / 32; + * WarpScan(temp_storage[warp_id]).ExclusiveScan(thread_data, thread_data, cub::Max(), warp_aggregate); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. + * The corresponding output \p thread_data in the first warp would be + * ?, 0, 0, 2, ..., 28, 30, the output for the second warp would be ?, 32, 32, 34, ..., 60, 62, etc. 
+ * (The output \p thread_data in warp lane0 is undefined.) Furthermore, \p warp_aggregate would be assigned \p 30 for threads in the first warp, \p 62 for threads + * in the second warp, etc. + * + * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item. + T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. + ScanOp scan_op, ///< [in] Binary scan operator + T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. + { + InternalWarpScan internal(temp_storage); + + T inclusive_output; + internal.InclusiveScan(input, inclusive_output, scan_op); + + internal.Update( + input, + inclusive_output, + exclusive_output, + warp_aggregate, + scan_op, + Int2Type()); + } + + + /** + * \brief Computes an exclusive prefix scan using the specified binary scan functor across the calling warp. Also provides every thread with the warp-wide \p warp_aggregate of all inputs. + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for 4 warps + * __shared__ typename WarpScan::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Compute exclusive warp-wide prefix max scans + * int warp_aggregate; + * int warp_id = threadIdx.x / 32; + * WarpScan(temp_storage[warp_id]).ExclusiveScan(thread_data, thread_data, INT_MIN, cub::Max(), warp_aggregate); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. + * The corresponding output \p thread_data in the first warp would be + * INT_MIN, 0, 0, 2, ..., 28, 30, the output for the second warp would be 30, 32, 32, 34, ..., 60, 62, etc. + * Furthermore, \p warp_aggregate would be assigned \p 30 for threads in the first warp, \p 62 for threads + * in the second warp, etc. + * + * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void ExclusiveScan( + T input, ///< [in] Calling thread's input item. + T &exclusive_output, ///< [out] Calling thread's output item. May be aliased with \p input. + T initial_value, ///< [in] Initial value to seed the exclusive scan + ScanOp scan_op, ///< [in] Binary scan operator + T &warp_aggregate) ///< [out] Warp-wide aggregate reduction of input items. + { + InternalWarpScan internal(temp_storage); + + T inclusive_output; + internal.InclusiveScan(input, inclusive_output, scan_op); + + internal.Update( + input, + inclusive_output, + exclusive_output, + warp_aggregate, + scan_op, + initial_value, + Int2Type()); + } + + + //@} end member group + /******************************************************************//** + * \name Combination (inclusive & exclusive) prefix scans + *********************************************************************/ + //@{ + + + /** + * \brief Computes both inclusive and exclusive prefix scans using the specified binary scan functor across the calling warp. 
Because no initial value is supplied, the \p exclusive_output computed for warp-lane0 is undefined. + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates four concurrent warp-wide exclusive prefix max scans within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for 4 warps + * __shared__ typename WarpScan::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Compute exclusive warp-wide prefix max scans + * int inclusive_partial, exclusive_partial; + * WarpScan(temp_storage[warp_id]).Scan(thread_data, inclusive_partial, exclusive_partial, cub::Max()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. + * The corresponding output \p inclusive_partial in the first warp would be + * 0, 0, 2, 2, ..., 30, 30, the output for the second warp would be 32, 32, 34, 34, ..., 62, 62, etc. + * The corresponding output \p exclusive_partial in the first warp would be + * ?, 0, 0, 2, ..., 28, 30, the output for the second warp would be ?, 32, 32, 34, ..., 60, 62, etc. + * (The output \p thread_data in warp lane0 is undefined.) + * + * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void Scan( + T input, ///< [in] Calling thread's input item. + T &inclusive_output, ///< [out] Calling thread's inclusive-scan output item. + T &exclusive_output, ///< [out] Calling thread's exclusive-scan output item. + ScanOp scan_op) ///< [in] Binary scan operator + { + InternalWarpScan internal(temp_storage); + + internal.InclusiveScan(input, inclusive_output, scan_op); + + internal.Update( + input, + inclusive_output, + exclusive_output, + scan_op, + Int2Type()); + } + + + /** + * \brief Computes both inclusive and exclusive prefix scans using the specified binary scan functor across the calling warp. + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates four concurrent warp-wide prefix max scans within a block of + * 128 threads (one per each of the 32-thread warps). + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for 4 warps + * __shared__ typename WarpScan::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Compute inclusive warp-wide prefix max scans + * int warp_id = threadIdx.x / 32; + * int inclusive_partial, exclusive_partial; + * WarpScan(temp_storage[warp_id]).Scan(thread_data, inclusive_partial, exclusive_partial, INT_MIN, cub::Max()); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {0, -1, 2, -3, ..., 126, -127}. + * The corresponding output \p inclusive_partial in the first warp would be + * 0, 0, 2, 2, ..., 30, 30, the output for the second warp would be 32, 32, 34, 34, ..., 62, 62, etc. + * The corresponding output \p exclusive_partial in the first warp would be + * INT_MIN, 0, 0, 2, ..., 28, 30, the output for the second warp would be 30, 32, 32, 34, ..., 60, 62, etc. 
+ * + * \tparam ScanOp [inferred] Binary scan operator type having member T operator()(const T &a, const T &b) + */ + template + __device__ __forceinline__ void Scan( + T input, ///< [in] Calling thread's input item. + T &inclusive_output, ///< [out] Calling thread's inclusive-scan output item. + T &exclusive_output, ///< [out] Calling thread's exclusive-scan output item. + T initial_value, ///< [in] Initial value to seed the exclusive scan + ScanOp scan_op) ///< [in] Binary scan operator + { + InternalWarpScan internal(temp_storage); + + internal.InclusiveScan(input, inclusive_output, scan_op); + + internal.Update( + input, + inclusive_output, + exclusive_output, + scan_op, + initial_value, + Int2Type()); + } + + + + //@} end member group + /******************************************************************//** + * \name Data exchange + *********************************************************************/ + //@{ + + /** + * \brief Broadcast the value \p input from warp-lanesrc_lane to all lanes in the warp + * + * \par + * - \smemreuse + * + * \par Snippet + * The code snippet below illustrates the warp-wide broadcasts of values from + * lanes0 in each of four warps to all other threads in those warps. + * \par + * \code + * #include + * + * __global__ void ExampleKernel(...) + * { + * // Specialize WarpScan for type int + * typedef cub::WarpScan WarpScan; + * + * // Allocate WarpScan shared memory for 4 warps + * __shared__ typename WarpScan::TempStorage temp_storage[4]; + * + * // Obtain one input item per thread + * int thread_data = ... + * + * // Broadcast from lane0 in each warp to all other threads in the warp + * int warp_id = threadIdx.x / 32; + * thread_data = WarpScan(temp_storage[warp_id]).Broadcast(thread_data, 0); + * + * \endcode + * \par + * Suppose the set of input \p thread_data across the block of threads is {0, 1, 2, 3, ..., 127}. + * The corresponding output \p thread_data will be + * {0, 0, ..., 0} in warp0, + * {32, 32, ..., 32} in warp1, + * {64, 64, ..., 64} in warp2, etc. 
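+ *
+ * \par
+ * As an additional illustration (a sketch added for this code share, not part of the
+ * upstream CUB documentation; the kernel name, parameters, and single-warp launch are
+ * assumptions), Broadcast can also let one lane read a value once and share it with
+ * the rest of its warp:
+ * \code
+ * #include <cub/cub.cuh>
+ *
+ * // Assumes the kernel is launched with a single 32-thread warp per block
+ * __global__ void BroadcastParamKernel(const int *d_param, int *d_out)
+ * {
+ *     // Specialize WarpScan for type int
+ *     typedef cub::WarpScan<int> WarpScan;
+ *
+ *     // Allocate WarpScan shared memory for one warp
+ *     __shared__ typename WarpScan::TempStorage temp_storage;
+ *
+ *     // Only lane 0 touches global memory
+ *     int value = (threadIdx.x == 0) ? *d_param : 0;
+ *
+ *     // Every lane receives lane 0's value
+ *     value = WarpScan(temp_storage).Broadcast(value, 0);
+ *
+ *     d_out[threadIdx.x] = value;
+ * }
+ * \endcode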
+ */ + __device__ __forceinline__ T Broadcast( + T input, ///< [in] The value to broadcast + unsigned int src_lane) ///< [in] Which warp lane is to do the broadcasting + { + return InternalWarpScan(temp_storage).Broadcast(input, src_lane); + } + + //@} end member group + +}; + +/** @} */ // end group WarpModule + +} // CUB namespace +CUB_NS_POSTFIX // Optional outer namespace(s) diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/eclipse code style profile.xml b/hash-graph-dehornetify/externals/cub-1.8.0/eclipse code style profile.xml new file mode 100644 index 0000000..3ca7f77 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/eclipse code style profile.xml @@ -0,0 +1,155 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/.gitignore b/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/.gitignore new file mode 100644 index 0000000..9dad963 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/.gitignore @@ -0,0 +1,7 @@ +/bin +/Debug +/Release +/cuda55.sdf +/cuda55.suo +/cuda60.sdf +/cuda60.suo diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/Makefile b/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/Makefile new file mode 100644 index 0000000..b173c2a --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/Makefile @@ -0,0 +1,128 @@ +#/****************************************************************************** +# * Copyright (c) 2011, Duane Merrill. All rights reserved. +# * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. +# * +# * Redistribution and use in source and binary forms, with or without +# * modification, are permitted provided that the following conditions are met: +# * * Redistributions of source code must retain the above copyright +# * notice, this list of conditions and the following disclaimer. +# * * Redistributions in binary form must reproduce the above copyright +# * notice, this list of conditions and the following disclaimer in the +# * documentation and/or other materials provided with the distribution. +# * * Neither the name of the NVIDIA CORPORATION nor the +# * names of its contributors may be used to endorse or promote products +# * derived from this software without specific prior written permission. +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY +# * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# * +#******************************************************************************/ + +#------------------------------------------------------------------------------- +# +# Makefile usage +# +# make [sm=] [cdp=<0|1>] [force32=<0|1>] [abi=<0|1>] [open64=<0|1>] [verbose=<0|1>] [keep=<0|1>] +# +#------------------------------------------------------------------------------- + +include ../../common.mk + + +#------------------------------------------------------------------------------- +# Includes +#------------------------------------------------------------------------------- + +INC += -I$(CUB_DIR) -I$(CUB_DIR)test + + + +#------------------------------------------------------------------------------- +# Dependency Lists +#------------------------------------------------------------------------------- + +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) + +DEPS = $(CUB_DEPS) \ + $(CUB_DIR)test/Makefile \ + $(CUB_DIR)test/test_util.h \ + $(CUB_DIR)test/mersenne.h \ + +ALL = example_block_radix_sort \ + example_block_reduce \ + example_block_scan + + + +#------------------------------------------------------------------------------- +# make default +#------------------------------------------------------------------------------- + +default: + + +#------------------------------------------------------------------------------- +# make clean +#------------------------------------------------------------------------------- + +clean : + rm -f bin/*$(CPU_ARCH_SUFFIX)* + rm -f *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx *.hash *.cu.cpp *.o + + +#------------------------------------------------------------------------------- +# make all +#------------------------------------------------------------------------------- + +all : $(ALL) + +#------------------------------------------------------------------------------- +# make run +#------------------------------------------------------------------------------- + +run : + for i in $(ALL); do ./bin/$${i}_$(BIN_SUFFIX) --device=$(device) || exit 1; done + + + + +#------------------------------------------------------------------------------- +# make example_block_reduce +#------------------------------------------------------------------------------- + +example_block_reduce: bin/example_block_reduce_$(BIN_SUFFIX) + +bin/example_block_reduce_$(BIN_SUFFIX) : example_block_reduce.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/example_block_reduce_$(BIN_SUFFIX) example_block_reduce.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make example_block_scan +#------------------------------------------------------------------------------- + +example_block_scan: bin/example_block_scan_$(BIN_SUFFIX) + +bin/example_block_scan_$(BIN_SUFFIX) : example_block_scan.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/example_block_scan_$(BIN_SUFFIX) example_block_scan.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make example_block_radix_sort +#------------------------------------------------------------------------------- + +example_block_radix_sort: bin/example_block_radix_sort_$(BIN_SUFFIX) + +bin/example_block_radix_sort_$(BIN_SUFFIX) : example_block_radix_sort.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/example_block_radix_sort_$(BIN_SUFFIX) example_block_radix_sort.cu $(NVCCFLAGS) 
$(CPU_ARCH) $(INC) $(LIBS) -O3 + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/example_block_radix_sort.cu b/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/example_block_radix_sort.cu new file mode 100644 index 0000000..2fbeda9 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/example_block_radix_sort.cu @@ -0,0 +1,323 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Simple demonstration of cub::BlockRadixSort + * + * To compile using the command line: + * nvcc -arch=sm_XX example_block_radix_sort.cu -I../.. 
-lcudart -O3
+ *
+ ******************************************************************************/
+
+// Ensure printing of CUDA runtime errors to console (define before including cub.h)
+#define CUB_STDERR
+
+#include <stdio.h>
+#include <iostream>
+#include <algorithm>
+
+#include <cub/block/block_load.cuh>
+#include <cub/block/block_store.cuh>
+#include <cub/block/block_radix_sort.cuh>
+
+#include "../../test/test_util.h"
+
+using namespace cub;
+
+//---------------------------------------------------------------------
+// Globals, constants and typedefs
+//---------------------------------------------------------------------
+
+/// Verbose output
+bool g_verbose = false;
+
+/// Timing iterations
+int g_timing_iterations = 100;
+
+/// Default grid size
+int g_grid_size = 1;
+
+/// Uniform key samples
+bool g_uniform_keys;
+
+
+//---------------------------------------------------------------------
+// Kernels
+//---------------------------------------------------------------------
+
+/**
+ * Simple kernel for performing a block-wide sorting over integers
+ */
+template <
+    typename Key,
+    int BLOCK_THREADS,
+    int ITEMS_PER_THREAD>
+__launch_bounds__ (BLOCK_THREADS)
+__global__ void BlockSortKernel(
+    Key *d_in,          // Tile of input
+    Key *d_out,         // Tile of output
+    clock_t *d_elapsed) // Elapsed cycle count of block sort
+{
+    enum { TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD };
+
+    // Specialize BlockLoad type for our thread block (uses warp-striped loads for coalescing, then transposes in shared memory to a blocked arrangement)
+    typedef BlockLoad<Key, BLOCK_THREADS, ITEMS_PER_THREAD, BLOCK_LOAD_WARP_TRANSPOSE> BlockLoadT;
+
+    // Specialize BlockRadixSort type for our thread block
+    typedef BlockRadixSort<Key, BLOCK_THREADS, ITEMS_PER_THREAD> BlockRadixSortT;
+
+    // Shared memory
+    __shared__ union TempStorage
+    {
+        typename BlockLoadT::TempStorage load;
+        typename BlockRadixSortT::TempStorage sort;
+    } temp_storage;
+
+    // Per-thread tile items
+    Key items[ITEMS_PER_THREAD];
+
+    // Our current block's offset
+    int block_offset = blockIdx.x * TILE_SIZE;
+
+    // Load items into a blocked arrangement
+    BlockLoadT(temp_storage.load).Load(d_in + block_offset, items);
+
+    // Barrier for smem reuse
+    __syncthreads();
+
+    // Start cycle timer
+    clock_t start = clock();
+
+    // Sort keys
+    BlockRadixSortT(temp_storage.sort).SortBlockedToStriped(items);
+
+    // Stop cycle timer
+    clock_t stop = clock();
+
+    // Store output in striped fashion
+    StoreDirectStriped<BLOCK_THREADS>(threadIdx.x, d_out + block_offset, items);
+
+    // Store elapsed clocks
+    if (threadIdx.x == 0)
+    {
+        d_elapsed[blockIdx.x] = (start > stop) ? start - stop : stop - start;
+    }
+}
+
+
+
+//---------------------------------------------------------------------
+// Host utilities
+//---------------------------------------------------------------------
+
+
+/**
+ * Initialize sorting problem (and solution).
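+ * (Only the first tile of h_reference is actually sorted below, matching the test
+ * harness, which verifies the device output for tile 0 only.)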
+ */ +template +void Initialize( + Key *h_in, + Key *h_reference, + int num_items, + int tile_size) +{ + for (int i = 0; i < num_items; ++i) + { + if (g_uniform_keys) + { + h_in[i] = 0; + } + else + { + RandomBits(h_in[i]); + } + h_reference[i] = h_in[i]; + } + + // Only sort the first tile + std::sort(h_reference, h_reference + tile_size); +} + + +/** + * Test BlockScan + */ +template < + typename Key, + int BLOCK_THREADS, + int ITEMS_PER_THREAD> +void Test() +{ + const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD; + + // Allocate host arrays + Key *h_in = new Key[TILE_SIZE * g_grid_size]; + Key *h_reference = new Key[TILE_SIZE * g_grid_size]; + clock_t *h_elapsed = new clock_t[g_grid_size]; + + // Initialize problem and reference output on host + Initialize(h_in, h_reference, TILE_SIZE * g_grid_size, TILE_SIZE); + + // Initialize device arrays + Key *d_in = NULL; + Key *d_out = NULL; + clock_t *d_elapsed = NULL; + CubDebugExit(cudaMalloc((void**)&d_in, sizeof(Key) * TILE_SIZE * g_grid_size)); + CubDebugExit(cudaMalloc((void**)&d_out, sizeof(Key) * TILE_SIZE * g_grid_size)); + CubDebugExit(cudaMalloc((void**)&d_elapsed, sizeof(clock_t) * g_grid_size)); + + // Display input problem data + if (g_verbose) + { + printf("Input data: "); + for (int i = 0; i < TILE_SIZE; i++) + std::cout << h_in[i] << ", "; + printf("\n\n"); + } + + // Kernel props + int max_sm_occupancy; + CubDebugExit(MaxSmOccupancy(max_sm_occupancy, BlockSortKernel, BLOCK_THREADS)); + + // Copy problem to device + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(Key) * TILE_SIZE * g_grid_size, cudaMemcpyHostToDevice)); + + printf("BlockRadixSort %d items (%d timing iterations, %d blocks, %d threads, %d items per thread, %d SM occupancy):\n", + TILE_SIZE * g_grid_size, g_timing_iterations, g_grid_size, BLOCK_THREADS, ITEMS_PER_THREAD, max_sm_occupancy); + fflush(stdout); + + // Run kernel once to prime caches and check result + BlockSortKernel<<>>( + d_in, + d_out, + d_elapsed); + + // Check for kernel errors and STDIO from the kernel, if any + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + + // Check results + printf("\tOutput items: "); + int compare = CompareDeviceResults(h_reference, d_out, TILE_SIZE, g_verbose, g_verbose); + printf("%s\n", compare ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + fflush(stdout); + + // Run this several times and average the performance results + GpuTimer timer; + float elapsed_millis = 0.0; + unsigned long long elapsed_clocks = 0; + + for (int i = 0; i < g_timing_iterations; ++i) + { + timer.Start(); + + // Run kernel + BlockSortKernel<<>>( + d_in, + d_out, + d_elapsed); + + timer.Stop(); + elapsed_millis += timer.ElapsedMillis(); + + // Copy clocks from device + CubDebugExit(cudaMemcpy(h_elapsed, d_elapsed, sizeof(clock_t) * g_grid_size, cudaMemcpyDeviceToHost)); + for (int i = 0; i < g_grid_size; i++) + elapsed_clocks += h_elapsed[i]; + } + + // Check for kernel errors and STDIO from the kernel, if any + CubDebugExit(cudaDeviceSynchronize()); + + // Display timing results + float avg_millis = elapsed_millis / g_timing_iterations; + float avg_items_per_sec = float(TILE_SIZE * g_grid_size) / avg_millis / 1000.0f; + double avg_clocks = double(elapsed_clocks) / g_timing_iterations / g_grid_size; + double avg_clocks_per_item = avg_clocks / TILE_SIZE; + + printf("\tAverage BlockRadixSort::SortBlocked clocks: %.3f\n", avg_clocks); + printf("\tAverage BlockRadixSort::SortBlocked clocks per item: %.3f\n", avg_clocks_per_item); + printf("\tAverage kernel millis: %.4f\n", avg_millis); + printf("\tAverage million items / sec: %.4f\n", avg_items_per_sec); + fflush(stdout); + + // Cleanup + if (h_in) delete[] h_in; + if (h_reference) delete[] h_reference; + if (h_elapsed) delete[] h_elapsed; + if (d_in) CubDebugExit(cudaFree(d_in)); + if (d_out) CubDebugExit(cudaFree(d_out)); + if (d_elapsed) CubDebugExit(cudaFree(d_elapsed)); +} + + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + g_uniform_keys = args.CheckCmdLineFlag("uniform"); + args.GetCmdLineArgument("i", g_timing_iterations); + args.GetCmdLineArgument("grid-size", g_grid_size); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=] " + "[--i=]" + "[--grid-size=]" + "[--v] " + "\n", argv[0], g_timing_iterations, g_grid_size); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + fflush(stdout); + + // Run tests + printf("\nuint32:\n"); fflush(stdout); + Test(); + printf("\n"); fflush(stdout); + + printf("\nfp32:\n"); fflush(stdout); + Test(); + printf("\n"); fflush(stdout); + + printf("\nuint8:\n"); fflush(stdout); + Test(); + printf("\n"); fflush(stdout); + + return 0; +} + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/example_block_reduce.cu b/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/example_block_reduce.cu new file mode 100644 index 0000000..bad8001 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/example_block_reduce.cu @@ -0,0 +1,290 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the NVIDIA CORPORATION nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ ******************************************************************************/
+
+/******************************************************************************
+ * Simple demonstration of cub::BlockReduce
+ *
+ * To compile using the command line:
+ *   nvcc -arch=sm_XX example_block_reduce.cu -I../.. -lcudart -O3
+ *
+ ******************************************************************************/
+
+// Ensure printing of CUDA runtime errors to console (define before including cub.h)
+#define CUB_STDERR
+
+#include <stdio.h>
+#include <iostream>
+
+#include <cub/block/block_load.cuh>
+#include <cub/block/block_store.cuh>
+#include <cub/block/block_reduce.cuh>
+
+#include "../../test/test_util.h"
+
+using namespace cub;
+
+//---------------------------------------------------------------------
+// Globals, constants and typedefs
+//---------------------------------------------------------------------
+
+/// Verbose output
+bool g_verbose = false;
+
+/// Timing iterations
+int g_timing_iterations = 100;
+
+/// Default grid size
+int g_grid_size = 1;
+
+
+
+//---------------------------------------------------------------------
+// Kernels
+//---------------------------------------------------------------------
+
+/**
+ * Simple kernel for performing a block-wide reduction over integers
+ */
+template <
+    int BLOCK_THREADS,
+    int ITEMS_PER_THREAD,
+    BlockReduceAlgorithm ALGORITHM>
+__global__ void BlockSumKernel(
+    int *d_in,          // Tile of input
+    int *d_out,         // Tile aggregate
+    clock_t *d_elapsed) // Elapsed cycle count of block reduction
+{
+    // Specialize BlockReduce type for our thread block
+    typedef BlockReduce<int, BLOCK_THREADS, ALGORITHM> BlockReduceT;
+
+    // Shared memory
+    __shared__ typename BlockReduceT::TempStorage temp_storage;
+
+    // Per-thread tile data
+    int data[ITEMS_PER_THREAD];
+    LoadDirectStriped<BLOCK_THREADS>(threadIdx.x, d_in, data);
+
+    // Start cycle timer
+    clock_t start = clock();
+
+    // Compute sum
+    int aggregate = BlockReduceT(temp_storage).Sum(data);
+
+    // Stop cycle timer
+    clock_t stop = clock();
+
+    // Store aggregate and elapsed clocks
+    if (threadIdx.x == 0)
+    {
+        *d_elapsed = (start > stop) ? start - stop : stop - start;
+        *d_out = aggregate;
+    }
+}
+
+
+
+//---------------------------------------------------------------------
+// Host utilities
+//---------------------------------------------------------------------
+
+/**
+ * Initialize reduction problem (and solution).
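+ * (Inputs are filled with i % 17, and the matching reference aggregate is accumulated
+ * on the host while the values are generated.)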
+ * Returns the aggregate + */ +int Initialize(int *h_in, int num_items) +{ + int inclusive = 0; + + for (int i = 0; i < num_items; ++i) + { + h_in[i] = i % 17; + inclusive += h_in[i]; + } + + return inclusive; +} + + +/** + * Test thread block reduction + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + BlockReduceAlgorithm ALGORITHM> +void Test() +{ + const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD; + + // Allocate host arrays + int *h_in = new int[TILE_SIZE]; + int *h_gpu = new int[TILE_SIZE + 1]; + + // Initialize problem and reference output on host + int h_aggregate = Initialize(h_in, TILE_SIZE); + + // Initialize device arrays + int *d_in = NULL; + int *d_out = NULL; + clock_t *d_elapsed = NULL; + cudaMalloc((void**)&d_in, sizeof(int) * TILE_SIZE); + cudaMalloc((void**)&d_out, sizeof(int) * 1); + cudaMalloc((void**)&d_elapsed, sizeof(clock_t)); + + // Display input problem data + if (g_verbose) + { + printf("Input data: "); + for (int i = 0; i < TILE_SIZE; i++) + printf("%d, ", h_in[i]); + printf("\n\n"); + } + + // Kernel props + int max_sm_occupancy; + CubDebugExit(MaxSmOccupancy(max_sm_occupancy, BlockSumKernel, BLOCK_THREADS)); + + // Copy problem to device + cudaMemcpy(d_in, h_in, sizeof(int) * TILE_SIZE, cudaMemcpyHostToDevice); + + printf("BlockReduce algorithm %s on %d items (%d timing iterations, %d blocks, %d threads, %d items per thread, %d SM occupancy):\n", + (ALGORITHM == BLOCK_REDUCE_RAKING) ? "BLOCK_REDUCE_RAKING" : "BLOCK_REDUCE_WARP_REDUCTIONS", + TILE_SIZE, g_timing_iterations, g_grid_size, BLOCK_THREADS, ITEMS_PER_THREAD, max_sm_occupancy); + + // Run aggregate/prefix kernel + BlockSumKernel<<>>( + d_in, + d_out, + d_elapsed); + + // Check total aggregate + printf("\tAggregate: "); + int compare = CompareDeviceResults(&h_aggregate, d_out, 1, g_verbose, g_verbose); + printf("%s\n", compare ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + + // Run this several times and average the performance results + GpuTimer timer; + float elapsed_millis = 0.0; + clock_t elapsed_clocks = 0; + + for (int i = 0; i < g_timing_iterations; ++i) + { + // Copy problem to device + cudaMemcpy(d_in, h_in, sizeof(int) * TILE_SIZE, cudaMemcpyHostToDevice); + + timer.Start(); + + // Run aggregate/prefix kernel + BlockSumKernel<<>>( + d_in, + d_out, + d_elapsed); + + timer.Stop(); + elapsed_millis += timer.ElapsedMillis(); + + // Copy clocks from device + clock_t clocks; + CubDebugExit(cudaMemcpy(&clocks, d_elapsed, sizeof(clock_t), cudaMemcpyDeviceToHost)); + elapsed_clocks += clocks; + + } + + // Check for kernel errors and STDIO from the kernel, if any + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + + // Display timing results + float avg_millis = elapsed_millis / g_timing_iterations; + float avg_items_per_sec = float(TILE_SIZE * g_grid_size) / avg_millis / 1000.0f; + float avg_clocks = float(elapsed_clocks) / g_timing_iterations; + float avg_clocks_per_item = avg_clocks / TILE_SIZE; + + printf("\tAverage BlockReduce::Sum clocks: %.3f\n", avg_clocks); + printf("\tAverage BlockReduce::Sum clocks per item: %.3f\n", avg_clocks_per_item); + printf("\tAverage kernel millis: %.4f\n", avg_millis); + printf("\tAverage million items / sec: %.4f\n", avg_items_per_sec); + + // Cleanup + if (h_in) delete[] h_in; + if (h_gpu) delete[] h_gpu; + if (d_in) cudaFree(d_in); + if (d_out) cudaFree(d_out); + if (d_elapsed) cudaFree(d_elapsed); +} + + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("i", g_timing_iterations); + args.GetCmdLineArgument("grid-size", g_grid_size); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=] " + "[--i=] " + "[--grid-size=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Run tests + Test<1024, 1, BLOCK_REDUCE_RAKING>(); + Test<512, 2, BLOCK_REDUCE_RAKING>(); + Test<256, 4, BLOCK_REDUCE_RAKING>(); + Test<128, 8, BLOCK_REDUCE_RAKING>(); + Test<64, 16, BLOCK_REDUCE_RAKING>(); + Test<32, 32, BLOCK_REDUCE_RAKING>(); + Test<16, 64, BLOCK_REDUCE_RAKING>(); + + printf("-------------\n"); + + Test<1024, 1, BLOCK_REDUCE_WARP_REDUCTIONS>(); + Test<512, 2, BLOCK_REDUCE_WARP_REDUCTIONS>(); + Test<256, 4, BLOCK_REDUCE_WARP_REDUCTIONS>(); + Test<128, 8, BLOCK_REDUCE_WARP_REDUCTIONS>(); + Test<64, 16, BLOCK_REDUCE_WARP_REDUCTIONS>(); + Test<32, 32, BLOCK_REDUCE_WARP_REDUCTIONS>(); + Test<16, 64, BLOCK_REDUCE_WARP_REDUCTIONS>(); + + return 0; +} + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/example_block_scan.cu b/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/example_block_scan.cu new file mode 100644 index 0000000..fa709a5 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/example_block_scan.cu @@ -0,0 +1,334 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Simple demonstration of cub::BlockScan + * + * To compile using the command line: + * nvcc -arch=sm_XX example_block_scan.cu -I../.. 
-lcudart -O3 + * + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console (define before including cub.h) +#define CUB_STDERR + +#include +#include + +#include +#include +#include + +#include "../../test/test_util.h" + +using namespace cub; + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +/// Verbose output +bool g_verbose = false; + +/// Timing iterations +int g_timing_iterations = 100; + +/// Default grid size +int g_grid_size = 1; + + + +//--------------------------------------------------------------------- +// Kernels +//--------------------------------------------------------------------- + +/** + * Simple kernel for performing a block-wide exclusive prefix sum over integers + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + BlockScanAlgorithm ALGORITHM> +__global__ void BlockPrefixSumKernel( + int *d_in, // Tile of input + int *d_out, // Tile of output + clock_t *d_elapsed) // Elapsed cycle count of block scan +{ + // Specialize BlockLoad type for our thread block (uses warp-striped loads for coalescing, then transposes in shared memory to a blocked arrangement) + typedef BlockLoad BlockLoadT; + + // Specialize BlockStore type for our thread block (uses warp-striped loads for coalescing, then transposes in shared memory to a blocked arrangement) + typedef BlockStore BlockStoreT; + + // Specialize BlockScan type for our thread block + typedef BlockScan BlockScanT; + + // Shared memory + __shared__ union TempStorage + { + typename BlockLoadT::TempStorage load; + typename BlockStoreT::TempStorage store; + typename BlockScanT::TempStorage scan; + } temp_storage; + + // Per-thread tile data + int data[ITEMS_PER_THREAD]; + + // Load items into a blocked arrangement + BlockLoadT(temp_storage.load).Load(d_in, data); + + // Barrier for smem reuse + __syncthreads(); + + // Start cycle timer + clock_t start = clock(); + + // Compute exclusive prefix sum + int aggregate; + BlockScanT(temp_storage.scan).ExclusiveSum(data, data, aggregate); + + // Stop cycle timer + clock_t stop = clock(); + + // Barrier for smem reuse + __syncthreads(); + + // Store items from a blocked arrangement + BlockStoreT(temp_storage.store).Store(d_out, data); + + // Store aggregate and elapsed clocks + if (threadIdx.x == 0) + { + *d_elapsed = (start > stop) ? start - stop : stop - start; + d_out[BLOCK_THREADS * ITEMS_PER_THREAD] = aggregate; + } +} + + + +//--------------------------------------------------------------------- +// Host utilities +//--------------------------------------------------------------------- + +/** + * Initialize exclusive prefix sum problem (and solution). 
+ * Returns the aggregate + */ +int Initialize( + int *h_in, + int *h_reference, + int num_items) +{ + int inclusive = 0; + + for (int i = 0; i < num_items; ++i) + { + h_in[i] = i % 17; + + h_reference[i] = inclusive; + inclusive += h_in[i]; + } + + return inclusive; +} + + +/** + * Test thread block scan + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + BlockScanAlgorithm ALGORITHM> +void Test() +{ + const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD; + + // Allocate host arrays + int *h_in = new int[TILE_SIZE]; + int *h_reference = new int[TILE_SIZE]; + int *h_gpu = new int[TILE_SIZE + 1]; + + // Initialize problem and reference output on host + int h_aggregate = Initialize(h_in, h_reference, TILE_SIZE); + + // Initialize device arrays + int *d_in = NULL; + int *d_out = NULL; + clock_t *d_elapsed = NULL; + cudaMalloc((void**)&d_in, sizeof(int) * TILE_SIZE); + cudaMalloc((void**)&d_out, sizeof(int) * (TILE_SIZE + 1)); + cudaMalloc((void**)&d_elapsed, sizeof(clock_t)); + + // Display input problem data + if (g_verbose) + { + printf("Input data: "); + for (int i = 0; i < TILE_SIZE; i++) + printf("%d, ", h_in[i]); + printf("\n\n"); + } + + // Kernel props + int max_sm_occupancy; + CubDebugExit(MaxSmOccupancy(max_sm_occupancy, BlockPrefixSumKernel, BLOCK_THREADS)); + + // Copy problem to device + cudaMemcpy(d_in, h_in, sizeof(int) * TILE_SIZE, cudaMemcpyHostToDevice); + + printf("BlockScan algorithm %s on %d items (%d timing iterations, %d blocks, %d threads, %d items per thread, %d SM occupancy):\n", + (ALGORITHM == BLOCK_SCAN_RAKING) ? "BLOCK_SCAN_RAKING" : (ALGORITHM == BLOCK_SCAN_RAKING_MEMOIZE) ? "BLOCK_SCAN_RAKING_MEMOIZE" : "BLOCK_SCAN_WARP_SCANS", + TILE_SIZE, g_timing_iterations, g_grid_size, BLOCK_THREADS, ITEMS_PER_THREAD, max_sm_occupancy); + + // Run aggregate/prefix kernel + BlockPrefixSumKernel<<>>( + d_in, + d_out, + d_elapsed); + + // Check results + printf("\tOutput items: "); + int compare = CompareDeviceResults(h_reference, d_out, TILE_SIZE, g_verbose, g_verbose); + printf("%s\n", compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Check total aggregate + printf("\tAggregate: "); + compare = CompareDeviceResults(&h_aggregate, d_out + TILE_SIZE, 1, g_verbose, g_verbose); + printf("%s\n", compare ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + + // Run this several times and average the performance results + GpuTimer timer; + float elapsed_millis = 0.0; + clock_t elapsed_clocks = 0; + + for (int i = 0; i < g_timing_iterations; ++i) + { + // Copy problem to device + cudaMemcpy(d_in, h_in, sizeof(int) * TILE_SIZE, cudaMemcpyHostToDevice); + + timer.Start(); + + // Run aggregate/prefix kernel + BlockPrefixSumKernel<<>>( + d_in, + d_out, + d_elapsed); + + timer.Stop(); + elapsed_millis += timer.ElapsedMillis(); + + // Copy clocks from device + clock_t clocks; + CubDebugExit(cudaMemcpy(&clocks, d_elapsed, sizeof(clock_t), cudaMemcpyDeviceToHost)); + elapsed_clocks += clocks; + + } + + // Check for kernel errors and STDIO from the kernel, if any + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + + // Display timing results + float avg_millis = elapsed_millis / g_timing_iterations; + float avg_items_per_sec = float(TILE_SIZE * g_grid_size) / avg_millis / 1000.0f; + float avg_clocks = float(elapsed_clocks) / g_timing_iterations; + float avg_clocks_per_item = avg_clocks / TILE_SIZE; + + printf("\tAverage BlockScan::Sum clocks: %.3f\n", avg_clocks); + printf("\tAverage BlockScan::Sum clocks per item: %.3f\n", avg_clocks_per_item); + printf("\tAverage kernel millis: %.4f\n", avg_millis); + printf("\tAverage million items / sec: %.4f\n", avg_items_per_sec); + + // Cleanup + if (h_in) delete[] h_in; + if (h_reference) delete[] h_reference; + if (h_gpu) delete[] h_gpu; + if (d_in) cudaFree(d_in); + if (d_out) cudaFree(d_out); + if (d_elapsed) cudaFree(d_elapsed); +} + + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("i", g_timing_iterations); + args.GetCmdLineArgument("grid-size", g_grid_size); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=] " + "[--i=]" + "[--grid-size=]" + "[--v] " + "\n", argv[0], g_timing_iterations, g_grid_size); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Run tests + Test<1024, 1, BLOCK_SCAN_RAKING>(); + Test<512, 2, BLOCK_SCAN_RAKING>(); + Test<256, 4, BLOCK_SCAN_RAKING>(); + Test<128, 8, BLOCK_SCAN_RAKING>(); + Test<64, 16, BLOCK_SCAN_RAKING>(); + Test<32, 32, BLOCK_SCAN_RAKING>(); + + printf("-------------\n"); + + Test<1024, 1, BLOCK_SCAN_RAKING_MEMOIZE>(); + Test<512, 2, BLOCK_SCAN_RAKING_MEMOIZE>(); + Test<256, 4, BLOCK_SCAN_RAKING_MEMOIZE>(); + Test<128, 8, BLOCK_SCAN_RAKING_MEMOIZE>(); + Test<64, 16, BLOCK_SCAN_RAKING_MEMOIZE>(); + Test<32, 32, BLOCK_SCAN_RAKING_MEMOIZE>(); + + printf("-------------\n"); + + Test<1024, 1, BLOCK_SCAN_WARP_SCANS>(); + Test<512, 2, BLOCK_SCAN_WARP_SCANS>(); + Test<256, 4, BLOCK_SCAN_WARP_SCANS>(); + Test<128, 8, BLOCK_SCAN_WARP_SCANS>(); + Test<64, 16, BLOCK_SCAN_WARP_SCANS>(); + Test<32, 32, BLOCK_SCAN_WARP_SCANS>(); + + + return 0; +} + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/reduce_by_key.cu b/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/reduce_by_key.cu new file mode 100644 index 0000000..d74e162 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/block/reduce_by_key.cu @@ -0,0 +1,57 @@ + + +#include + + +template < + int BLOCK_THREADS, ///< Number of CTA threads + typename KeyT, ///< Key type + typename ValueT> ///< Value type +__global__ void Kernel() +{ + // Tuple type for scanning (pairs accumulated segment-value 
with segment-index) + typedef cub::KeyValuePair OffsetValuePairT; + + // Reduce-value-by-segment scan operator + typedef cub::ReduceBySegmentOp ReduceBySegmentOpT; + + // Parameterized BlockDiscontinuity type for setting head flags + typedef cub::BlockDiscontinuity< + KeyT, + BLOCK_THREADS> + BlockDiscontinuityKeysT; + + // Parameterized BlockScan type + typedef cub::BlockScan< + OffsetValuePairT, + BLOCK_THREADS, + cub::BLOCK_SCAN_WARP_SCANS> + BlockScanT; + + // Shared memory + __shared__ union TempStorage + { + typename BlockScanT::TempStorage scan; // Scan storage + typename BlockDiscontinuityKeysT::TempStorage discontinuity; // Discontinuity storage + } temp_storage; + + + // Read data (each thread gets 3 items each, every 9 items is a segment) + KeyT my_keys[3] = {threadIdx.x / 3, threadIdx.x / 3, threadIdx.x / 3}; + ValueT my_values[3] = {1, 1, 1}; + + // Set head segment head flags + int my_flags[3]; + BlockDiscontinuityKeysT(temp_storage.discontinuity).FlagHeads( + my_flags, + my_keys, + cub::Inequality()); + + __syncthreads(); + + + + + + +} diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/.gitignore b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/.gitignore new file mode 100644 index 0000000..7032b5a --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/.gitignore @@ -0,0 +1,8 @@ +/bin +/Debug +/ipch +/Release +/cuda55.sdf +/cuda55.suo +/cuda60.sdf +/cuda60.suo diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/Makefile b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/Makefile new file mode 100644 index 0000000..fea1494 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/Makefile @@ -0,0 +1,197 @@ +#/****************************************************************************** +# * Copyright (c) 2011, Duane Merrill. All rights reserved. +# * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. +# * +# * Redistribution and use in source and binary forms, with or without +# * modification, are permitted provided that the following conditions are met: +# * * Redistributions of source code must retain the above copyright +# * notice, this list of conditions and the following disclaimer. +# * * Redistributions in binary form must reproduce the above copyright +# * notice, this list of conditions and the following disclaimer in the +# * documentation and/or other materials provided with the distribution. +# * * Neither the name of the NVIDIA CORPORATION nor the +# * names of its contributors may be used to endorse or promote products +# * derived from this software without specific prior written permission. +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY +# * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
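#
# (Editorial note, not part of the original Makefile: following the usual CUB conventions from
#  common.mk, a typical build-and-run of a single example -- assuming an SM 7.0 target -- would be
#  `make example_device_reduce sm=700` followed by running the resulting binary under ./bin/;
#  the exact binary suffix is derived by common.mk, so treat this invocation as illustrative.)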
+# * +#******************************************************************************/ + +#------------------------------------------------------------------------------- +# +# Makefile usage +# +# make [sm=] [cdp=<0|1>] [force32=<0|1>] [abi=<0|1>] [open64=<0|1>] [verbose=<0|1>] [keep=<0|1>] +# +#------------------------------------------------------------------------------- + +include ../../common.mk + + +#------------------------------------------------------------------------------- +# Includes +#------------------------------------------------------------------------------- + +INC += -I$(CUB_DIR) -I$(CUB_DIR)test + + + +#------------------------------------------------------------------------------- +# Dependency Lists +#------------------------------------------------------------------------------- + +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) + +DEPS = $(CUB_DEPS) \ + $(CUB_DIR)test/Makefile \ + $(CUB_DIR)test/test_util.h \ + $(CUB_DIR)test/mersenne.h \ + +ALL = example_device_partition_flagged \ + example_device_partition_if \ + example_device_radix_sort \ + example_device_reduce \ + example_device_scan \ + example_device_select_unique \ + example_device_select_flagged \ + example_device_select_if \ + example_device_sort_find_non_trivial_runs + + + +#------------------------------------------------------------------------------- +# make default +#------------------------------------------------------------------------------- + +default: + + +#------------------------------------------------------------------------------- +# make clean +#------------------------------------------------------------------------------- + +clean : + rm -f bin/*$(CPU_ARCH_SUFFIX)* + rm -f *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx *.hash *.cu.cpp *.o + + +#------------------------------------------------------------------------------- +# make all +#------------------------------------------------------------------------------- + +all : $(ALL) + +#------------------------------------------------------------------------------- +# make run +#------------------------------------------------------------------------------- + +run : + for i in $(ALL); do ./bin/$${i}_$(BIN_SUFFIX) --device=$(device) || exit 1; done + + +#------------------------------------------------------------------------------- +# make example_device_reduce +#------------------------------------------------------------------------------- + +example_device_reduce: bin/example_device_reduce_$(BIN_SUFFIX) + +bin/example_device_reduce_$(BIN_SUFFIX) : example_device_reduce.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/example_device_reduce_$(BIN_SUFFIX) example_device_reduce.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make example_device_partition_flagged +#------------------------------------------------------------------------------- + +example_device_partition_flagged: bin/example_device_partition_flagged_$(BIN_SUFFIX) + +bin/example_device_partition_flagged_$(BIN_SUFFIX) : example_device_partition_flagged.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/example_device_partition_flagged_$(BIN_SUFFIX) example_device_partition_flagged.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + +#------------------------------------------------------------------------------- +# make example_device_partition_if 
+#------------------------------------------------------------------------------- + +example_device_partition_if: bin/example_device_partition_if_$(BIN_SUFFIX) + +bin/example_device_partition_if_$(BIN_SUFFIX) : example_device_partition_if.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/example_device_partition_if_$(BIN_SUFFIX) example_device_partition_if.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + +#------------------------------------------------------------------------------- +# make example_device_scan +#------------------------------------------------------------------------------- + +example_device_scan: bin/example_device_scan_$(BIN_SUFFIX) + +bin/example_device_scan_$(BIN_SUFFIX) : example_device_scan.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/example_device_scan_$(BIN_SUFFIX) example_device_scan.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make example_device_radix_sort +#------------------------------------------------------------------------------- + +example_device_radix_sort: bin/example_device_radix_sort_$(BIN_SUFFIX) + +bin/example_device_radix_sort_$(BIN_SUFFIX) : example_device_radix_sort.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/example_device_radix_sort_$(BIN_SUFFIX) example_device_radix_sort.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make example_device_select_unique +#------------------------------------------------------------------------------- + +example_device_select_unique: bin/example_device_select_unique_$(BIN_SUFFIX) + +bin/example_device_select_unique_$(BIN_SUFFIX) : example_device_select_unique.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/example_device_select_unique_$(BIN_SUFFIX) example_device_select_unique.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make example_device_select_flagged +#------------------------------------------------------------------------------- + +example_device_select_flagged: bin/example_device_select_flagged_$(BIN_SUFFIX) + +bin/example_device_select_flagged_$(BIN_SUFFIX) : example_device_select_flagged.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/example_device_select_flagged_$(BIN_SUFFIX) example_device_select_flagged.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + +#------------------------------------------------------------------------------- +# make example_device_select_if +#------------------------------------------------------------------------------- + +example_device_select_if: bin/example_device_select_if_$(BIN_SUFFIX) + +bin/example_device_select_if_$(BIN_SUFFIX) : example_device_select_if.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/example_device_select_if_$(BIN_SUFFIX) example_device_select_if.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make example_device_sort_find_non_trivial_runs +#------------------------------------------------------------------------------- + +example_device_sort_find_non_trivial_runs: bin/example_device_sort_find_non_trivial_runs_$(BIN_SUFFIX) + +bin/example_device_sort_find_non_trivial_runs_$(BIN_SUFFIX) : example_device_sort_find_non_trivial_runs.cu $(DEPS) + mkdir -p bin + $(NVCC) 
$(DEFINES) $(SM_TARGETS) -o bin/example_device_sort_find_non_trivial_runs_$(BIN_SUFFIX) example_device_sort_find_non_trivial_runs.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_partition_flagged.cu b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_partition_flagged.cu new file mode 100644 index 0000000..ae02b3c --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_partition_flagged.cu @@ -0,0 +1,233 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Simple example of DevicePartition::Flagged(). + * + * Partition flagged items from from a sequence of int keys using a + * corresponding sequence of unsigned char flags. + * + * To compile using the command line: + * nvcc -arch=sm_XX example_device_partition_flagged.cu -I../.. 
-lcudart -O3 + * + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include + +#include +#include + +#include "../../test/test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; // Whether to display input/output to console +CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + + +/** + * Initialize problem, setting flags at distances of random length + * chosen from [1..max_segment] + */ +void Initialize( + int *h_in, + unsigned char *h_flags, + int num_items, + int max_segment) +{ + unsigned short max_short = (unsigned short) -1; + + int key = 0; + int i = 0; + while (i < num_items) + { + // Select number of repeating occurrences + unsigned short repeat; + RandomBits(repeat); + repeat = (unsigned short) ((float(repeat) * (float(max_segment) / float(max_short)))); + repeat = CUB_MAX(1, repeat); + + int j = i; + while (j < CUB_MIN(i + repeat, num_items)) + { + h_flags[j] = 0; + h_in[j] = key; + j++; + } + + h_flags[i] = 1; + i = j; + key++; + } + + if (g_verbose) + { + printf("Input:\n"); + DisplayResults(h_in, num_items); + printf("Flags:\n"); + DisplayResults(h_flags, num_items); + printf("\n\n"); + } +} + + +/** + * Solve unique problem + */ +int Solve( + int *h_in, + unsigned char *h_flags, + int *h_reference, + int num_items) +{ + int num_selected = 0; + for (int i = 0; i < num_items; ++i) + { + if (h_flags[i]) + { + h_reference[num_selected] = h_in[i]; + num_selected++; + } + else + { + h_reference[num_items - (i - num_selected) - 1] = h_in[i]; + } + } + + return num_selected; +} + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_items = 150; + int max_segment = 40; // Maximum segment length + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + args.GetCmdLineArgument("maxseg", max_segment); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--device=] " + "[--maxseg=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Allocate host arrays + int *h_in = new int[num_items]; + int *h_reference = new int[num_items]; + unsigned char *h_flags = new unsigned char[num_items]; + + // Initialize problem and solution + Initialize(h_in, h_flags, num_items, max_segment); + int num_selected = Solve(h_in, h_flags, h_reference, num_items); + + printf("cub::DevicePartition::Flagged %d items, %d selected (avg distance %d), %d-byte elements\n", + num_items, num_selected, (num_selected > 0) ? 
num_items / num_selected : 0, (int) sizeof(int)); + fflush(stdout); + + // Allocate problem device arrays + int *d_in = NULL; + unsigned char *d_flags = NULL; + + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(int) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_flags, sizeof(unsigned char) * num_items)); + + // Initialize device input + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(int) * num_items, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(d_flags, h_flags, sizeof(unsigned char) * num_items, cudaMemcpyHostToDevice)); + + // Allocate device output array and num selected + int *d_out = NULL; + int *d_num_selected_out = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(int) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_num_selected_out, sizeof(int))); + + // Allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(DevicePartition::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Run + CubDebugExit(DevicePartition::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items)); + + // Check for correctness (and display results, if specified) + int compare = CompareDeviceResults(h_reference, d_out, num_items, true, g_verbose); + printf("\t Data %s ", compare ? "FAIL" : "PASS"); + compare |= CompareDeviceResults(&num_selected, d_num_selected_out, 1, true, g_verbose); + printf("\t Count %s ", compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + if (h_in) delete[] h_in; + if (h_reference) delete[] h_reference; + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_num_selected_out) CubDebugExit(g_allocator.DeviceFree(d_num_selected_out)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_flags) CubDebugExit(g_allocator.DeviceFree(d_flags)); + + printf("\n\n"); + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_partition_if.cu b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_partition_if.cu new file mode 100644 index 0000000..7bf1c16 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_partition_if.cu @@ -0,0 +1,244 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Simple example of DevicePartition::If(). + * + * Partitions items from from a sequence of int keys using a + * section functor (greater-than) + * + * To compile using the command line: + * nvcc -arch=sm_XX example_device_select_if.cu -I../.. -lcudart -O3 + * + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include + +#include +#include + +#include "../../test/test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; // Whether to display input/output to console +CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + + +/// Selection functor type +struct GreaterThan +{ + int compare; + + __host__ __device__ __forceinline__ + GreaterThan(int compare) : compare(compare) {} + + __host__ __device__ __forceinline__ + bool operator()(const int &a) const { + return (a > compare); + } +}; + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + +/** + * Initialize problem, setting runs of random length chosen from [1..max_segment] + */ +void Initialize( + int *h_in, + int num_items, + int max_segment) +{ + int key = 0; + int i = 0; + while (i < num_items) + { + // Randomly select number of repeating occurrences uniformly from [1..max_segment] + unsigned short max_short = (unsigned short) -1; + unsigned short repeat; + RandomBits(repeat); + repeat = (unsigned short) ((float(repeat) * (float(max_segment) / float(max_short)))); + repeat = CUB_MAX(1, repeat); + + int j = i; + while (j < CUB_MIN(i + repeat, num_items)) + { + h_in[j] = key; + j++; + } + + i = j; + key++; + } + + if (g_verbose) + { + printf("Input:\n"); + DisplayResults(h_in, num_items); + printf("\n\n"); + } +} + + +/** + * Solve unique problem + */ +template +int Solve( + int *h_in, + SelectOp select_op, + int *h_reference, + int num_items) +{ + int num_selected = 0; + for (int i = 0; i < num_items; ++i) + { + if (select_op(h_in[i])) + { + h_reference[num_selected] = h_in[i]; + num_selected++; + } + else + { + h_reference[num_items - (i - num_selected) - 1] = h_in[i]; + } + } + + return num_selected; +} + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + 
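// Editorial sketch (not part of the original example): the two-phase temporary-storage idiom that
// main() below relies on, shown in isolation. The first call, made with d_temp_storage == NULL,
// only writes the required size into temp_storage_bytes; the second call does the actual
// partition. Variable names mirror those used in main() and are illustrative only.
//
//   void   *d_temp_storage     = NULL;
//   size_t  temp_storage_bytes = 0;
//   // Size query: no work is performed while d_temp_storage is NULL
//   CubDebugExit(DevicePartition::If(d_temp_storage, temp_storage_bytes,
//                                    d_in, d_out, d_num_selected_out, num_items, select_op));
//   CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes));
//   // Second call performs the partition using the allocated scratch space
//   CubDebugExit(DevicePartition::If(d_temp_storage, temp_storage_bytes,
//                                    d_in, d_out, d_num_selected_out, num_items, select_op));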
+/** + * Main + */ +int main(int argc, char** argv) +{ + int num_items = 150; + int max_segment = 40; // Maximum segment length + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + args.GetCmdLineArgument("maxseg", max_segment); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--device=] " + "[--maxseg=]" + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Allocate host arrays + int *h_in = new int[num_items]; + int *h_reference = new int[num_items]; + + // DevicePartition a pivot index + unsigned int pivot_index; + unsigned int max_int = (unsigned int) -1; + RandomBits(pivot_index); + pivot_index = (unsigned int) ((float(pivot_index) * (float(num_items - 1) / float(max_int)))); + printf("Pivot idx: %d\n", pivot_index); fflush(stdout); + + // Initialize problem and solution + Initialize(h_in, num_items, max_segment); + GreaterThan select_op(h_in[pivot_index]); + + int num_selected = Solve(h_in, select_op, h_reference, num_items); + + printf("cub::DevicePartition::If %d items, %d selected (avg run length %d), %d-byte elements\n", + num_items, num_selected, (num_selected > 0) ? num_items / num_selected : 0, (int) sizeof(int)); + fflush(stdout); + + // Allocate problem device arrays + int *d_in = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(int) * num_items)); + + // Initialize device input + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(int) * num_items, cudaMemcpyHostToDevice)); + + // Allocate device output array and num selected + int *d_out = NULL; + int *d_num_selected_out = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(int) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_num_selected_out, sizeof(int))); + + // Allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(DevicePartition::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Run + CubDebugExit(DevicePartition::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op)); + + // Check for correctness (and display results, if specified) + int compare = CompareDeviceResults(h_reference, d_out, num_items, true, g_verbose); + printf("\t Data %s ", compare ? "FAIL" : "PASS"); + compare = compare | CompareDeviceResults(&num_selected, d_num_selected_out, 1, true, g_verbose); + printf("\t Count %s ", compare ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + if (h_in) delete[] h_in; + if (h_reference) delete[] h_reference; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_num_selected_out) CubDebugExit(g_allocator.DeviceFree(d_num_selected_out)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + printf("\n\n"); + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_radix_sort.cu b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_radix_sort.cu new file mode 100644 index 0000000..1494ccb --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_radix_sort.cu @@ -0,0 +1,226 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Simple example of DeviceRadixSort::SortPairs(). + * + * Sorts an array of float keys paired with a corresponding array of int values. + * + * To compile using the command line: + * nvcc -arch=sm_XX example_device_radix_sort.cu -I../.. 
-lcudart -O3 + * + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include + +#include +#include + +#include "../../test/test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; // Whether to display input/output to console +CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + +/** + * Simple key-value pairing for floating point types. Distinguishes + * between positive and negative zero. + */ +struct Pair +{ + float key; + int value; + + bool operator<(const Pair &b) const + { + if (key < b.key) + return true; + + if (key > b.key) + return false; + + // Return true if key is negative zero and b.key is positive zero + unsigned int key_bits = *reinterpret_cast(const_cast(&key)); + unsigned int b_key_bits = *reinterpret_cast(const_cast(&b.key)); + unsigned int HIGH_BIT = 1u << 31; + + return ((key_bits & HIGH_BIT) != 0) && ((b_key_bits & HIGH_BIT) == 0); + } +}; + + +/** + * Initialize key-value sorting problem. + */ +void Initialize( + float *h_keys, + int *h_values, + float *h_reference_keys, + int *h_reference_values, + int num_items) +{ + Pair *h_pairs = new Pair[num_items]; + + for (int i = 0; i < num_items; ++i) + { + RandomBits(h_keys[i]); + RandomBits(h_values[i]); + h_pairs[i].key = h_keys[i]; + h_pairs[i].value = h_values[i]; + } + + if (g_verbose) + { + printf("Input keys:\n"); + DisplayResults(h_keys, num_items); + printf("\n\n"); + + printf("Input values:\n"); + DisplayResults(h_values, num_items); + printf("\n\n"); + } + + std::stable_sort(h_pairs, h_pairs + num_items); + + for (int i = 0; i < num_items; ++i) + { + h_reference_keys[i] = h_pairs[i].key; + h_reference_values[i] = h_pairs[i].value; + } + + delete[] h_pairs; +} + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_items = 150; + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--device=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + printf("cub::DeviceRadixSort::SortPairs() %d items (%d-byte keys %d-byte values)\n", + num_items, int(sizeof(float)), int(sizeof(int))); + fflush(stdout); + + // Allocate host arrays + float *h_keys = new float[num_items]; + float *h_reference_keys = new float[num_items]; + int *h_values = new int[num_items]; + int *h_reference_values = new int[num_items]; + + // Initialize problem and solution on host + Initialize(h_keys, h_values, h_reference_keys, h_reference_values, num_items); + + // Allocate device arrays + DoubleBuffer d_keys; + DoubleBuffer d_values; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_keys.d_buffers[0], sizeof(float) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_keys.d_buffers[1], sizeof(float) * num_items)); + 
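    // (Editorial aside, not in the original example: cub::DoubleBuffer<T> holds a pair of device
    //  buffers, d_buffers[0..1], plus a `selector` index. DeviceRadixSort::SortPairs may ping-pong
    //  between the two buffers across digit passes, so the sorted results are later read through
    //  d_keys.Current() and d_values.Current() rather than from a fixed buffer.)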
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_values.d_buffers[0], sizeof(int) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_values.d_buffers[1], sizeof(int) * num_items)); + + // Allocate temporary storage + size_t temp_storage_bytes = 0; + void *d_temp_storage = NULL; + + CubDebugExit(DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Initialize device arrays + CubDebugExit(cudaMemcpy(d_keys.d_buffers[d_keys.selector], h_keys, sizeof(float) * num_items, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(d_values.d_buffers[d_values.selector], h_values, sizeof(int) * num_items, cudaMemcpyHostToDevice)); + + // Run + CubDebugExit(DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items)); + + // Check for correctness (and display results, if specified) + int compare = CompareDeviceResults(h_reference_keys, d_keys.Current(), num_items, true, g_verbose); + printf("\t Compare keys (selector %d): %s\n", d_keys.selector, compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + compare = CompareDeviceResults(h_reference_values, d_values.Current(), num_items, true, g_verbose); + printf("\t Compare values (selector %d): %s\n", d_values.selector, compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + if (h_keys) delete[] h_keys; + if (h_reference_keys) delete[] h_reference_keys; + if (h_values) delete[] h_values; + if (h_reference_values) delete[] h_reference_values; + + if (d_keys.d_buffers[0]) CubDebugExit(g_allocator.DeviceFree(d_keys.d_buffers[0])); + if (d_keys.d_buffers[1]) CubDebugExit(g_allocator.DeviceFree(d_keys.d_buffers[1])); + if (d_values.d_buffers[0]) CubDebugExit(g_allocator.DeviceFree(d_values.d_buffers[0])); + if (d_values.d_buffers[1]) CubDebugExit(g_allocator.DeviceFree(d_values.d_buffers[1])); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + printf("\n\n"); + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_reduce.cu b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_reduce.cu new file mode 100644 index 0000000..fc8fddb --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_reduce.cu @@ -0,0 +1,180 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Simple example of DeviceReduce::Sum(). + * + * Sums an array of int keys. + * + * To compile using the command line: + * nvcc -arch=sm_XX example_device_reduce.cu -I../.. -lcudart -O3 + * + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include + +#include +#include + +#include "../../test/test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; // Whether to display input/output to console +CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + +/** + * Initialize problem + */ +void Initialize( + int *h_in, + int num_items) +{ + for (int i = 0; i < num_items; ++i) + h_in[i] = i; + + if (g_verbose) + { + printf("Input:\n"); + DisplayResults(h_in, num_items); + printf("\n\n"); + } +} + + +/** + * Compute solution + */ +void Solve( + int *h_in, + int &h_reference, + int num_items) +{ + for (int i = 0; i < num_items; ++i) + { + if (i == 0) + h_reference = h_in[0]; + else + h_reference += h_in[i]; + } +} + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_items = 150; + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--device=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + printf("cub::DeviceReduce::Sum() %d items (%d-byte elements)\n", + num_items, (int) sizeof(int)); + fflush(stdout); + + // Allocate host arrays + int* h_in = new int[num_items]; + int h_reference; + + // Initialize problem and solution + Initialize(h_in, num_items); + Solve(h_in, h_reference, num_items); + + // Allocate problem device arrays + int *d_in = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(int) * num_items)); + + // Initialize device input + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(int) * num_items, cudaMemcpyHostToDevice)); + + // 
Allocate device output array + int *d_out = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(int) * 1)); + + // Request and allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Run + CubDebugExit(DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items)); + + // Check for correctness (and display results, if specified) + int compare = CompareDeviceResults(&h_reference, d_out, 1, g_verbose, g_verbose); + printf("\t%s", compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + if (h_in) delete[] h_in; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + printf("\n\n"); + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_scan.cu b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_scan.cu new file mode 100644 index 0000000..3c85526 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_scan.cu @@ -0,0 +1,186 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Simple example of DeviceScan::ExclusiveSum(). + * + * Computes an exclusive sum of int keys. + * + * To compile using the command line: + * nvcc -arch=sm_XX example_device_scan.cu -I../.. 
-lcudart -O3 + * + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include + +#include +#include + +#include "../../test/test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; // Whether to display input/output to console +CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + + +/** + * Initialize problem + */ +void Initialize( + int *h_in, + int num_items) +{ + for (int i = 0; i < num_items; ++i) + h_in[i] = i; + + if (g_verbose) + { + printf("Input:\n"); + DisplayResults(h_in, num_items); + printf("\n\n"); + } +} + +/** + * Solve exclusive-scan problem + */ +int Solve( + int *h_in, + int *h_reference, + int num_items) +{ + int inclusive = 0; + int aggregate = 0; + + for (int i = 0; i < num_items; ++i) + { + h_reference[i] = inclusive; + inclusive += h_in[i]; + aggregate += h_in[i]; + } + + return aggregate; +} + + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_items = 150; + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--device=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + printf("cub::DeviceScan::ExclusiveSum %d items (%d-byte elements)\n", + num_items, (int) sizeof(int)); + fflush(stdout); + + // Allocate host arrays + int* h_in = new int[num_items]; + int* h_reference = new int[num_items]; + + // Initialize problem and solution + Initialize(h_in, num_items); + Solve(h_in, h_reference, num_items); + + // Allocate problem device arrays + int *d_in = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(int) * num_items)); + + // Initialize device input + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(int) * num_items, cudaMemcpyHostToDevice)); + + // Allocate device output array + int *d_out = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(int) * num_items)); + + // Allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Run + CubDebugExit(DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items)); + + // Check for correctness (and display results, if specified) + int compare = CompareDeviceResults(h_reference, d_out, num_items, true, g_verbose); + printf("\t%s", compare ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + if (h_in) delete[] h_in; + if (h_reference) delete[] h_reference; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + printf("\n\n"); + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_select_flagged.cu b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_select_flagged.cu new file mode 100644 index 0000000..12581f8 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_select_flagged.cu @@ -0,0 +1,233 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Simple example of DeviceSelect::Flagged(). + * + * Selects flagged items from from a sequence of int keys using a + * corresponding sequence of unsigned char flags. + * + * To compile using the command line: + * nvcc -arch=sm_XX example_device_select_flagged.cu -I../.. 
-lcudart -O3 + * + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include + +#include +#include + +#include "../../test/test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; // Whether to display input/output to console +CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + + +/** + * Initialize problem, setting flags at distances of random length + * chosen from [1..max_segment] + */ +void Initialize( + int *h_in, + unsigned char *h_flags, + int num_items, + int max_segment) +{ + unsigned short max_short = (unsigned short) -1; + + int key = 0; + int i = 0; + while (i < num_items) + { + // Select number of repeating occurrences + unsigned short repeat; + RandomBits(repeat); + repeat = (unsigned short) ((float(repeat) * (float(max_segment) / float(max_short)))); + repeat = CUB_MAX(1, repeat); + + int j = i; + while (j < CUB_MIN(i + repeat, num_items)) + { + h_flags[j] = 0; + h_in[j] = key; + j++; + } + + h_flags[i] = 1; + i = j; + key++; + } + + if (g_verbose) + { + printf("Input:\n"); + DisplayResults(h_in, num_items); + printf("Flags:\n"); + DisplayResults(h_flags, num_items); + printf("\n\n"); + } +} + + +/** + * Solve unique problem + */ +int Solve( + int *h_in, + unsigned char *h_flags, + int *h_reference, + int num_items) +{ + int num_selected = 0; + for (int i = 0; i < num_items; ++i) + { + if (h_flags[i]) + { + h_reference[num_selected] = h_in[i]; + num_selected++; + } + else + { + h_reference[num_items - (i - num_selected) - 1] = h_in[i]; + } + } + + return num_selected; +} + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_items = 150; + int max_segment = 40; // Maximum segment length + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + args.GetCmdLineArgument("maxseg", max_segment); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--device=] " + "[--maxseg=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Allocate host arrays + int *h_in = new int[num_items]; + int *h_reference = new int[num_items]; + unsigned char *h_flags = new unsigned char[num_items]; + + // Initialize problem and solution + Initialize(h_in, h_flags, num_items, max_segment); + int num_selected = Solve(h_in, h_flags, h_reference, num_items); + + printf("cub::DeviceSelect::Flagged %d items, %d selected (avg distance %d), %d-byte elements\n", + num_items, num_selected, (num_selected > 0) ? 
num_items / num_selected : 0, (int) sizeof(int)); + fflush(stdout); + + // Allocate problem device arrays + int *d_in = NULL; + unsigned char *d_flags = NULL; + + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(int) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_flags, sizeof(unsigned char) * num_items)); + + // Initialize device input + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(int) * num_items, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(d_flags, h_flags, sizeof(unsigned char) * num_items, cudaMemcpyHostToDevice)); + + // Allocate device output array and num selected + int *d_out = NULL; + int *d_num_selected_out = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(int) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_num_selected_out, sizeof(int))); + + // Allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Run + CubDebugExit(DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items)); + + // Check for correctness (and display results, if specified) + int compare = CompareDeviceResults(h_reference, d_out, num_selected, true, g_verbose); + printf("\t Data %s ", compare ? "FAIL" : "PASS"); + compare |= CompareDeviceResults(&num_selected, d_num_selected_out, 1, true, g_verbose); + printf("\t Count %s ", compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + if (h_in) delete[] h_in; + if (h_reference) delete[] h_reference; + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_num_selected_out) CubDebugExit(g_allocator.DeviceFree(d_num_selected_out)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_flags) CubDebugExit(g_allocator.DeviceFree(d_flags)); + + printf("\n\n"); + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_select_if.cu b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_select_if.cu new file mode 100644 index 0000000..689c99b --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_select_if.cu @@ -0,0 +1,242 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Simple example of DeviceSelect::If(). + * + * Selects items from from a sequence of int keys using a + * section functor (greater-than) + * + * To compile using the command line: + * nvcc -arch=sm_XX example_device_select_if.cu -I../.. -lcudart -O3 + * + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include + +#include +#include + +#include "../../test/test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; // Whether to display input/output to console +CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + + +/// Selection functor type +struct GreaterThan +{ + int compare; + + __host__ __device__ __forceinline__ + GreaterThan(int compare) : compare(compare) {} + + __host__ __device__ __forceinline__ + bool operator()(const int &a) const { + return (a > compare); + } +}; + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + +/** + * Initialize problem, setting runs of random length chosen from [1..max_segment] + */ +void Initialize( + int *h_in, + int num_items, + int max_segment) +{ + int key = 0; + int i = 0; + while (i < num_items) + { + // Randomly select number of repeating occurrences uniformly from [1..max_segment] + unsigned short max_short = (unsigned short) -1; + unsigned short repeat; + RandomBits(repeat); + repeat = (unsigned short) ((float(repeat) * (float(max_segment) / float(max_short)))); + repeat = CUB_MAX(1, repeat); + + int j = i; + while (j < CUB_MIN(i + repeat, num_items)) + { + h_in[j] = key; + j++; + } + + i = j; + key++; + } + + if (g_verbose) + { + printf("Input:\n"); + DisplayResults(h_in, num_items); + printf("\n\n"); + } +} + + +/** + * Solve unique problem + */ +template +int Solve( + int *h_in, + SelectOp select_op, + int *h_reference, + int num_items) +{ + int num_selected = 0; + for (int i = 0; i < num_items; ++i) + { + if (select_op(h_in[i])) + { + h_reference[num_selected] = h_in[i]; + num_selected++; + } + else + { + h_reference[num_items - (i - num_selected) - 1] = h_in[i]; + } + } + + return num_selected; +} + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** 
+ * Main + */ +int main(int argc, char** argv) +{ + int num_items = 150; + int max_segment = 40; // Maximum segment length + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + args.GetCmdLineArgument("maxseg", max_segment); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--device=] " + "[--maxseg=]" + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Allocate host arrays + int *h_in = new int[num_items]; + int *h_reference = new int[num_items]; + + // Select a pivot index + unsigned int pivot_index; + unsigned int max_int = (unsigned int) -1; + RandomBits(pivot_index); + pivot_index = (unsigned int) ((float(pivot_index) * (float(num_items - 1) / float(max_int)))); + printf("Pivot idx: %d\n", pivot_index); fflush(stdout); + + // Initialize problem and solution + Initialize(h_in, num_items, max_segment); + GreaterThan select_op(h_in[pivot_index]); + + int num_selected = Solve(h_in, select_op, h_reference, num_items); + + printf("cub::DeviceSelect::If %d items, %d selected (avg run length %d), %d-byte elements\n", + num_items, num_selected, (num_selected > 0) ? num_items / num_selected : 0, (int) sizeof(int)); + fflush(stdout); + + // Allocate problem device arrays + int *d_in = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(int) * num_items)); + + // Initialize device input + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(int) * num_items, cudaMemcpyHostToDevice)); + + // Allocate device output array and num selected + int *d_out = NULL; + int *d_num_selected_out = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(int) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_num_selected_out, sizeof(int))); + + // Allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Run + CubDebugExit(DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op)); + + // Check for correctness (and display results, if specified) + int compare = CompareDeviceResults(h_reference, d_out, num_selected, true, g_verbose); + printf("\t Data %s ", compare ? "FAIL" : "PASS"); + compare = compare | CompareDeviceResults(&num_selected, d_num_selected_out, 1, true, g_verbose); + printf("\t Count %s ", compare ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + if (h_in) delete[] h_in; + if (h_reference) delete[] h_reference; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_num_selected_out) CubDebugExit(g_allocator.DeviceFree(d_num_selected_out)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + printf("\n\n"); + + return 0; +} + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_select_unique.cu b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_select_unique.cu new file mode 100644 index 0000000..e9cefd5 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_select_unique.cu @@ -0,0 +1,221 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Simple example of DeviceSelect::Unique(). + * + * Selects the first element from each run of identical values from a sequence + * of int keys. + * + * To compile using the command line: + * nvcc -arch=sm_XX example_device_select_unique.cu -I../.. 
-lcudart -O3 + * + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include + +#include +#include + +#include "../../test/test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; // Whether to display input/output to console +CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + + +/** + * Initialize problem, setting runs of random length chosen from [1..max_segment] + */ +void Initialize( + int *h_in, + int num_items, + int max_segment) +{ + int key = 0; + int i = 0; + while (i < num_items) + { + // Randomly select number of repeating occurrences uniformly from [1..max_segment] + unsigned short max_short = (unsigned short) -1; + unsigned short repeat; + RandomBits(repeat); + repeat = (unsigned short) ((float(repeat) * (float(max_segment) / float(max_short)))); + repeat = CUB_MAX(1, repeat); + + int j = i; + while (j < CUB_MIN(i + repeat, num_items)) + { + h_in[j] = key; + j++; + } + + i = j; + key++; + } + + if (g_verbose) + { + printf("Input:\n"); + DisplayResults(h_in, num_items); + printf("\n\n"); + } +} + + +/** + * Solve unique problem + */ +int Solve( + int *h_in, + int *h_reference, + int num_items) +{ + int num_selected = 0; + if (num_items > 0) + { + h_reference[num_selected] = h_in[0]; + num_selected++; + } + + for (int i = 1; i < num_items; ++i) + { + if (h_in[i] != h_in[i - 1]) + { + h_reference[num_selected] = h_in[i]; + num_selected++; + } + } + + return num_selected; +} + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_items = 150; + int max_segment = 40; // Maximum segment length + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + args.GetCmdLineArgument("maxseg", max_segment); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--device=] " + "[--maxseg=]" + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Allocate host arrays + int* h_in = new int[num_items]; + int* h_reference = new int[num_items]; + + // Initialize problem and solution + Initialize(h_in, num_items, max_segment); + int num_selected = Solve(h_in, h_reference, num_items); + + printf("cub::DeviceSelect::Unique %d items (%d-byte elements), %d selected (avg run length %d)\n", + num_items, (int) sizeof(int), num_selected, num_items / num_selected); + fflush(stdout); + + // Allocate problem device arrays + int *d_in = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(int) * num_items)); + + // Initialize device input + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(int) * num_items, cudaMemcpyHostToDevice)); + + // Allocate device output array and num selected + int *d_out = NULL; + int *d_num_selected_out = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(int) * num_items)); + 
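+    // Note on the pattern used below: CUB device-wide calls are made twice.  The
+    // first call passes d_temp_storage == NULL, performs no selection work, and only
+    // writes the required scratch size into temp_storage_bytes; the second call,
+    // made with an allocation of that size, does the actual work.  Roughly:
+    //
+    //     DeviceSelect::Unique(NULL, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items);  // size query only
+    //     g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes);
+    //     DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items);  // run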
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_num_selected_out, sizeof(int))); + + // Allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Run + CubDebugExit(DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items)); + + // Check for correctness (and display results, if specified) + int compare = CompareDeviceResults(h_reference, d_out, num_selected, true, g_verbose); + printf("\t Data %s ", compare ? "FAIL" : "PASS"); + compare = compare | CompareDeviceResults(&num_selected, d_num_selected_out, 1, true, g_verbose); + printf("\t Count %s ", compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + if (h_in) delete[] h_in; + if (h_reference) delete[] h_reference; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_num_selected_out) CubDebugExit(g_allocator.DeviceFree(d_num_selected_out)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + printf("\n\n"); + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_sort_find_non_trivial_runs.cu b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_sort_find_non_trivial_runs.cu new file mode 100644 index 0000000..ed70248 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/examples/device/example_device_sort_find_non_trivial_runs.cu @@ -0,0 +1,384 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/****************************************************************************** + * Simple example of sorting a sequence of keys and values (each pair is a + * randomly-selected int32 paired with its original offset in the unsorted sequence), and then + * isolating all maximal, non-trivial (having length > 1) "runs" of duplicates. + * + * To compile using the command line: + * nvcc -arch=sm_XX example_device_sort_find_non_trivial_runs.cu -I../.. -lcudart -O3 + * + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include + +#include +#include +#include + +#include "../../test/test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; // Whether to display input/output to console +CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + +/** + * Simple key-value pairing for using std::sort on key-value pairs. + */ +template +struct Pair +{ + Key key; + Value value; + + bool operator<(const Pair &b) const + { + return (key < b.key); + } +}; + + +/** + * Pair ostream operator + */ +template +std::ostream& operator<<(std::ostream& os, const Pair& val) +{ + os << '<' << val.key << ',' << val.value << '>'; + return os; +} + + +/** + * Initialize problem + */ +template +void Initialize( + Key *h_keys, + Value *h_values, + int num_items, + int max_key) +{ + float scale = float(max_key) / float(UINT_MAX); + for (int i = 0; i < num_items; ++i) + { + Key sample; + RandomBits(sample); + h_keys[i] = (max_key == -1) ? i : (Key) (scale * sample); + h_values[i] = i; + } + + if (g_verbose) + { + printf("Keys:\n"); + DisplayResults(h_keys, num_items); + printf("\n\n"); + + printf("Values:\n"); + DisplayResults(h_values, num_items); + printf("\n\n"); + } +} + + +/** + * Solve sorted non-trivial subrange problem. Returns the number + * of non-trivial runs found. 
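+ *
+ * For example, in the sorted key sequence {1, 3, 3, 3, 7, 7} the non-trivial
+ * runs are (offset 1, length 3) and (offset 4, length 2); the lone 1 is a
+ * trivial run of length 1 and is not reported.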
+ */ +template +int Solve( + Key *h_keys, + Value *h_values, + int num_items, + int *h_offsets_reference, + int *h_lengths_reference) +{ + // Sort + + Pair *h_pairs = new Pair[num_items]; + for (int i = 0; i < num_items; ++i) + { + h_pairs[i].key = h_keys[i]; + h_pairs[i].value = h_values[i]; + } + + std::stable_sort(h_pairs, h_pairs + num_items); + + if (g_verbose) + { + printf("Sorted pairs:\n"); + DisplayResults(h_pairs, num_items); + printf("\n\n"); + } + + // Find non-trivial runs + + Key previous = h_pairs[0].key; + int length = 1; + int num_runs = 0; + int run_begin = 0; + + for (int i = 1; i < num_items; ++i) + { + if (previous != h_pairs[i].key) + { + if (length > 1) + { + h_offsets_reference[num_runs] = run_begin; + h_lengths_reference[num_runs] = length; + num_runs++; + } + length = 1; + run_begin = i; + } + else + { + length++; + } + previous = h_pairs[i].key; + } + + if (length > 1) + { + h_offsets_reference[num_runs] = run_begin; + h_lengths_reference[num_runs] = length; + num_runs++; + } + + delete[] h_pairs; + + return num_runs; +} + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + typedef unsigned int Key; + typedef int Value; + + int timing_iterations = 0; + int num_items = 40; + Key max_key = 20; // Max item + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + args.GetCmdLineArgument("maxkey", max_key); + args.GetCmdLineArgument("i", timing_iterations); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=] " + "[--i= " + "[--n= " + "[--maxkey=]" + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Allocate host arrays (problem and reference solution) + + Key *h_keys = new Key[num_items]; + Value *h_values = new Value[num_items]; + int *h_offsets_reference = new int[num_items]; + int *h_lengths_reference = new int[num_items]; + + // Initialize key-value pairs and compute reference solution (sort them, and identify non-trivial runs) + printf("Computing reference solution on CPU for %d items (max key %d)\n", num_items, max_key); + fflush(stdout); + + Initialize(h_keys, h_values, num_items, max_key); + int num_runs = Solve(h_keys, h_values, num_items, h_offsets_reference, h_lengths_reference); + + printf("%d non-trivial runs\n", num_runs); + fflush(stdout); + + // Repeat for performance timing + GpuTimer gpu_timer; + GpuTimer gpu_rle_timer; + float elapsed_millis = 0.0; + float elapsed_rle_millis = 0.0; + for (int i = 0; i <= timing_iterations; ++i) + { + + // Allocate and initialize device arrays for sorting + DoubleBuffer d_keys; + DoubleBuffer d_values; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_keys.d_buffers[0], sizeof(Key) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_keys.d_buffers[1], sizeof(Key) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_values.d_buffers[0], sizeof(Value) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_values.d_buffers[1], sizeof(Value) * num_items)); + + CubDebugExit(cudaMemcpy(d_keys.d_buffers[d_keys.selector], h_keys, sizeof(float) * num_items, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(d_values.d_buffers[d_values.selector], h_values, sizeof(int) * num_items, cudaMemcpyHostToDevice)); + + // Start timer + 
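+        // (gpu_timer, started here, covers the radix sort and the subsequent run-length
+        //  isolation; gpu_rle_timer, started further below, covers only the isolation.
+        //  DoubleBuffer ping-pongs between the two key/value buffers allocated above:
+        //  after SortPairs returns, d_keys.selector indicates which buffer holds the
+        //  sorted keys -- equivalently d_keys.Current() -- which is why the code below
+        //  frees d_buffers[selector ^ 1] as the unused buffer.)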
gpu_timer.Start(); + + // Allocate temporary storage for sorting + size_t temp_storage_bytes = 0; + void *d_temp_storage = NULL; + CubDebugExit(DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Do the sort + CubDebugExit(DeviceRadixSort::SortPairs(d_temp_storage, temp_storage_bytes, d_keys, d_values, num_items)); + + // Free unused buffers and sorting temporary storage + if (d_keys.d_buffers[d_keys.selector ^ 1]) CubDebugExit(g_allocator.DeviceFree(d_keys.d_buffers[d_keys.selector ^ 1])); + if (d_values.d_buffers[d_values.selector ^ 1]) CubDebugExit(g_allocator.DeviceFree(d_values.d_buffers[d_values.selector ^ 1])); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + // Start timer + gpu_rle_timer.Start(); + + // Allocate device arrays for enumerating non-trivial runs + int *d_offests_out = NULL; + int *d_lengths_out = NULL; + int *d_num_runs = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_offests_out, sizeof(int) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_lengths_out, sizeof(int) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_num_runs, sizeof(int) * 1)); + + // Allocate temporary storage for isolating non-trivial runs + d_temp_storage = NULL; + CubDebugExit(DeviceRunLengthEncode::NonTrivialRuns( + d_temp_storage, + temp_storage_bytes, + d_keys.d_buffers[d_keys.selector], + d_offests_out, + d_lengths_out, + d_num_runs, + num_items)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Do the isolation + CubDebugExit(DeviceRunLengthEncode::NonTrivialRuns( + d_temp_storage, + temp_storage_bytes, + d_keys.d_buffers[d_keys.selector], + d_offests_out, + d_lengths_out, + d_num_runs, + num_items)); + + // Free keys buffer + if (d_keys.d_buffers[d_keys.selector]) CubDebugExit(g_allocator.DeviceFree(d_keys.d_buffers[d_keys.selector])); + + // + // Hypothetically do stuff with the original key-indices corresponding to non-trivial runs of identical keys + // + + // Stop sort timer + gpu_timer.Stop(); + gpu_rle_timer.Stop(); + + if (i == 0) + { + // First iteration is a warmup: // Check for correctness (and display results, if specified) + + printf("\nRUN OFFSETS: \n"); + int compare = CompareDeviceResults(h_offsets_reference, d_offests_out, num_runs, true, g_verbose); + printf("\t\t %s ", compare ? "FAIL" : "PASS"); + + printf("\nRUN LENGTHS: \n"); + compare |= CompareDeviceResults(h_lengths_reference, d_lengths_out, num_runs, true, g_verbose); + printf("\t\t %s ", compare ? "FAIL" : "PASS"); + + printf("\nNUM RUNS: \n"); + compare |= CompareDeviceResults(&num_runs, d_num_runs, 1, true, g_verbose); + printf("\t\t %s ", compare ? 
"FAIL" : "PASS"); + + AssertEquals(0, compare); + } + else + { + elapsed_millis += gpu_timer.ElapsedMillis(); + elapsed_rle_millis += gpu_rle_timer.ElapsedMillis(); + } + + // GPU cleanup + + if (d_values.d_buffers[d_values.selector]) CubDebugExit(g_allocator.DeviceFree(d_values.d_buffers[d_values.selector])); + if (d_offests_out) CubDebugExit(g_allocator.DeviceFree(d_offests_out)); + if (d_lengths_out) CubDebugExit(g_allocator.DeviceFree(d_lengths_out)); + if (d_num_runs) CubDebugExit(g_allocator.DeviceFree(d_num_runs)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + } + + // Host cleanup + if (h_keys) delete[] h_keys; + if (h_values) delete[] h_values; + if (h_offsets_reference) delete[] h_offsets_reference; + if (h_lengths_reference) delete[] h_lengths_reference; + + printf("\n\n"); + + if (timing_iterations > 0) + { + printf("%d timing iterations, average time to sort and isolate non-trivial duplicates: %.3f ms (%.3f ms spent in RLE isolation)\n", + timing_iterations, + elapsed_millis / timing_iterations, + elapsed_rle_millis / timing_iterations); + } + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/experimental/.gitignore b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/.gitignore new file mode 100644 index 0000000..5e56e04 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/.gitignore @@ -0,0 +1 @@ +/bin diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/experimental/Makefile b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/Makefile new file mode 100644 index 0000000..7165d93 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/Makefile @@ -0,0 +1,125 @@ +#/****************************************************************************** +# * Copyright (c) 2011, Duane Merrill. All rights reserved. +# * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. +# * +# * Redistribution and use in source and binary forms, with or without +# * modification, are permitted provided that the following conditions are met: +# * * Redistributions of source code must retain the above copyright +# * notice, this list of conditions and the following disclaimer. +# * * Redistributions in binary form must reproduce the above copyright +# * notice, this list of conditions and the following disclaimer in the +# * documentation and/or other materials provided with the distribution. +# * * Neither the name of the NVIDIA CORPORATION nor the +# * names of its contributors may be used to endorse or promote products +# * derived from this software without specific prior written permission. +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY +# * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# * +#******************************************************************************/ + +#------------------------------------------------------------------------------- +# +# Makefile usage +# +# make [sm=] [cdp=<0|1>] [force32=<0|1>] [abi=<0|1>] [open64=<0|1>] [verbose=<0|1>] [keep=<0|1>] [quicktest=<0|1>] +# +#------------------------------------------------------------------------------- + +include ../common.mk + +#------------------------------------------------------------------------------- +# Commandline Options +#------------------------------------------------------------------------------- + +# [mkl=<0|1>] compile against Intel MKL +ifeq ($(mkl), 1) + DEFINES += -DCUB_MKL + +ifeq (WIN_NT, $(findstring WIN_NT, $(OSUPPER))) + LIBS += mkl_intel_lp64.lib mkl_intel_thread.lib mkl_core.lib libiomp5md.lib + NVCCFLAGS += -Xcompiler /openmp +else + LIBS += -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -liomp5 -lpthread -lm + NVCCFLAGS += -Xcompiler -fopenmp + +endif + +endif + + +#------------------------------------------------------------------------------- +# Compiler and compilation platform +#------------------------------------------------------------------------------- + +# Includes +INC += -I$(CUB_DIR) -I$(CUB_DIR)test + +# detect OS +OSUPPER = $(shell uname -s 2>/dev/null | tr [:lower:] [:upper:]) + +#------------------------------------------------------------------------------- +# Dependency Lists +#------------------------------------------------------------------------------- + +exp_rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) + +EXP_DEPS = $(call rwildcard, ./,*.cuh) \ + $(call rwildcard, ./,*.h) + +DEPS = $(CUB_DEPS) \ + $(EXP_DEPS) \ + $(CUB_DIR)test/Makefile \ + $(CUB_DIR)test/test_util.h \ + $(CUB_DIR)test/mersenne.h \ + + + +#------------------------------------------------------------------------------- +# make default +#------------------------------------------------------------------------------- + +default: + + +#------------------------------------------------------------------------------- +# make clean +#------------------------------------------------------------------------------- + +clean : + rm -f bin/*$(CPU_ARCH_SUFFIX)* + rm -f *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx *.hash *.cu.cpp *.o + + + +#------------------------------------------------------------------------------- +# make histogram_compare +#------------------------------------------------------------------------------- + +histogram_compare: bin/histogram_compare_$(BIN_SUFFIX) + +bin/histogram_compare_$(BIN_SUFFIX) : histogram_compare.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/histogram_compare_$(BIN_SUFFIX) histogram_compare.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + + +#------------------------------------------------------------------------------- +# make spmv_compare +#------------------------------------------------------------------------------- + +spmv_compare: bin/spmv_compare_$(BIN_SUFFIX) + +bin/spmv_compare_$(BIN_SUFFIX) : spmv_compare.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/spmv_compare_$(BIN_SUFFIX) spmv_compare.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -lcusparse $(MKL_LIBS) -O3 + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/experimental/defunct/example_coo_spmv.cu b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/defunct/example_coo_spmv.cu new file mode 100644 index 0000000..6b33e1f --- /dev/null +++ 
b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/defunct/example_coo_spmv.cu @@ -0,0 +1,1070 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/****************************************************************************** + * An implementation of COO SpMV using prefix scan to implement a + * reduce-value-by-row strategy + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include +#include +#include + +#include + +#include "coo_graph.cuh" +#include "../test/test_util.h" + +using namespace cub; +using namespace std; + + +/****************************************************************************** + * Globals, constants, and typedefs + ******************************************************************************/ + +typedef int VertexId; // uint32s as vertex ids +typedef double Value; // double-precision floating point values + +bool g_verbose = false; +int g_timing_iterations = 1; +CachingDeviceAllocator g_allocator; + + +/****************************************************************************** + * Texture referencing + ******************************************************************************/ + +/** + * Templated texture reference type for multiplicand vector + */ +template +struct TexVector +{ + // Texture type to actually use (e.g., because CUDA doesn't load doubles as texture items) + typedef typename If<(Equals::VALUE), uint2, Value>::Type CastType; + + // Texture reference type + typedef texture TexRef; + + static TexRef ref; + + /** + * Bind textures + */ + static void BindTexture(void *d_in, int elements) + { + cudaChannelFormatDesc tex_desc = cudaCreateChannelDesc(); + if (d_in) + { + size_t offset; + size_t bytes = sizeof(CastType) * elements; + CubDebugExit(cudaBindTexture(&offset, ref, d_in, tex_desc, bytes)); + } + } + + /** + * Unbind textures + */ + static void UnbindTexture() + { + CubDebugExit(cudaUnbindTexture(ref)); + } + + /** + * Load + */ + static __device__ __forceinline__ Value Load(int offset) + { + Value output; + reinterpret_cast::CastType &>(output) = tex1Dfetch(TexVector::ref, offset); + return output; + } +}; + +// Texture reference definitions +template +typename TexVector::TexRef TexVector::ref = 0; + + +/****************************************************************************** + * Utility types + ******************************************************************************/ + + +/** + * A partial dot-product sum paired with a corresponding row-id + */ +template +struct PartialProduct +{ + VertexId row; /// Row-id + Value partial; /// PartialProduct sum +}; + + +/** + * A partial dot-product sum paired with a corresponding row-id (specialized for double-int pairings) + */ +template <> +struct PartialProduct +{ + long long row; /// Row-id + double partial; /// PartialProduct sum +}; + + +/** + * Reduce-value-by-row scan operator + */ +struct ReduceByKeyOp +{ + template + __device__ __forceinline__ PartialProduct operator()( + const PartialProduct &first, + const PartialProduct &second) + { + PartialProduct retval; + + retval.partial = (second.row != first.row) ? 
+ second.partial : + first.partial + second.partial; + + retval.row = second.row; + return retval; + } +}; + + +/** + * Stateful block-wide prefix operator for BlockScan + */ +template +struct BlockPrefixCallbackOp +{ + // Running block-wide prefix + PartialProduct running_prefix; + + /** + * Returns the block-wide running_prefix in thread-0 + */ + __device__ __forceinline__ PartialProduct operator()( + const PartialProduct &block_aggregate) ///< The aggregate sum of the BlockScan inputs + { + ReduceByKeyOp scan_op; + + PartialProduct retval = running_prefix; + running_prefix = scan_op(running_prefix, block_aggregate); + return retval; + } +}; + + +/** + * Operator for detecting discontinuities in a list of row identifiers. + */ +struct NewRowOp +{ + /// Returns true if row_b is the start of a new row + template + __device__ __forceinline__ bool operator()( + const VertexId& row_a, + const VertexId& row_b) + { + return (row_a != row_b); + } +}; + + + +/****************************************************************************** + * Persistent thread block types + ******************************************************************************/ + +/** + * SpMV thread block abstraction for processing a contiguous segment of + * sparse COO tiles. + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + typename VertexId, + typename Value> +struct PersistentBlockSpmv +{ + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + // Constants + enum + { + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + }; + + // Head flag type + typedef int HeadFlag; + + // Partial dot product type + typedef PartialProduct PartialProduct; + + // Parameterized BlockScan type for reduce-value-by-row scan + typedef BlockScan BlockScan; + + // Parameterized BlockExchange type for exchanging rows between warp-striped -> blocked arrangements + typedef BlockExchange BlockExchangeRows; + + // Parameterized BlockExchange type for exchanging values between warp-striped -> blocked arrangements + typedef BlockExchange BlockExchangeValues; + + // Parameterized BlockDiscontinuity type for setting head-flags for each new row segment + typedef BlockDiscontinuity BlockDiscontinuity; + + // Shared memory type for this thread block + struct TempStorage + { + union + { + typename BlockExchangeRows::TempStorage exchange_rows; // Smem needed for BlockExchangeRows + typename BlockExchangeValues::TempStorage exchange_values; // Smem needed for BlockExchangeValues + struct + { + typename BlockScan::TempStorage scan; // Smem needed for BlockScan + typename BlockDiscontinuity::TempStorage discontinuity; // Smem needed for BlockDiscontinuity + }; + }; + + VertexId first_block_row; ///< The first row-ID seen by this thread block + VertexId last_block_row; ///< The last row-ID seen by this thread block + Value first_product; ///< The first dot-product written by this thread block + }; + + //--------------------------------------------------------------------- + // Thread fields + //--------------------------------------------------------------------- + + TempStorage &temp_storage; + BlockPrefixCallbackOp prefix_op; + VertexId *d_rows; + VertexId *d_columns; + Value *d_values; + Value *d_vector; + Value *d_result; + PartialProduct *d_block_partials; + int block_offset; + int block_end; + + + //--------------------------------------------------------------------- + // Operations + 
//--------------------------------------------------------------------- + + /** + * Constructor + */ + __device__ __forceinline__ + PersistentBlockSpmv( + TempStorage &temp_storage, + VertexId *d_rows, + VertexId *d_columns, + Value *d_values, + Value *d_vector, + Value *d_result, + PartialProduct *d_block_partials, + int block_offset, + int block_end) + : + temp_storage(temp_storage), + d_rows(d_rows), + d_columns(d_columns), + d_values(d_values), + d_vector(d_vector), + d_result(d_result), + d_block_partials(d_block_partials), + block_offset(block_offset), + block_end(block_end) + { + // Initialize scalar shared memory values + if (threadIdx.x == 0) + { + VertexId first_block_row = d_rows[block_offset]; + VertexId last_block_row = d_rows[block_end - 1]; + + temp_storage.first_block_row = first_block_row; + temp_storage.last_block_row = last_block_row; + temp_storage.first_product = Value(0); + + // Initialize prefix_op to identity + prefix_op.running_prefix.row = first_block_row; + prefix_op.running_prefix.partial = Value(0); + } + + __syncthreads(); + } + + + /** + * Processes a COO input tile of edges, outputting dot products for each row + */ + template + __device__ __forceinline__ void ProcessTile( + int block_offset, + int guarded_items = 0) + { + VertexId columns[ITEMS_PER_THREAD]; + VertexId rows[ITEMS_PER_THREAD]; + Value values[ITEMS_PER_THREAD]; + PartialProduct partial_sums[ITEMS_PER_THREAD]; + HeadFlag head_flags[ITEMS_PER_THREAD]; + + // Load a thread block-striped tile of A (sparse row-ids, column-ids, and values) + if (FULL_TILE) + { + // Unguarded loads + LoadDirectWarpStriped(threadIdx.x, d_columns + block_offset, columns); + LoadDirectWarpStriped(threadIdx.x, d_values + block_offset, values); + LoadDirectWarpStriped(threadIdx.x, d_rows + block_offset, rows); + } + else + { + // This is a partial-tile (e.g., the last tile of input). 
Extend the coordinates of the last + // vertex for out-of-bound items, but zero-valued + LoadDirectWarpStriped(threadIdx.x, d_columns + block_offset, columns, guarded_items, VertexId(0)); + LoadDirectWarpStriped(threadIdx.x, d_values + block_offset, values, guarded_items, Value(0)); + LoadDirectWarpStriped(threadIdx.x, d_rows + block_offset, rows, guarded_items, temp_storage.last_block_row); + } + + // Load the referenced values from x and compute the dot product partials sums + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { +#if CUB_PTX_ARCH >= 350 + values[ITEM] *= ThreadLoad(d_vector + columns[ITEM]); +#else + values[ITEM] *= TexVector::Load(columns[ITEM]); +#endif + } + + // Transpose from warp-striped to blocked arrangement + BlockExchangeValues(temp_storage.exchange_values).WarpStripedToBlocked(values); + + __syncthreads(); + + // Transpose from warp-striped to blocked arrangement + BlockExchangeRows(temp_storage.exchange_rows).WarpStripedToBlocked(rows); + + // Barrier for smem reuse and coherence + __syncthreads(); + + // FlagT row heads by looking for discontinuities + BlockDiscontinuity(temp_storage.discontinuity).FlagHeads( + head_flags, // (Out) Head flags + rows, // Original row ids + NewRowOp(), // Functor for detecting start of new rows + prefix_op.running_prefix.row); // Last row ID from previous tile to compare with first row ID in this tile + + // Assemble partial product structures + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + partial_sums[ITEM].partial = values[ITEM]; + partial_sums[ITEM].row = rows[ITEM]; + } + + // Reduce reduce-value-by-row across partial_sums using exclusive prefix scan + PartialProduct block_aggregate; + BlockScan(temp_storage.scan).ExclusiveScan( + partial_sums, // Scan input + partial_sums, // Scan output + ReduceByKeyOp(), // Scan operator + block_aggregate, // Block-wide total (unused) + prefix_op); // Prefix operator for seeding the block-wide scan with the running total + + // Barrier for smem reuse and coherence + __syncthreads(); + + // Scatter an accumulated dot product if it is the head of a valid row + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + if (head_flags[ITEM]) + { + d_result[partial_sums[ITEM].row] = partial_sums[ITEM].partial; + + // Save off the first partial product that this thread block will scatter + if (partial_sums[ITEM].row == temp_storage.first_block_row) + { + temp_storage.first_product = partial_sums[ITEM].partial; + } + } + } + } + + + /** + * Iterate over input tiles belonging to this thread block + */ + __device__ __forceinline__ + void ProcessTiles() + { + // Process full tiles + while (block_offset <= block_end - TILE_ITEMS) + { + ProcessTile(block_offset); + block_offset += TILE_ITEMS; + } + + // Process the last, partially-full tile (if present) + int guarded_items = block_end - block_offset; + if (guarded_items) + { + ProcessTile(block_offset, guarded_items); + } + + if (threadIdx.x == 0) + { + if (gridDim.x == 1) + { + // Scatter the final aggregate (this kernel contains only 1 thread block) + d_result[prefix_op.running_prefix.row] = prefix_op.running_prefix.partial; + } + else + { + // Write the first and last partial products from this thread block so + // that they can be subsequently "fixed up" in the next kernel. 
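+                // (The fix-up is needed because a single row of A may straddle the
+                //  boundary between two thread blocks: each block then holds only a
+                //  partial sum for that row.  Emitting the first and last partial of
+                //  every block -- 2 * gridDim.x entries in d_block_partials -- lets the
+                //  finalize kernel run the same reduce-value-by-row scan over those
+                //  entries and write the correct totals for the boundary rows.)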
+ + PartialProduct first_product; + first_product.row = temp_storage.first_block_row; + first_product.partial = temp_storage.first_product; + + d_block_partials[blockIdx.x * 2] = first_product; + d_block_partials[(blockIdx.x * 2) + 1] = prefix_op.running_prefix; + } + } + } +}; + + +/** + * Threadblock abstraction for "fixing up" an array of interblock SpMV partial products. + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + typename VertexId, + typename Value> +struct FinalizeSpmvBlock +{ + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + // Constants + enum + { + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + }; + + // Head flag type + typedef int HeadFlag; + + // Partial dot product type + typedef PartialProduct PartialProduct; + + // Parameterized BlockScan type for reduce-value-by-row scan + typedef BlockScan BlockScan; + + // Parameterized BlockDiscontinuity type for setting head-flags for each new row segment + typedef BlockDiscontinuity BlockDiscontinuity; + + // Shared memory type for this thread block + struct TempStorage + { + typename BlockScan::TempStorage scan; // Smem needed for reduce-value-by-row scan + typename BlockDiscontinuity::TempStorage discontinuity; // Smem needed for head-flagging + + VertexId last_block_row; + }; + + + //--------------------------------------------------------------------- + // Thread fields + //--------------------------------------------------------------------- + + TempStorage &temp_storage; + BlockPrefixCallbackOp prefix_op; + Value *d_result; + PartialProduct *d_block_partials; + int num_partials; + + + //--------------------------------------------------------------------- + // Operations + //--------------------------------------------------------------------- + + /** + * Constructor + */ + __device__ __forceinline__ + FinalizeSpmvBlock( + TempStorage &temp_storage, + Value *d_result, + PartialProduct *d_block_partials, + int num_partials) + : + temp_storage(temp_storage), + d_result(d_result), + d_block_partials(d_block_partials), + num_partials(num_partials) + { + // Initialize scalar shared memory values + if (threadIdx.x == 0) + { + VertexId first_block_row = d_block_partials[0].row; + VertexId last_block_row = d_block_partials[num_partials - 1].row; + temp_storage.last_block_row = last_block_row; + + // Initialize prefix_op to identity + prefix_op.running_prefix.row = first_block_row; + prefix_op.running_prefix.partial = Value(0); + } + + __syncthreads(); + } + + + /** + * Processes a COO input tile of edges, outputting dot products for each row + */ + template + __device__ __forceinline__ + void ProcessTile( + int block_offset, + int guarded_items = 0) + { + VertexId rows[ITEMS_PER_THREAD]; + PartialProduct partial_sums[ITEMS_PER_THREAD]; + HeadFlag head_flags[ITEMS_PER_THREAD]; + + // Load a tile of block partials from previous kernel + if (FULL_TILE) + { + // Full tile +#if CUB_PTX_ARCH >= 350 + LoadDirectBlocked(threadIdx.x, d_block_partials + block_offset, partial_sums); +#else + LoadDirectBlocked(threadIdx.x, d_block_partials + block_offset, partial_sums); +#endif + } + else + { + // Partial tile (extend zero-valued coordinates of the last partial-product for out-of-bounds items) + PartialProduct default_sum; + default_sum.row = temp_storage.last_block_row; + default_sum.partial = Value(0); + +#if CUB_PTX_ARCH >= 350 + LoadDirectBlocked(threadIdx.x, d_block_partials + block_offset, 
partial_sums, guarded_items, default_sum); +#else + LoadDirectBlocked(threadIdx.x, d_block_partials + block_offset, partial_sums, guarded_items, default_sum); +#endif + } + + // Copy out row IDs for row-head flagging + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + rows[ITEM] = partial_sums[ITEM].row; + } + + // FlagT row heads by looking for discontinuities + BlockDiscontinuity(temp_storage.discontinuity).FlagHeads( + rows, // Original row ids + head_flags, // (Out) Head flags + NewRowOp(), // Functor for detecting start of new rows + prefix_op.running_prefix.row); // Last row ID from previous tile to compare with first row ID in this tile + + // Reduce reduce-value-by-row across partial_sums using exclusive prefix scan + PartialProduct block_aggregate; + BlockScan(temp_storage.scan).ExclusiveScan( + partial_sums, // Scan input + partial_sums, // Scan output + ReduceByKeyOp(), // Scan operator + block_aggregate, // Block-wide total (unused) + prefix_op); // Prefix operator for seeding the block-wide scan with the running total + + // Scatter an accumulated dot product if it is the head of a valid row + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + if (head_flags[ITEM]) + { + d_result[partial_sums[ITEM].row] = partial_sums[ITEM].partial; + } + } + } + + + /** + * Iterate over input tiles belonging to this thread block + */ + __device__ __forceinline__ + void ProcessTiles() + { + // Process full tiles + int block_offset = 0; + while (block_offset <= num_partials - TILE_ITEMS) + { + ProcessTile(block_offset); + block_offset += TILE_ITEMS; + } + + // Process final partial tile (if present) + int guarded_items = num_partials - block_offset; + if (guarded_items) + { + ProcessTile(block_offset, guarded_items); + } + + // Scatter the final aggregate (this kernel contains only 1 thread block) + if (threadIdx.x == 0) + { + d_result[prefix_op.running_prefix.row] = prefix_op.running_prefix.partial; + } + } +}; + + +/****************************************************************************** + * Kernel entrypoints + ******************************************************************************/ + + + +/** + * SpMV kernel whose thread blocks each process a contiguous segment of sparse COO tiles. + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + typename VertexId, + typename Value> +__launch_bounds__ (BLOCK_THREADS) +__global__ void CooKernel( + GridEvenShare even_share, + PartialProduct *d_block_partials, + VertexId *d_rows, + VertexId *d_columns, + Value *d_values, + Value *d_vector, + Value *d_result) +{ + // Specialize SpMV thread block abstraction type + typedef PersistentBlockSpmv PersistentBlockSpmv; + + // Shared memory allocation + __shared__ typename PersistentBlockSpmv::TempStorage temp_storage; + + // Initialize thread block even-share to tell us where to start and stop our tile-processing + even_share.BlockInit(); + + // Construct persistent thread block + PersistentBlockSpmv persistent_block( + temp_storage, + d_rows, + d_columns, + d_values, + d_vector, + d_result, + d_block_partials, + even_share.block_offset, + even_share.block_end); + + // Process input tiles + persistent_block.ProcessTiles(); +} + + +/** + * Kernel for "fixing up" an array of interblock SpMV partial products. 
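+ *
+ * Intended to be launched as a single thread block (FinalizeSpmvBlock notes that
+ * "this kernel contains only 1 thread block"), so one running prefix can be carried
+ * across all 2 * grid-size partial products emitted by CooKernel.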
+ */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + typename VertexId, + typename Value> +__launch_bounds__ (BLOCK_THREADS, 1) +__global__ void CooFinalizeKernel( + PartialProduct *d_block_partials, + int num_partials, + Value *d_result) +{ + // Specialize "fix-up" thread block abstraction type + typedef FinalizeSpmvBlock FinalizeSpmvBlock; + + // Shared memory allocation + __shared__ typename FinalizeSpmvBlock::TempStorage temp_storage; + + // Construct persistent thread block + FinalizeSpmvBlock persistent_block(temp_storage, d_result, d_block_partials, num_partials); + + // Process input tiles + persistent_block.ProcessTiles(); +} + + + +//--------------------------------------------------------------------- +// Host subroutines +//--------------------------------------------------------------------- + + +/** + * Simple test of device + */ +template < + int COO_BLOCK_THREADS, + int COO_ITEMS_PER_THREAD, + int COO_SUBSCRIPTION_FACTOR, + int FINALIZE_BLOCK_THREADS, + int FINALIZE_ITEMS_PER_THREAD, + typename VertexId, + typename Value> +void TestDevice( + CooGraph& coo_graph, + Value* h_vector, + Value* h_reference) +{ + typedef PartialProduct PartialProduct; + + const int COO_TILE_SIZE = COO_BLOCK_THREADS * COO_ITEMS_PER_THREAD; + + // SOA device storage + VertexId *d_rows; // SOA graph row coordinates + VertexId *d_columns; // SOA graph col coordinates + Value *d_values; // SOA graph values + Value *d_vector; // Vector multiplicand + Value *d_result; // Output row + PartialProduct *d_block_partials; // Temporary storage for communicating dot product partials between thread blocks + + // Create SOA version of coo_graph on host + int num_edges = coo_graph.coo_tuples.size(); + VertexId *h_rows = new VertexId[num_edges]; + VertexId *h_columns = new VertexId[num_edges]; + Value *h_values = new Value[num_edges]; + for (int i = 0; i < num_edges; i++) + { + h_rows[i] = coo_graph.coo_tuples[i].row; + h_columns[i] = coo_graph.coo_tuples[i].col; + h_values[i] = coo_graph.coo_tuples[i].val; + } + + // Get CUDA properties + Device device_props; + CubDebugExit(device_props.Init()); + + // Determine launch configuration from kernel properties + int coo_sm_occupancy; + CubDebugExit(device_props.MaxSmOccupancy( + coo_sm_occupancy, + CooKernel, + COO_BLOCK_THREADS)); + int max_coo_grid_size = device_props.sm_count * coo_sm_occupancy * COO_SUBSCRIPTION_FACTOR; + + // Construct an even-share work distribution + GridEvenShare even_share(num_edges, max_coo_grid_size, COO_TILE_SIZE); + int coo_grid_size = even_share.grid_size; + int num_partials = coo_grid_size * 2; + + // Allocate COO device arrays + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_rows, sizeof(VertexId) * num_edges)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_columns, sizeof(VertexId) * num_edges)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_values, sizeof(Value) * num_edges)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_vector, sizeof(Value) * coo_graph.col_dim)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_result, sizeof(Value) * coo_graph.row_dim)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_block_partials, sizeof(PartialProduct) * num_partials)); + + // Copy host arrays to device + CubDebugExit(cudaMemcpy(d_rows, h_rows, sizeof(VertexId) * num_edges, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(d_columns, h_columns, sizeof(VertexId) * num_edges, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(d_values, h_values, sizeof(Value) * num_edges, cudaMemcpyHostToDevice)); 
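+
+ // The vector multiplicand is copied next and then bound to a texture via
+ // TexVector::BindTexture (and unbound again during cleanup), presumably so
+ // that column-indexed gathers from it are served by the texture cache.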
+ CubDebugExit(cudaMemcpy(d_vector, h_vector, sizeof(Value) * coo_graph.col_dim, cudaMemcpyHostToDevice)); + + // Bind textures + TexVector::BindTexture(d_vector, coo_graph.col_dim); + + // Print debug info + printf("CooKernel<%d, %d><<<%d, %d>>>(...), Max SM occupancy: %d\n", + COO_BLOCK_THREADS, COO_ITEMS_PER_THREAD, coo_grid_size, COO_BLOCK_THREADS, coo_sm_occupancy); + if (coo_grid_size > 1) + { + printf("CooFinalizeKernel<<<1, %d>>>(...)\n", FINALIZE_BLOCK_THREADS); + } + fflush(stdout); + + CubDebugExit(cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte)); + + // Run kernel (always run one iteration without timing) + GpuTimer gpu_timer; + float elapsed_millis = 0.0; + for (int i = 0; i <= g_timing_iterations; i++) + { + gpu_timer.Start(); + + // Initialize output + CubDebugExit(cudaMemset(d_result, 0, coo_graph.row_dim * sizeof(Value))); + + // Run the COO kernel + CooKernel<<>>( + even_share, + d_block_partials, + d_rows, + d_columns, + d_values, + d_vector, + d_result); + + if (coo_grid_size > 1) + { + // Run the COO finalize kernel + CooFinalizeKernel<<<1, FINALIZE_BLOCK_THREADS>>>( + d_block_partials, + num_partials, + d_result); + } + + gpu_timer.Stop(); + + if (i > 0) + elapsed_millis += gpu_timer.ElapsedMillis(); + } + + // Force any kernel stdio to screen + CubDebugExit(cudaThreadSynchronize()); + fflush(stdout); + + // Display timing + if (g_timing_iterations > 0) + { + float avg_elapsed = elapsed_millis / g_timing_iterations; + int total_bytes = ((sizeof(VertexId) + sizeof(VertexId)) * 2 * num_edges) + (sizeof(Value) * coo_graph.row_dim); + printf("%d iterations, average elapsed (%.3f ms), utilized bandwidth (%.3f GB/s), GFLOPS(%.3f)\n", + g_timing_iterations, + avg_elapsed, + total_bytes / avg_elapsed / 1000.0 / 1000.0, + num_edges * 2 / avg_elapsed / 1000.0 / 1000.0); + } + + // Check results + int compare = CompareDeviceResults(h_reference, d_result, coo_graph.row_dim, true, g_verbose); + printf("%s\n", compare ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + TexVector::UnbindTexture(); + CubDebugExit(g_allocator.DeviceFree(d_block_partials)); + CubDebugExit(g_allocator.DeviceFree(d_rows)); + CubDebugExit(g_allocator.DeviceFree(d_columns)); + CubDebugExit(g_allocator.DeviceFree(d_values)); + CubDebugExit(g_allocator.DeviceFree(d_vector)); + CubDebugExit(g_allocator.DeviceFree(d_result)); + delete[] h_rows; + delete[] h_columns; + delete[] h_values; +} + + +/** + * Compute reference answer on CPU + */ +template +void ComputeReference( + CooGraph& coo_graph, + Value* h_vector, + Value* h_reference) +{ + for (VertexId i = 0; i < coo_graph.row_dim; i++) + { + h_reference[i] = 0.0; + } + + for (VertexId i = 0; i < coo_graph.coo_tuples.size(); i++) + { + h_reference[coo_graph.coo_tuples[i].row] += + coo_graph.coo_tuples[i].val * + h_vector[coo_graph.coo_tuples[i].col]; + } +} + + +/** + * Assign arbitrary values to vector items + */ +template +void AssignVectorValues(Value *vector, int col_dim) +{ + for (int i = 0; i < col_dim; i++) + { + vector[i] = 1.0; + } +} + + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("i", g_timing_iterations); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s\n [--device=] [--v] [--iterations=] [--grid-size=]\n" + "\t--type=wheel --spokes=\n" + "\t--type=grid2d --width= [--no-self-loops]\n" + "\t--type=grid3d --width= [--no-self-loops]\n" + "\t--type=market --file=\n" + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Get graph type + string type; + args.GetCmdLineArgument("type", type); + + // Generate graph structure + + CpuTimer timer; + timer.Start(); + CooGraph coo_graph; + if (type == string("grid2d")) + { + VertexId width; + args.GetCmdLineArgument("width", width); + bool self_loops = !args.CheckCmdLineFlag("no-self-loops"); + printf("Generating %s grid2d width(%d)... ", (self_loops) ? "5-pt" : "4-pt", width); fflush(stdout); + if (coo_graph.InitGrid2d(width, self_loops)) exit(1); + } else if (type == string("grid3d")) + { + VertexId width; + args.GetCmdLineArgument("width", width); + bool self_loops = !args.CheckCmdLineFlag("no-self-loops"); + printf("Generating %s grid3d width(%d)... ", (self_loops) ? "7-pt" : "6-pt", width); fflush(stdout); + if (coo_graph.InitGrid3d(width, self_loops)) exit(1); + } + else if (type == string("wheel")) + { + VertexId spokes; + args.GetCmdLineArgument("spokes", spokes); + printf("Generating wheel spokes(%d)... ", spokes); fflush(stdout); + if (coo_graph.InitWheel(spokes)) exit(1); + } + else if (type == string("market")) + { + string filename; + args.GetCmdLineArgument("file", filename); + printf("Generating MARKET for %s... ", filename.c_str()); fflush(stdout); + if (coo_graph.InitMarket(filename)) exit(1); + } + else + { + printf("Unsupported graph type\n"); + exit(1); + } + timer.Stop(); + printf("Done (%.3fs). 
%d non-zeros, %d rows, %d columns\n", + timer.ElapsedMillis() / 1000.0, + coo_graph.coo_tuples.size(), + coo_graph.row_dim, + coo_graph.col_dim); + fflush(stdout); + + if (g_verbose) + { + cout << coo_graph << "\n"; + } + + // Create vector + Value *h_vector = new Value[coo_graph.col_dim]; + AssignVectorValues(h_vector, coo_graph.col_dim); + if (g_verbose) + { + printf("Vector[%d]: ", coo_graph.col_dim); + DisplayResults(h_vector, coo_graph.col_dim); + printf("\n\n"); + } + + // Compute reference answer + Value *h_reference = new Value[coo_graph.row_dim]; + ComputeReference(coo_graph, h_vector, h_reference); + if (g_verbose) + { + printf("Results[%d]: ", coo_graph.row_dim); + DisplayResults(h_reference, coo_graph.row_dim); + printf("\n\n"); + } + + // Parameterization for SM35 + enum + { + COO_BLOCK_THREADS = 64, + COO_ITEMS_PER_THREAD = 10, + COO_SUBSCRIPTION_FACTOR = 4, + FINALIZE_BLOCK_THREADS = 256, + FINALIZE_ITEMS_PER_THREAD = 4, + }; + + // Run GPU version + TestDevice< + COO_BLOCK_THREADS, + COO_ITEMS_PER_THREAD, + COO_SUBSCRIPTION_FACTOR, + FINALIZE_BLOCK_THREADS, + FINALIZE_ITEMS_PER_THREAD>(coo_graph, h_vector, h_reference); + + // Cleanup + delete[] h_vector; + delete[] h_reference; + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/experimental/defunct/test_device_seg_reduce.cu b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/defunct/test_device_seg_reduce.cu new file mode 100644 index 0000000..d2e55b9 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/defunct/test_device_seg_reduce.cu @@ -0,0 +1,2142 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/****************************************************************************** + * An implementation of segmented reduction using a load-balanced parallelization + * strategy based on the MergePath decision path. + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include +#include +#include + +#include + +#include "test_util.h" + +using namespace cub; +using namespace std; + + +/****************************************************************************** + * Globals, constants, and typedefs + ******************************************************************************/ + +bool g_verbose = false; +int g_timing_iterations = 1; +CachingDeviceAllocator g_allocator(true); + + +/****************************************************************************** + * Utility routines + ******************************************************************************/ + + +/** + * An pair of index offsets + */ +template +struct IndexPair +{ + OffsetT a_idx; + OffsetT b_idx; +}; + + +/** + * Computes the begin offsets into A and B for the specified + * location (diagonal) along the merge decision path + */ +template < + int BLOCK_THREADS, + typename IteratorA, + typename IteratorB, + typename OffsetT> +__device__ __forceinline__ void ParallelMergePathSearch( + OffsetT diagonal, + IteratorA a, + IteratorB b, + IndexPair begin, // Begin offsets into a and b + IndexPair end, // End offsets into a and b + IndexPair &intersection) // [out] Intersection offsets into a and b +{ + OffsetT a_split_min = CUB_MAX(diagonal - end.b_idx, begin.a_idx); + OffsetT a_split_max = CUB_MIN(diagonal, end.a_idx); + + while (a_split_min < a_split_max) + { + OffsetT a_distance = a_split_max - a_split_min; + OffsetT a_slice = (a_distance + BLOCK_THREADS - 1) >> Log2::VALUE; + OffsetT a_split_pivot = CUB_MIN(a_split_min + (threadIdx.x * a_slice), end.a_idx - 1); + + int move_up = (a[a_split_pivot] <= b[diagonal - a_split_pivot - 1]); + int num_up = __syncthreads_count(move_up); +/* + _CubLog("a_split_min(%d), a_split_max(%d) a_distance(%d), a_slice(%d), a_split_pivot(%d), move_up(%d), num_up(%d), a_begin(%d), a_end(%d)\n", + a_split_min, a_split_max, a_distance, a_slice, a_split_pivot, move_up, num_up, a_begin, a_end); +*/ + a_split_max = CUB_MIN(num_up * a_slice, end.a_idx); + a_split_min = CUB_MAX(a_split_max - a_slice, begin.a_idx) + 1; + } + + intersection.a_idx = CUB_MIN(a_split_min, end.a_idx); + intersection.b_idx = CUB_MIN(diagonal - a_split_min, end.b_idx); +} + +/** + * Computes the begin offsets into A and B for the specified + * location (diagonal) along the merge decision path + */ +template < + typename IteratorA, + typename IteratorB, + typename OffsetT> +__device__ __forceinline__ void MergePathSearch( + OffsetT diagonal, + IteratorA a, + IteratorB b, + IndexPair begin, // Begin offsets into a and b + IndexPair end, // End offsets into a and b + IndexPair &intersection) // [out] Intersection offsets into a and b +{ + OffsetT split_min = CUB_MAX(diagonal - end.b_idx, begin.a_idx); + OffsetT split_max = CUB_MIN(diagonal, end.a_idx); + + while (split_min < split_max) + { + OffsetT split_pivot = (split_min + split_max) >> 1; + if (a[split_pivot] <= b[diagonal - split_pivot - 1]) + { + // Move candidate split range up A, down B + split_min = split_pivot + 1; + } + else + { + // Move candidate split 
range up B, down A + split_max = split_pivot; + } + } + + intersection.a_idx = CUB_MIN(split_min, end.a_idx); + intersection.b_idx = CUB_MIN(diagonal - split_min, end.b_idx); +} + + +/****************************************************************************** + * Tuning policy types + ******************************************************************************/ + +/** + * Parameterizable tuning policy type for BlockSegReduceRegion + */ +template < + int _BLOCK_THREADS, ///< Threads per thread block + int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + bool _USE_SMEM_SEGMENT_CACHE, ///< Whether or not to cache incoming segment offsets in shared memory before reducing each tile + bool _USE_SMEM_VALUE_CACHE, ///< Whether or not to cache incoming values in shared memory before reducing each tile + CacheLoadModifier _LOAD_MODIFIER_SEGMENTS, ///< Cache load modifier for reading segment offsets + CacheLoadModifier _LOAD_MODIFIER_VALUES, ///< Cache load modifier for reading values + BlockReduceAlgorithm _REDUCE_ALGORITHM, ///< The BlockReduce algorithm to use + BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use +struct BlockSegReduceRegionPolicy +{ + enum + { + BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block + ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + USE_SMEM_SEGMENT_CACHE = _USE_SMEM_SEGMENT_CACHE, ///< Whether or not to cache incoming segment offsets in shared memory before reducing each tile + USE_SMEM_VALUE_CACHE = _USE_SMEM_VALUE_CACHE, ///< Whether or not to cache incoming upcoming values in shared memory before reducing each tile + }; + + static const CacheLoadModifier LOAD_MODIFIER_SEGMENTS = _LOAD_MODIFIER_SEGMENTS; ///< Cache load modifier for reading segment offsets + static const CacheLoadModifier LOAD_MODIFIER_VALUES = _LOAD_MODIFIER_VALUES; ///< Cache load modifier for reading values + static const BlockReduceAlgorithm REDUCE_ALGORITHM = _REDUCE_ALGORITHM; ///< The BlockReduce algorithm to use + static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use +}; + + +/****************************************************************************** + * Persistent thread block types + ******************************************************************************/ + +/** + * \brief BlockSegReduceTiles implements a stateful abstraction of CUDA thread blocks for participating in device-wide segmented reduction. 
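+ *
+ * Work is partitioned by merging two conceptual lists -- the segment
+ * end-offsets and the value indices 0..num_values-1 -- along the MergePath
+ * decision path, so every tile covers at most TILE_ITEMS combined path items
+ * no matter how skewed the segment sizes are.
+ *
+ * Illustrative example (hypothetical data, not from any test below): with
+ * end-offsets {2, 4, 4} (three segments, the last one empty) and four values,
+ * the path has 3 + 4 = 7 items; MergePathSearch at diagonal 4 returns the
+ * intersection (a_idx = 1, b_idx = 3), i.e. the first four path items consume
+ * one segment boundary and three values.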
+ */ +template < + typename BlockSegReduceRegionPolicy, ///< Parameterized BlockSegReduceRegionPolicy tuning policy + typename SegmentOffsetIterator, ///< Random-access input iterator type for reading segment end-offsets + typename ValueIterator, ///< Random-access input iterator type for reading values + typename OutputIteratorT, ///< Random-access output iterator type for writing segment reductions + typename ReductionOp, ///< Binary reduction operator type having member T operator()(const T &a, const T &b) + typename OffsetT> ///< Signed integer type for global offsets +struct BlockSegReduceRegion +{ + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + // Constants + enum + { + BLOCK_THREADS = BlockSegReduceRegionPolicy::BLOCK_THREADS, + ITEMS_PER_THREAD = BlockSegReduceRegionPolicy::ITEMS_PER_THREAD, + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, /// Number of work items to be processed per tile + + USE_SMEM_SEGMENT_CACHE = BlockSegReduceRegionPolicy::USE_SMEM_SEGMENT_CACHE, ///< Whether or not to cache incoming segment offsets in shared memory before reducing each tile + USE_SMEM_VALUE_CACHE = BlockSegReduceRegionPolicy::USE_SMEM_VALUE_CACHE, ///< Whether or not to cache incoming upcoming values in shared memory before reducing each tile + + SMEM_SEGMENT_CACHE_ITEMS = USE_SMEM_SEGMENT_CACHE ? TILE_ITEMS : 1, + SMEM_VALUE_CACHE_ITEMS = USE_SMEM_VALUE_CACHE ? TILE_ITEMS : 1, + }; + + // Segment offset type + typedef typename std::iterator_traits::value_type SegmentOffset; + + // Value type + typedef typename std::iterator_traits::value_type Value; + + // Counting iterator type + typedef CountingInputIterator CountingIterator; + + // Segment offsets iterator wrapper type + typedef typename If<(IsPointer::VALUE), + CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedInputIterator + SegmentOffsetIterator>::Type // Directly use the supplied input iterator type + WrappedSegmentOffsetIterator; + + // Values iterator wrapper type + typedef typename If<(IsPointer::VALUE), + CacheModifiedInputIterator, // Wrap the native input pointer with CacheModifiedInputIterator + ValueIterator>::Type // Directly use the supplied input iterator type + WrappedValueIterator; + + // Tail flag type for marking segment discontinuities + typedef int TailFlag; + + // Reduce-by-key data type tuple (segment-ID, value) + typedef KeyValuePair KeyValuePair; + + // Index pair data type + typedef IndexPair IndexPair; + + // BlockScan scan operator for reduction-by-segment + typedef ReduceByKeyOp ReduceByKeyOp; + + // Stateful BlockScan prefix callback type for managing a running total while scanning consecutive tiles + typedef RunningBlockPrefixCallbackOp< + KeyValuePair, + ReduceByKeyOp> + RunningPrefixCallbackOp; + + // Parameterized BlockShift type for exchanging index pairs + typedef BlockShift< + IndexPair, + BLOCK_THREADS> + BlockShift; + + // Parameterized BlockReduce type for block-wide reduction + typedef BlockReduce< + Value, + BLOCK_THREADS, + BlockSegReduceRegionPolicy::REDUCE_ALGORITHM> + BlockReduce; + + // Parameterized BlockScan type for block-wide reduce-value-by-key + typedef BlockScan< + KeyValuePair, + BLOCK_THREADS, + BlockSegReduceRegionPolicy::SCAN_ALGORITHM> + BlockScan; + + // Shared memory type for this thread block + struct _TempStorage + { + union + { + // Smem needed for BlockScan + typename BlockScan::TempStorage scan; + + // Smem needed for 
BlockReduce + typename BlockReduce::TempStorage reduce; + + struct + { + // Smem needed for communicating start/end indices between threads for a given work tile + typename BlockShift::TempStorage shift; + + // Smem needed for caching segment end-offsets + SegmentOffset cached_segment_end_offsets[SMEM_SEGMENT_CACHE_ITEMS + 1]; + }; + + // Smem needed for caching values + Value cached_values[SMEM_VALUE_CACHE_ITEMS]; + }; + + IndexPair block_region_idx[2]; // The starting [0] and ending [1] pairs of segment and value indices for the thread block's region + + // The first partial reduction tuple scattered by this thread block + KeyValuePair first_tuple; + }; + + + // Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Thread fields + //--------------------------------------------------------------------- + + _TempStorage &temp_storage; ///< Reference to shared storage + WrappedSegmentOffsetIterator d_segment_end_offsets; ///< A sequence of \p num_segments segment end-offsets + WrappedValueIterator d_values; ///< A sequence of \p num_values data to reduce + OutputIteratorT d_output; ///< A sequence of \p num_segments segment totals + CountingIterator d_value_offsets; ///< A sequence of \p num_values value-offsets + IndexPair *d_block_idx; + OffsetT num_values; ///< Total number of values to reduce + OffsetT num_segments; ///< Number of segments being reduced + Value identity; ///< Identity value (for zero-length segments) + ReductionOp reduction_op; ///< Reduction operator + ReduceByKeyOp scan_op; ///< Reduce-by-key scan operator + RunningPrefixCallbackOp prefix_op; ///< Stateful running total for block-wide prefix scan of partial reduction tuples + + + //--------------------------------------------------------------------- + // Operations + //--------------------------------------------------------------------- + + /** + * Constructor + */ + __device__ __forceinline__ + BlockSegReduceRegion( + TempStorage &temp_storage, ///< Reference to shared storage + SegmentOffsetIterator d_segment_end_offsets, ///< A sequence of \p num_segments segment end-offsets + ValueIterator d_values, ///< A sequence of \p num_values values + OutputIteratorT d_output, ///< A sequence of \p num_segments segment totals + IndexPair *d_block_idx, + OffsetT num_values, ///< Number of values to reduce + OffsetT num_segments, ///< Number of segments being reduced + Value identity, ///< Identity value (for zero-length segments) + ReductionOp reduction_op) ///< Reduction operator + : + temp_storage(temp_storage.Alias()), + d_segment_end_offsets(d_segment_end_offsets), + d_values(d_values), + d_value_offsets(0), + d_output(d_output), + d_block_idx(d_block_idx), + num_values(num_values), + num_segments(num_segments), + identity(identity), + reduction_op(reduction_op), + scan_op(reduction_op), + prefix_op(scan_op) + {} + + + /** + * Fast-path single-segment tile reduction. Perform a + * simple block-wide reduction and accumulate the result into + * the running total. 
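+ *
+ * Note: this fast path (like EmptySegmentsTile) is only referenced from the
+ * commented-out special-case dispatch in ProcessRegion, so as written every
+ * tile goes through MultiSegmentTile.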
+ */ + __device__ __forceinline__ void SingleSegmentTile( + IndexPair next_tile_idx, + IndexPair block_idx) + { + OffsetT tile_values = next_tile_idx.b_idx - block_idx.b_idx; + + // Load a tile's worth of values (using identity for out-of-bounds items) + Value values[ITEMS_PER_THREAD]; + LoadDirectStriped(threadIdx.x, d_values + block_idx.b_idx, values, tile_values, identity); + + // Barrier for smem reuse + __syncthreads(); + + // Reduce the tile of values and update the running total in thread-0 + KeyValuePair tile_aggregate; + tile_aggregate.key = block_idx.a_idx; + tile_aggregate.value = BlockReduce(temp_storage.reduce).Reduce(values, reduction_op); + + if (threadIdx.x == 0) + { + prefix_op.running_total = scan_op(prefix_op.running_total, tile_aggregate); + } + } + + /** + * Fast-path empty-segment tile reduction. Write out a tile of identity + * values to output. + */ + __device__ __forceinline__ void EmptySegmentsTile( + IndexPair next_tile_idx, + IndexPair block_idx) + { + Value segment_reductions[ITEMS_PER_THREAD]; + + if (threadIdx.x == 0) + { + // The first segment gets the running segment total + segment_reductions[0] = prefix_op.running_total.value; + + // Update the running prefix + prefix_op.running_total.value = identity; + prefix_op.running_total.key = next_tile_idx.a_idx; + } + else + { + // Remainder of segments in this tile get identity + segment_reductions[0] = identity; + } + + // Remainder of segments in this tile get identity + #pragma unroll + for (int ITEM = 1; ITEM < ITEMS_PER_THREAD; ++ITEM) + segment_reductions[ITEM] = identity; + + // Store reductions + OffsetT tile_segments = next_tile_idx.a_idx - block_idx.a_idx; + StoreDirectStriped(threadIdx.x, d_output + block_idx.a_idx, segment_reductions, tile_segments); + } + + + /** + * Multi-segment tile reduction. + */ + template + __device__ __forceinline__ void MultiSegmentTile( + IndexPair block_idx, + IndexPair thread_idx, + IndexPair next_thread_idx, + IndexPair next_tile_idx) + { + IndexPair local_thread_idx; + local_thread_idx.a_idx = thread_idx.a_idx - block_idx.a_idx; + local_thread_idx.b_idx = thread_idx.b_idx - block_idx.b_idx; + + // Check if first segment end-offset is in range + bool valid_segment = FULL_TILE || (thread_idx.a_idx < next_thread_idx.a_idx); + + // Check if first value offset is in range + bool valid_value = FULL_TILE || (thread_idx.b_idx < next_thread_idx.b_idx); + + // Load first segment end-offset + OffsetT segment_end_offset = (valid_segment) ? + (USE_SMEM_SEGMENT_CACHE)? 
+ temp_storage.cached_segment_end_offsets[local_thread_idx.a_idx] : + d_segment_end_offsets[thread_idx.a_idx] : + -1; + + OffsetT segment_ids[ITEMS_PER_THREAD]; + OffsetT value_offsets[ITEMS_PER_THREAD]; + + KeyValuePair first_partial; + first_partial.key = thread_idx.a_idx; + first_partial.value = identity; + + // Get segment IDs and gather-offsets for values + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + segment_ids[ITEM] = -1; + value_offsets[ITEM] = -1; + + // Whether or not we slide (a) right along the segment path or (b) down the value path + if (valid_segment && (!valid_value || (segment_end_offset <= thread_idx.b_idx))) + { + // Consume this segment index + segment_ids[ITEM] = thread_idx.a_idx; + thread_idx.a_idx++; + local_thread_idx.a_idx++; + + valid_segment = FULL_TILE || (thread_idx.a_idx < next_thread_idx.a_idx); + + // Read next segment end-offset (if valid) + if (valid_segment) + { + if (USE_SMEM_SEGMENT_CACHE) + segment_end_offset = temp_storage.cached_segment_end_offsets[local_thread_idx.a_idx]; + else + segment_end_offset = d_segment_end_offsets[thread_idx.a_idx]; + } + } + else if (valid_value) + { + // Consume this value index + value_offsets[ITEM] = thread_idx.b_idx; + thread_idx.b_idx++; + local_thread_idx.b_idx++; + + valid_value = FULL_TILE || (thread_idx.b_idx < next_thread_idx.b_idx); + } + } + + // Load values + Value values[ITEMS_PER_THREAD]; + + if (USE_SMEM_VALUE_CACHE) + { + // Barrier for smem reuse + __syncthreads(); + + OffsetT tile_values = next_tile_idx.b_idx - block_idx.b_idx; + + // Load a tile's worth of values (using identity for out-of-bounds items) + LoadDirectStriped(threadIdx.x, d_values + block_idx.b_idx, values, tile_values, identity); + + // Store to shared + StoreDirectStriped(threadIdx.x, temp_storage.cached_values, values, tile_values); + + // Barrier for smem reuse + __syncthreads(); + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + values[ITEM] = (value_offsets[ITEM] == -1) ? + identity : + temp_storage.cached_values[value_offsets[ITEM] - block_idx.b_idx]; + } + } + else + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + values[ITEM] = (value_offsets[ITEM] == -1) ? + identity : + d_values[value_offsets[ITEM]]; + } + } + + // Reduce within thread segments + KeyValuePair running_total = first_partial; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + if (segment_ids[ITEM] != -1) + { + // Consume this segment index + d_output[segment_ids[ITEM]] = running_total.value; + +// _CubLog("Updating segment %d with value %lld\n", segment_ids[ITEM], running_total.value) + + if (first_partial.key == segment_ids[ITEM]) + first_partial.value = running_total.value; + + running_total.key = segment_ids[ITEM]; + running_total.value = identity; + } + + running_total.value = reduction_op(running_total.value, values[ITEM]); + } +/* + + // Barrier for smem reuse + __syncthreads(); + + // Use prefix scan to reduce values by segment-id. The segment-reductions end up in items flagged as segment-tails. 
+ KeyValuePair block_aggregate; + BlockScan(temp_storage.scan).InclusiveScan( + pairs, // Scan input + pairs, // Scan output + scan_op, // Scan operator + block_aggregate, // Block-wide total (unused) + prefix_op); // Prefix operator for seeding the block-wide scan with the running total +*/ + +/* + // Check if first segment end-offset is in range + bool valid_segment = (thread_idx.a_idx < next_thread_idx.a_idx); + + // Check if first value offset is in range + bool valid_value = (thread_idx.b_idx < next_thread_idx.b_idx); + + // Load first segment end-offset + OffsetT segment_end_offset = (valid_segment) ? + d_segment_end_offsets[thread_idx.a_idx] : + num_values; // Out of range (the last segment end-offset is one-past the last value offset) + + // Load first value offset + OffsetT value_offset = (valid_value) ? + d_value_offsets[thread_idx.b_idx] : + num_values; // Out of range (one-past the last value offset) + + // Assemble segment-demarcating tail flags and partial reduction tuples + TailFlag tail_flags[ITEMS_PER_THREAD]; + KeyValuePair partial_reductions[ITEMS_PER_THREAD]; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + // Default tuple and flag values + partial_reductions[ITEM].key = thread_idx.a_idx; + partial_reductions[ITEM].value = identity; + tail_flags[ITEM] = 0; + + // Whether or not we slide (a) right along the segment path or (b) down the value path + if (valid_segment && (!valid_value || (segment_end_offset <= value_offset))) + { + // Consume this segment index + + // Set tail flag noting the end of the segment + tail_flags[ITEM] = 1; + + // Increment segment index + thread_idx.a_idx++; + + // Read next segment end-offset (if valid) + if ((valid_segment = (thread_idx.a_idx < next_thread_idx.a_idx))) + segment_end_offset = d_segment_end_offsets[thread_idx.a_idx]; + } + else if (valid_value) + { + // Consume this value index + + // Update the tuple's value with the value at this index. + partial_reductions[ITEM].value = d_values[value_offset]; + + // Increment value index + thread_idx.b_idx++; + + // Read next value offset (if valid) + if ((valid_value = (thread_idx.b_idx < next_thread_idx.b_idx))) + value_offset = d_value_offsets[thread_idx.b_idx]; + } + } + + // Use prefix scan to reduce values by segment-id. The segment-reductions end up in items flagged as segment-tails. + KeyValuePair block_aggregate; + BlockScan(temp_storage.scan).InclusiveScan( + partial_reductions, // Scan input + partial_reductions, // Scan output + scan_op, // Scan operator + block_aggregate, // Block-wide total (unused) + prefix_op); // Prefix operator for seeding the block-wide scan with the running total + + // The first segment index for this region (hoist?) 
+ OffsetT first_segment_idx = temp_storage.block_idx.a_idx[0]; + + // Scatter an accumulated reduction if it is the head of a valid segment + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + if (tail_flags[ITEM]) + { + OffsetT segment_idx = partial_reductions[ITEM].key; + Value value = partial_reductions[ITEM].value; + + // Write value reduction to corresponding segment id + d_output[segment_idx] = value; + + // Save off the first value product that this thread block will scatter + if (segment_idx == first_segment_idx) + { + temp_storage.first_tuple.value = value; + } + } + } +*/ + } + + + + /** + * Have the thread block process the specified region of the MergePath decision path + */ + __device__ __forceinline__ void ProcessRegion( + OffsetT block_diagonal, + OffsetT next_block_diagonal, + KeyValuePair &first_tuple, // [Out] Valid in thread-0 + KeyValuePair &last_tuple) // [Out] Valid in thread-0 + { + // Thread block initialization + if (threadIdx.x < 2) + { + // Retrieve block starting and ending indices + IndexPair block_idx = {0, 0}; + if (gridDim.x > 1) + { + block_idx = d_block_idx[blockIdx.x + threadIdx.x]; + } + else if (threadIdx.x > 0) + { + block_idx.a_idx = num_segments; + block_idx.b_idx = num_values; + } + + // Share block starting and ending indices + temp_storage.block_region_idx[threadIdx.x] = block_idx; + + // Initialize the block's running prefix + if (threadIdx.x == 0) + { + prefix_op.running_total.key = block_idx.a_idx; + prefix_op.running_total.value = identity; + + // Initialize the "first scattered partial reduction tuple" to the prefix tuple (in case we don't actually scatter one) + temp_storage.first_tuple = prefix_op.running_total; + } + } + + // Ensure coherence of region indices + __syncthreads(); + + // Read block's starting indices + IndexPair block_idx = temp_storage.block_region_idx[0]; + + // Have the thread block iterate over the region + #pragma unroll 1 + while (block_diagonal < next_block_diagonal) + { + // Read block's ending indices (hoist?) 
+ IndexPair next_block_idx = temp_storage.block_region_idx[1]; + + // Clamp the per-thread search range to within one work-tile of block's current indices + IndexPair next_tile_idx; + next_tile_idx.a_idx = CUB_MIN(next_block_idx.a_idx, block_idx.a_idx + TILE_ITEMS); + next_tile_idx.b_idx = CUB_MIN(next_block_idx.b_idx, block_idx.b_idx + TILE_ITEMS); + + // Have each thread search for the end-indices of its subranges within the segment and value inputs + IndexPair next_thread_idx; + if (USE_SMEM_SEGMENT_CACHE) + { + // Search in smem cache + OffsetT num_segments = next_tile_idx.a_idx - block_idx.a_idx; + + // Load global + SegmentOffset segment_offsets[ITEMS_PER_THREAD]; + LoadDirectStriped(threadIdx.x, d_segment_end_offsets + block_idx.a_idx, segment_offsets, num_segments, num_values); + + // Store to shared + StoreDirectStriped(threadIdx.x, temp_storage.cached_segment_end_offsets, segment_offsets); + + __syncthreads(); + + OffsetT next_thread_diagonal = block_diagonal + ((threadIdx.x + 1) * ITEMS_PER_THREAD); + + MergePathSearch( + next_thread_diagonal, // Next thread diagonal + temp_storage.cached_segment_end_offsets - block_idx.a_idx, // A (segment end-offsets) + d_value_offsets, // B (value offsets) + block_idx, // Start indices into A and B + next_tile_idx, // End indices into A and B + next_thread_idx); // [out] diagonal intersection indices into A and B + } + else + { + // Search in global + + OffsetT next_thread_diagonal = block_diagonal + ((threadIdx.x + 1) * ITEMS_PER_THREAD); + + MergePathSearch( + next_thread_diagonal, // Next thread diagonal + d_segment_end_offsets, // A (segment end-offsets) + d_value_offsets, // B (value offsets) + block_idx, // Start indices into A and B + next_tile_idx, // End indices into A and B + next_thread_idx); // [out] diagonal intersection indices into A and B + } + + // Share thread end-indices to get thread begin-indices and tile end-indices + IndexPair thread_idx; + + BlockShift(temp_storage.shift).Up( + next_thread_idx, // Input item + thread_idx, // [out] Output item + block_idx, // Prefix item to be provided to thread0 + next_tile_idx); // [out] Suffix item shifted out by the threadBLOCK_THREADS-1 to be provided to all threads + +// if (block_idx.a_idx == next_tile_idx.a_idx) +// { +// // There are no segment end-offsets in this tile. Perform a +// // simple block-wide reduction and accumulate the result into +// // the running total. +// SingleSegmentTile(next_tile_idx, block_idx); +// } +// else if (block_idx.b_idx == next_tile_idx.b_idx) +// { +// // There are no values in this tile (only empty segments). 
+// EmptySegmentsTile(next_tile_idx.a_idx, block_idx.a_idx); +// } +// else + if ((next_tile_idx.a_idx < num_segments) && (next_tile_idx.b_idx < num_values)) + { + // Merge the tile's segment and value indices (full tile) + MultiSegmentTile(block_idx, thread_idx, next_thread_idx, next_tile_idx); + } + else + { + // Merge the tile's segment and value indices (partially full tile) + MultiSegmentTile(block_idx, thread_idx, next_thread_idx, next_tile_idx); + } + + // Advance the block's indices in preparation for the next tile + block_idx = next_tile_idx; + + // Advance to the next region in the decision path + block_diagonal += TILE_ITEMS; + + // Barrier for smem reuse + __syncthreads(); + } + + // Get first and last tuples for the region + if (threadIdx.x == 0) + { + first_tuple = temp_storage.first_tuple; + last_tuple = prefix_op.running_total; + } + + } + + +}; + + + + + + + + +/****************************************************************************** + * Tuning policy types + ******************************************************************************/ + +/** + * Parameterizable tuning policy type for BlockSegReduceRegionByKey + */ +template < + int _BLOCK_THREADS, ///< Threads per thread block + int _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + BlockLoadAlgorithm _LOAD_ALGORITHM, ///< The BlockLoad algorithm to use + bool _LOAD_WARP_TIME_SLICING, ///< Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any load-related data transpositions (versus each warp having its own storage) + CacheLoadModifier _LOAD_MODIFIER, ///< Cache load modifier for reading input elements + BlockScanAlgorithm _SCAN_ALGORITHM> ///< The BlockScan algorithm to use +struct BlockSegReduceRegionByKeyPolicy +{ + enum + { + BLOCK_THREADS = _BLOCK_THREADS, ///< Threads per thread block + ITEMS_PER_THREAD = _ITEMS_PER_THREAD, ///< Items per thread (per tile of input) + LOAD_WARP_TIME_SLICING = _LOAD_WARP_TIME_SLICING, ///< Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any load-related data transpositions (versus each warp having its own storage) }; + }; + + static const BlockLoadAlgorithm LOAD_ALGORITHM = _LOAD_ALGORITHM; ///< The BlockLoad algorithm to use + static const CacheLoadModifier LOAD_MODIFIER = _LOAD_MODIFIER; ///< Cache load modifier for reading input elements + static const BlockScanAlgorithm SCAN_ALGORITHM = _SCAN_ALGORITHM; ///< The BlockScan algorithm to use +}; + + +/****************************************************************************** + * Persistent thread block types + ******************************************************************************/ + +/** + * \brief BlockSegReduceRegionByKey implements a stateful abstraction of CUDA thread blocks for participating in device-wide reduce-value-by-key. 
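+ *
+ * This is the second pass of the two-kernel scheme: each SegReduceRegionKernel
+ * block leaves behind two partial-reduction tuples (the partial for the first
+ * segment it wrote and its running carry-out), and this single-block
+ * abstraction re-reduces those tuples by segment ID so that segments spanning
+ * more than one region end up with correct totals.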
+ */ +template < + typename BlockSegReduceRegionByKeyPolicy, ///< Parameterized BlockSegReduceRegionByKeyPolicy tuning policy + typename InputIteratorT, ///< Random-access iterator referencing key-value input tuples + typename OutputIteratorT, ///< Random-access iterator referencing segment output totals + typename ReductionOp> ///< Binary reduction operator type having member T operator()(const T &a, const T &b) +struct BlockSegReduceRegionByKey +{ + //--------------------------------------------------------------------- + // Types and constants + //--------------------------------------------------------------------- + + // Constants + enum + { + BLOCK_THREADS = BlockSegReduceRegionByKeyPolicy::BLOCK_THREADS, + ITEMS_PER_THREAD = BlockSegReduceRegionByKeyPolicy::ITEMS_PER_THREAD, + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + }; + + // KeyValuePair input type + typedef typename std::iterator_traits::value_type KeyValuePair; + + // Signed integer type for global offsets + typedef typename KeyValuePair::Key OffsetT; + + // Value type + typedef typename KeyValuePair::Value Value; + + // Head flag type + typedef int HeadFlag; + + // Input iterator wrapper type for loading KeyValuePair elements through cache + typedef CacheModifiedInputIterator< + BlockSegReduceRegionByKeyPolicy::LOAD_MODIFIER, + KeyValuePair, + OffsetT> + WrappedInputIteratorT; + + // Parameterized BlockLoad type + typedef BlockLoad< + WrappedInputIteratorT, + BLOCK_THREADS, + ITEMS_PER_THREAD, + BlockSegReduceRegionByKeyPolicy::LOAD_ALGORITHM, + BlockSegReduceRegionByKeyPolicy::LOAD_WARP_TIME_SLICING> + BlockLoad; + + // BlockScan scan operator for reduction-by-segment + typedef ReduceByKeyOp ReduceByKeyOp; + + // Stateful BlockScan prefix callback type for managing a running total while scanning consecutive tiles + typedef RunningBlockPrefixCallbackOp< + KeyValuePair, + ReduceByKeyOp> + RunningPrefixCallbackOp; + + // Parameterized BlockScan type for block-wide reduce-value-by-key + typedef BlockScan< + KeyValuePair, + BLOCK_THREADS, + BlockSegReduceRegionByKeyPolicy::SCAN_ALGORITHM> + BlockScan; + + // Parameterized BlockDiscontinuity type for identifying key discontinuities + typedef BlockDiscontinuity< + OffsetT, + BLOCK_THREADS> + BlockDiscontinuity; + + // Operator for detecting discontinuities in a list of segment identifiers. 
+ struct NewSegmentOp + { + /// Returns true if row_b is the start of a new row + __device__ __forceinline__ bool operator()(const OffsetT& b, const OffsetT& a) + { + return (a != b); + } + }; + + // Shared memory type for this thread block + struct _TempStorage + { + union + { + typename BlockLoad::TempStorage load; // Smem needed for tile loading + struct { + typename BlockScan::TempStorage scan; // Smem needed for reduce-value-by-segment scan + typename BlockDiscontinuity::TempStorage discontinuity; // Smem needed for head-flagging + }; + }; + }; + + // Alias wrapper allowing storage to be unioned + struct TempStorage : Uninitialized<_TempStorage> {}; + + + //--------------------------------------------------------------------- + // Thread fields + //--------------------------------------------------------------------- + + _TempStorage &temp_storage; ///< Reference to shared storage + WrappedInputIteratorT d_tuple_partials; ///< A sequence of partial reduction tuples to scan + OutputIteratorT d_output; ///< A sequence of segment totals + Value identity; ///< Identity value (for zero-length segments) + ReduceByKeyOp scan_op; ///< Reduce-by-key scan operator + RunningPrefixCallbackOp prefix_op; ///< Stateful running total for block-wide prefix scan of partial reduction tuples + + + //--------------------------------------------------------------------- + // Operations + //--------------------------------------------------------------------- + + /** + * Constructor + */ + __device__ __forceinline__ + BlockSegReduceRegionByKey( + TempStorage &temp_storage, ///< Reference to shared storage + InputIteratorT d_tuple_partials, ///< A sequence of partial reduction tuples to scan + OutputIteratorT d_output, ///< A sequence of segment totals + Value identity, ///< Identity value (for zero-length segments) + ReductionOp reduction_op) ///< Reduction operator + : + temp_storage(temp_storage.Alias()), + d_tuple_partials(d_tuple_partials), + d_output(d_output), + identity(identity), + scan_op(reduction_op), + prefix_op(scan_op) + {} + + + + /** + * Processes a reduce-value-by-key input tile, outputting reductions for each segment + */ + template + __device__ __forceinline__ + void ProcessTile( + OffsetT block_offset, + OffsetT first_segment_idx, + OffsetT last_segment_idx, + int guarded_items = TILE_ITEMS) + { + KeyValuePair partial_reductions[ITEMS_PER_THREAD]; + OffsetT segment_ids[ITEMS_PER_THREAD]; + HeadFlag head_flags[ITEMS_PER_THREAD]; + + // Load a tile of block partials from previous kernel + if (FULL_TILE) + { + // Full tile + BlockLoad(temp_storage.load).Load(d_tuple_partials + block_offset, partial_reductions); + } + else + { + KeyValuePair oob_default; + oob_default.key = last_segment_idx; // The last segment ID to be reduced + oob_default.value = identity; + + // Partially-full tile + BlockLoad(temp_storage.load).Load(d_tuple_partials + block_offset, partial_reductions, guarded_items, oob_default); + } + + // Barrier for shared memory reuse + __syncthreads(); + + // Copy the segment IDs for head-flagging + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + segment_ids[ITEM] = partial_reductions[ITEM].key; + } + + // FlagT segment heads by looking for discontinuities + BlockDiscontinuity(temp_storage.discontinuity).FlagHeads( + head_flags, // [out] Head flags + segment_ids, // Segment ids + NewSegmentOp(), // Functor for detecting start of new rows + prefix_op.running_total.key); // Last segment ID from previous tile to compare with first segment ID in this tile + + 
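+ // After the exclusive scan-by-key below, every item flagged as a segment
+ // head carries the completed (segment ID, total) of the segment that just
+ // ended, so the scatter loop that follows writes only finished totals.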
// Reduce-value-by-segment across partial_reductions using exclusive prefix scan + KeyValuePair block_aggregate; + BlockScan(temp_storage.scan).ExclusiveScan( + partial_reductions, // Scan input + partial_reductions, // Scan output + scan_op, // Scan operator + block_aggregate, // Block-wide total (unused) + prefix_op); // Prefix operator for seeding the block-wide scan with the running total + + // Scatter an accumulated reduction if it is the head of a valid segment + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ITEM++) + { + if (head_flags[ITEM]) + { + d_output[partial_reductions[ITEM].key] = partial_reductions[ITEM].value; + } + } + } + + + /** + * Iterate over input tiles belonging to this thread block + */ + __device__ __forceinline__ + void ProcessRegion( + OffsetT block_offset, + OffsetT block_end, + OffsetT first_segment_idx, + OffsetT last_segment_idx) + { + if (threadIdx.x == 0) + { + // Initialize running prefix to the first segment index paired with identity + prefix_op.running_total.key = first_segment_idx; + prefix_op.running_total.value = identity; + } + + // Process full tiles + while (block_offset + TILE_ITEMS <= block_end) + { + ProcessTile(block_offset, first_segment_idx, last_segment_idx); + __syncthreads(); + + block_offset += TILE_ITEMS; + } + + // Process final value tile (if present) + int guarded_items = block_end - block_offset; + if (guarded_items) + { + ProcessTile(block_offset, first_segment_idx, last_segment_idx, guarded_items); + } + } +}; + + + +/****************************************************************************** + * Kernel entrypoints + ******************************************************************************/ + +/** + * Segmented reduce region kernel entry point (multi-block). + */ + +template < + typename SegmentOffsetIterator, ///< Random-access input iterator type for reading segment end-offsets + typename OffsetT> ///< Signed integer type for global offsets +__global__ void SegReducePartitionKernel( + SegmentOffsetIterator d_segment_end_offsets, ///< [in] A sequence of \p num_segments segment end-offsets + IndexPair *d_block_idx, + int num_partition_samples, + OffsetT num_values, ///< [in] Number of values to reduce + OffsetT num_segments, ///< [in] Number of segments being reduced + GridEvenShare even_share) ///< [in] Even-share descriptor for mapping an equal number of tiles onto each thread block +{ + // Segment offset type + typedef typename std::iterator_traits::value_type SegmentOffset; + + // Counting iterator type + typedef CountingInputIterator CountingIterator; + + // Cache-modified iterator for segment end-offsets + CacheModifiedInputIterator d_wrapped_segment_end_offsets(d_segment_end_offsets); + + // Counting iterator for value offsets + CountingIterator d_value_offsets(0); + + // Initialize even-share to tell us where to start and stop our tile-processing + int partition_id = (blockDim.x * blockIdx.x) + threadIdx.x; + even_share.Init(partition_id); + + // Search for block starting and ending indices + IndexPair start_idx = {0, 0}; + IndexPair end_idx = {num_segments, num_values}; + IndexPair block_idx; + + MergePathSearch( + even_share.block_offset, // Next thread diagonal + d_wrapped_segment_end_offsets, // A (segment end-offsets) + d_value_offsets, // B (value offsets) + start_idx, // Start indices into A and B + end_idx, // End indices into A and B + block_idx); // [out] diagonal intersection indices into A and B + + // Write output + if (partition_id < num_partition_samples) + { + 
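+ // Each sample records the MergePath intersection (segment index, value
+ // index) at this partition's starting diagonal; SegReduceRegionKernel later
+ // reads samples [blockIdx.x] and [blockIdx.x + 1] as its region's begin and
+ // end indices.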
d_block_idx[partition_id] = block_idx; + } +} + + +/** + * Segmented reduce region kernel entry point (multi-block). + */ +template < + typename BlockSegReduceRegionPolicy, ///< Parameterized BlockSegReduceRegionPolicy tuning policy + typename SegmentOffsetIterator, ///< Random-access input iterator type for reading segment end-offsets + typename ValueIterator, ///< Random-access input iterator type for reading values + typename OutputIteratorT, ///< Random-access output iterator type for writing segment reductions + typename ReductionOp, ///< Binary reduction operator type having member T operator()(const T &a, const T &b) + typename OffsetT, ///< Signed integer type for global offsets + typename Value> ///< Value type +__launch_bounds__ (BlockSegReduceRegionPolicy::BLOCK_THREADS) +__global__ void SegReduceRegionKernel( + SegmentOffsetIterator d_segment_end_offsets, ///< [in] A sequence of \p num_segments segment end-offsets + ValueIterator d_values, ///< [in] A sequence of \p num_values values + OutputIteratorT d_output, ///< [out] A sequence of \p num_segments segment totals + KeyValuePair *d_tuple_partials, ///< [out] A sequence of (gridDim.x * 2) partial reduction tuples + IndexPair *d_block_idx, + OffsetT num_values, ///< [in] Number of values to reduce + OffsetT num_segments, ///< [in] Number of segments being reduced + Value identity, ///< [in] Identity value (for zero-length segments) + ReductionOp reduction_op, ///< [in] Reduction operator + GridEvenShare even_share) ///< [in] Even-share descriptor for mapping an equal number of tiles onto each thread block +{ + typedef KeyValuePair KeyValuePair; + + // Specialize thread block abstraction type for reducing a range of segmented values + typedef BlockSegReduceRegion< + BlockSegReduceRegionPolicy, + SegmentOffsetIterator, + ValueIterator, + OutputIteratorT, + ReductionOp, + OffsetT> + BlockSegReduceRegion; + + // Shared memory allocation + __shared__ typename BlockSegReduceRegion::TempStorage temp_storage; + + // Initialize thread block even-share to tell us where to start and stop our tile-processing + even_share.BlockInit(); + + // Construct persistent thread block + BlockSegReduceRegion thread_block( + temp_storage, + d_segment_end_offsets, + d_values, + d_output, + d_block_idx, + num_values, + num_segments, + identity, + reduction_op); + + // First and last partial reduction tuples within the range (valid in thread-0) + KeyValuePair first_tuple, last_tuple; + + // Consume block's region of work + thread_block.ProcessRegion( + even_share.block_offset, + even_share.block_end, + first_tuple, + last_tuple); + + if (threadIdx.x == 0) + { + if (gridDim.x > 1) + { + // Special case where the first segment written and the carry-out are for the same segment + if (first_tuple.key == last_tuple.key) + { + first_tuple.value = identity; + } + + // Write the first and last partial products from this thread block so + // that they can be subsequently "fixed up" in the next kernel. + d_tuple_partials[blockIdx.x * 2] = first_tuple; + d_tuple_partials[(blockIdx.x * 2) + 1] = last_tuple; + } + } + +} + + +/** + * Segmented reduce region kernel entry point (single-block). 
+ */ +template < + typename BlockSegReduceRegionByKeyPolicy, ///< Parameterized BlockSegReduceRegionByKeyPolicy tuning policy + typename InputIteratorT, ///< Random-access iterator referencing key-value input tuples + typename OutputIteratorT, ///< Random-access iterator referencing segment output totals + typename ReductionOp, ///< Binary reduction operator type having member T operator()(const T &a, const T &b) + typename OffsetT, ///< Signed integer type for global offsets + typename Value> ///< Value type +__launch_bounds__ (BlockSegReduceRegionByKeyPolicy::BLOCK_THREADS, 1) +__global__ void SegReduceRegionByKeyKernel( + InputIteratorT d_tuple_partials, ///< [in] A sequence of partial reduction tuples + OutputIteratorT d_output, ///< [out] A sequence of \p num_segments segment totals + OffsetT num_segments, ///< [in] Number of segments in the \p d_output sequence + int num_tuple_partials, ///< [in] Number of partial reduction tuples being reduced + Value identity, ///< [in] Identity value (for zero-length segments) + ReductionOp reduction_op) ///< [in] Reduction operator +{ + // Specialize thread block abstraction type for reducing a range of values by key + typedef BlockSegReduceRegionByKey< + BlockSegReduceRegionByKeyPolicy, + InputIteratorT, + OutputIteratorT, + ReductionOp> + BlockSegReduceRegionByKey; + + // Shared memory allocation + __shared__ typename BlockSegReduceRegionByKey::TempStorage temp_storage; + + // Construct persistent thread block + BlockSegReduceRegionByKey thread_block( + temp_storage, + d_tuple_partials, + d_output, + identity, + reduction_op); + + // Process input tiles + thread_block.ProcessRegion( + 0, // Region start + num_tuple_partials, // Region end + 0, // First segment ID + num_segments); // Last segment ID (one-past) +} + + + + +/****************************************************************************** + * Dispatch + ******************************************************************************/ + +/** + * Utility class for dispatching the appropriately-tuned kernels for DeviceReduce + */ +template < + typename ValueIterator, ///< Random-access input iterator type for reading values + typename SegmentOffsetIterator, ///< Random-access input iterator type for reading segment end-offsets + typename OutputIteratorT, ///< Random-access output iterator type for writing segment reductions + typename ReductionOp, ///< Binary reduction operator type having member T operator()(const T &a, const T &b) + typename OffsetT> ///< Signed integer type for global offsets +struct DeviceSegReduceDispatch +{ + // Value type + typedef typename std::iterator_traits::value_type Value; + + // Reduce-by-key data type tuple (segment-ID, value) + typedef KeyValuePair KeyValuePair; + + // Index pair data type + typedef IndexPairIndexPair; + + + /****************************************************************************** + * Tuning policies + ******************************************************************************/ + + /// SM35 + struct Policy350 + { + // ReduceRegionPolicy + typedef BlockSegReduceRegionPolicy< + 128, ///< Threads per thread block + 6, ///< Items per thread (per tile of input) + true, ///< Whether or not to cache incoming segment offsets in shared memory before reducing each tile + false, ///< Whether or not to cache incoming values in shared memory before reducing each tile + LOAD_DEFAULT, ///< Cache load modifier for reading segment offsets + LOAD_LDG, ///< Cache load modifier for reading values + BLOCK_REDUCE_RAKING, ///< The BlockReduce algorithm 
to use + BLOCK_SCAN_WARP_SCANS> ///< The BlockScan algorithm to use + SegReduceRegionPolicy; + + // ReduceRegionByKeyPolicy + typedef BlockSegReduceRegionByKeyPolicy< + 256, ///< Threads per thread block + 9, ///< Items per thread (per tile of input) + BLOCK_LOAD_DIRECT, ///< The BlockLoad algorithm to use + false, ///< Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any load-related data transpositions (versus each warp having its own storage) + LOAD_LDG, ///< Cache load modifier for reading input elements + BLOCK_SCAN_WARP_SCANS> ///< The BlockScan algorithm to use + SegReduceRegionByKeyPolicy; + }; + + + /// SM10 + struct Policy100 + { + // ReduceRegionPolicy + typedef BlockSegReduceRegionPolicy< + 128, ///< Threads per thread block + 3, ///< Items per thread (per tile of input) + false, ///< Whether or not to cache incoming segment offsets in shared memory before reducing each tile + false, ///< Whether or not to cache incoming values in shared memory before reducing each tile + LOAD_DEFAULT, ///< Cache load modifier for reading segment offsets + LOAD_DEFAULT, ///< Cache load modifier for reading values + BLOCK_REDUCE_RAKING, ///< The BlockReduce algorithm to use + BLOCK_SCAN_RAKING> ///< The BlockScan algorithm to use + SegReduceRegionPolicy; + + // ReduceRegionByKeyPolicy + typedef BlockSegReduceRegionByKeyPolicy< + 128, ///< Threads per thread block + 3, ///< Items per thread (per tile of input) + BLOCK_LOAD_WARP_TRANSPOSE, ///< The BlockLoad algorithm to use + false, ///< Whether or not only one warp's worth of shared memory should be allocated and time-sliced among block-warps during any load-related data transpositions (versus each warp having its own storage) + LOAD_DEFAULT, ///< Cache load modifier for reading input elements + BLOCK_SCAN_WARP_SCANS> ///< The BlockScan algorithm to use + SegReduceRegionByKeyPolicy; + }; + + + /****************************************************************************** + * Tuning policies of current PTX compiler pass + ******************************************************************************/ + +#if (CUB_PTX_ARCH >= 350) + typedef Policy350 PtxPolicy; +/* +#elif (CUB_PTX_ARCH >= 300) + typedef Policy300 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 200) + typedef Policy200 PtxPolicy; + +#elif (CUB_PTX_ARCH >= 130) + typedef Policy130 PtxPolicy; +*/ +#else + typedef Policy100 PtxPolicy; + +#endif + + // "Opaque" policies (whose parameterizations aren't reflected in the type signature) + struct PtxSegReduceRegionPolicy : PtxPolicy::SegReduceRegionPolicy {}; + struct PtxSegReduceRegionByKeyPolicy : PtxPolicy::SegReduceRegionByKeyPolicy {}; + + + /****************************************************************************** + * Utilities + ******************************************************************************/ + + /** + * Initialize kernel dispatch configurations with the policies corresponding to the PTX assembly we will use + */ + template < + typename SegReduceKernelConfig, + typename SegReduceByKeyKernelConfig> + __host__ __device__ __forceinline__ + static void InitConfigs( + int ptx_version, + SegReduceKernelConfig &seg_reduce_region_config, + SegReduceByKeyKernelConfig &seg_reduce_region_by_key_config) + { + #if (CUB_PTX_ARCH > 0) + + // We're on the device, so initialize the kernel dispatch configurations with the current PTX policy + seg_reduce_region_config.Init(); + seg_reduce_region_by_key_config.Init(); + + #else + + // We're on the host, so lookup and initialize 
the kernel dispatch configurations with the policies that match the device's PTX version + if (ptx_version >= 350) + { + seg_reduce_region_config.template Init(); + seg_reduce_region_by_key_config.template Init(); + } +/* + else if (ptx_version >= 300) + { + seg_reduce_region_config.template Init(); + seg_reduce_region_by_key_config.template Init(); + } + else if (ptx_version >= 200) + { + seg_reduce_region_config.template Init(); + seg_reduce_region_by_key_config.template Init(); + } + else if (ptx_version >= 130) + { + seg_reduce_region_config.template Init(); + seg_reduce_region_by_key_config.template Init(); + } +*/ + else + { + seg_reduce_region_config.template Init(); + seg_reduce_region_by_key_config.template Init(); + } + + #endif + } + + + /** + * SegReduceRegionKernel kernel dispatch configuration + */ + struct SegReduceKernelConfig + { + int block_threads; + int items_per_thread; + bool use_smem_segment_cache; + bool use_smem_value_cache; + CacheLoadModifier load_modifier_segments; + CacheLoadModifier load_modifier_values; + BlockReduceAlgorithm reduce_algorithm; + BlockScanAlgorithm scan_algorithm; + + template + __host__ __device__ __forceinline__ + void Init() + { + block_threads = SegReduceRegionPolicy::BLOCK_THREADS; + items_per_thread = SegReduceRegionPolicy::ITEMS_PER_THREAD; + use_smem_segment_cache = SegReduceRegionPolicy::USE_SMEM_SEGMENT_CACHE; + use_smem_value_cache = SegReduceRegionPolicy::USE_SMEM_VALUE_CACHE; + load_modifier_segments = SegReduceRegionPolicy::LOAD_MODIFIER_SEGMENTS; + load_modifier_values = SegReduceRegionPolicy::LOAD_MODIFIER_VALUES; + reduce_algorithm = SegReduceRegionPolicy::REDUCE_ALGORITHM; + scan_algorithm = SegReduceRegionPolicy::SCAN_ALGORITHM; + } + }; + + /** + * SegReduceRegionByKeyKernel kernel dispatch configuration + */ + struct SegReduceByKeyKernelConfig + { + int block_threads; + int items_per_thread; + BlockLoadAlgorithm load_algorithm; + bool load_warp_time_slicing; + CacheLoadModifier load_modifier; + BlockScanAlgorithm scan_algorithm; + + template + __host__ __device__ __forceinline__ + void Init() + { + block_threads = SegReduceRegionByKeyPolicy::BLOCK_THREADS; + items_per_thread = SegReduceRegionByKeyPolicy::ITEMS_PER_THREAD; + load_algorithm = SegReduceRegionByKeyPolicy::LOAD_ALGORITHM; + load_warp_time_slicing = SegReduceRegionByKeyPolicy::LOAD_WARP_TIME_SLICING; + load_modifier = SegReduceRegionByKeyPolicy::LOAD_MODIFIER; + scan_algorithm = SegReduceRegionByKeyPolicy::SCAN_ALGORITHM; + } + }; + + + /****************************************************************************** + * Dispatch entrypoints + ******************************************************************************/ + + /** + * Internal dispatch routine for computing a device-wide segmented reduction. + */ + template < + typename SegReducePartitionKernelPtr, + typename SegReduceRegionKernelPtr, ///< Function type of cub::SegReduceRegionKernel + typename SegReduceRegionByKeyKernelPtr> ///< Function type of cub::SegReduceRegionByKeyKernel + __host__ __device__ __forceinline__ + static cudaError_t Dispatch( + void* d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation. 
+ ValueIterator d_values, ///< [in] A sequence of \p num_values data to reduce + SegmentOffsetIterator d_segment_offsets, ///< [in] A sequence of (\p num_segments + 1) segment offsets + OutputIteratorT d_output, ///< [out] A sequence of \p num_segments segment totals + OffsetT num_values, ///< [in] Total number of values to reduce + OffsetT num_segments, ///< [in] Number of segments being reduced + Value identity, ///< [in] Identity value (for zero-length segments) + ReductionOp reduction_op, ///< [in] Reduction operator + cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous, ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + int sm_version, ///< [in] SM version of target device to use when computing SM occupancy + SegReducePartitionKernelPtr seg_reduce_partition_kernel, ///< [in] Kernel function pointer to parameterization of cub::SegReduceRegionKernel + SegReduceRegionKernelPtr seg_reduce_region_kernel, ///< [in] Kernel function pointer to parameterization of cub::SegReduceRegionKernel + SegReduceRegionByKeyKernelPtr seg_reduce_region_by_key_kernel, ///< [in] Kernel function pointer to parameterization of cub::SegReduceRegionByKeyKernel + SegReduceKernelConfig &seg_reduce_region_config, ///< [in] Dispatch parameters that match the policy that \p seg_reduce_region_kernel was compiled for + SegReduceByKeyKernelConfig &seg_reduce_region_by_key_config) ///< [in] Dispatch parameters that match the policy that \p seg_reduce_region_by_key_kernel was compiled for + { +#ifndef CUB_RUNTIME_ENABLED + + // Kernel launch not supported from this device + return CubDebug(cudaErrorNotSupported ); + +#else + + cudaError error = cudaSuccess; + do + { + // Dispatch two kernels: (1) a multi-block segmented reduction + // to reduce regions by block, and (2) a single-block reduce-by-key kernel + // to "fix up" segments spanning more than one region. 
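The comment above is the key to the whole dispatch: each thread block of seg_reduce_region_kernel reduces one even-share region and, for the segments cut by its region boundaries, emits (segment-id, partial-sum) tuples rather than final totals; a second, single-block pass then combines those tuples by key. A rough host-side model of that fix-up step is sketched below; FixupPartials and its parameter names are illustrative and not part of CUB.

#include <utility>
#include <vector>

// Host-side model of the "fix-up" pass: fold each block's
// (segment-id, partial-reduction) tuple into the per-segment outputs.
// On the device this is done by a single-block reduce-by-key kernel.
template <typename Value, typename ReductionOp>
void FixupPartials(
    const std::vector<std::pair<int, Value> > &tuple_partials, // up to 2 per thread block
    std::vector<Value>                        &output,         // one total per segment
    ReductionOp                                reduction_op)
{
    for (size_t i = 0; i < tuple_partials.size(); ++i)
    {
        int   segment = tuple_partials[i].first;
        Value partial = tuple_partials[i].second;
        output[segment] = reduction_op(output[segment], partial);
    }
}

Because each block contributes at most two such tuples, the code below sizes the d_tuple_partials allocation at twice the grid size.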
+ + // Tile size of seg_reduce_region_kernel + int tile_size = seg_reduce_region_config.block_threads * seg_reduce_region_config.items_per_thread; + + // Get device ordinal + int device_ordinal; + if (CubDebug(error = cudaGetDevice(&device_ordinal))) break; + + // Get SM count + int sm_count; + if (CubDebug(error = cudaDeviceGetAttribute (&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal))) break; + + // Get SM occupancy for histogram_region_kernel + int seg_reduce_region_sm_occupancy; + if (CubDebug(error = MaxSmOccupancy( + seg_reduce_region_sm_occupancy, + sm_version, + seg_reduce_region_kernel, + seg_reduce_region_config.block_threads))) break; + + // Get device occupancy for histogram_region_kernel + int seg_reduce_region_occupancy = seg_reduce_region_sm_occupancy * sm_count; + + // Even-share work distribution + int num_diagonals = num_values + num_segments; // Total number of work items + int subscription_factor = seg_reduce_region_sm_occupancy; // Amount of CTAs to oversubscribe the device beyond actively-resident (heuristic) + int max_grid_size = seg_reduce_region_occupancy * subscription_factor; + GridEvenShareeven_share( + num_diagonals, + max_grid_size, + tile_size); + + // Get grid size for seg_reduce_region_kernel + int seg_reduce_region_grid_size = even_share.grid_size; + + // Number of "fix-up" reduce-by-key tuples (2 per thread block) + int num_tuple_partials = seg_reduce_region_grid_size * 2; + int num_partition_samples = seg_reduce_region_grid_size + 1; + + // Temporary storage allocation requirements + void* allocations[2]; + size_t allocation_sizes[2] = + { + num_tuple_partials * sizeof(KeyValuePair), // bytes needed for "fix-up" reduce-by-key tuples + num_partition_samples * sizeof(IndexPair), // bytes needed block indices + }; + + // Alias the temporary allocations from the single storage blob (or set the necessary size of the blob) + if (CubDebug(error = AliasTemporaries(d_temp_storage, temp_storage_bytes, allocations, allocation_sizes))) break; + if (d_temp_storage == NULL) + { + // Return if the caller is simply requesting the size of the storage allocation + return cudaSuccess; + } + + // Alias the allocations + KeyValuePair *d_tuple_partials = (KeyValuePair*) allocations[0]; // "fix-up" tuples + IndexPair *d_block_idx = (IndexPair *) allocations[1]; // block starting/ending indices + + // Array of segment end-offsets + SegmentOffsetIterator d_segment_end_offsets = d_segment_offsets + 1; + + // Grid launch params for seg_reduce_partition_kernel + int partition_block_size = 32; + int partition_grid_size = (num_partition_samples + partition_block_size - 1) / partition_block_size; + + // Partition work among multiple thread blocks if necessary + if (seg_reduce_region_grid_size > 1) + { + // Log seg_reduce_partition_kernel configuration + if (debug_synchronous) _CubLog("Invoking seg_reduce_partition_kernel<<<%d, %d, 0, %lld>>>()\n", + partition_grid_size, partition_block_size, (long long) stream); + + // Invoke seg_reduce_partition_kernel + seg_reduce_partition_kernel<<>>( + d_segment_end_offsets, ///< [in] A sequence of \p num_segments segment end-offsets + d_block_idx, + num_partition_samples, + num_values, ///< [in] Number of values to reduce + num_segments, ///< [in] Number of segments being reduced + even_share); ///< [in] Even-share descriptor for mapping an equal number of tiles onto each thread block + + // Sync the stream if specified + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + } + + // Log seg_reduce_region_kernel 
configuration + if (debug_synchronous) _CubLog("Invoking seg_reduce_region_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread, %d SM occupancy\n", + seg_reduce_region_grid_size, seg_reduce_region_config.block_threads, (long long) stream, seg_reduce_region_config.items_per_thread, seg_reduce_region_sm_occupancy); + + // Mooch + if (CubDebug(error = cudaDeviceSetSharedMemConfig(cudaSharedMemBankSizeEightByte))) break; + + // Invoke seg_reduce_region_kernel + seg_reduce_region_kernel<<>>( + d_segment_end_offsets, + d_values, + d_output, + d_tuple_partials, + d_block_idx, + num_values, + num_segments, + identity, + reduction_op, + even_share); + + // Sync the stream if specified + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; +/* + // Perform "fix-up" of region partial reductions if grid size is greater than one thread block + if (seg_reduce_region_grid_size > 1) + { + // Log seg_reduce_region_by_key_kernel configuration + if (debug_synchronous) _CubLog("Invoking seg_reduce_region_by_key_kernel<<<%d, %d, 0, %lld>>>(), %d items per thread\n", + 1, seg_reduce_region_by_key_config.block_threads, (long long) stream, seg_reduce_region_by_key_config.items_per_thread); + + // Invoke seg_reduce_region_by_key_kernel + seg_reduce_region_by_key_kernel<<<1, seg_reduce_region_by_key_config.block_threads, 0, stream>>>( + d_tuple_partials, + d_output, + num_segments, + num_tuple_partials, + identity, + reduction_op); + + // Sync the stream if specified + if (debug_synchronous && (CubDebug(error = SyncStream(stream)))) break; + } +*/ + } + + while (0); + + return error; + +#endif // CUB_RUNTIME_ENABLED + } + + + /** + * Internal dispatch routine for computing a device-wide segmented reduction. + */ + __host__ __device__ __forceinline__ + static cudaError_t Dispatch( + void* d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation. + ValueIterator d_values, ///< [in] A sequence of \p num_values data to reduce + SegmentOffsetIterator d_segment_offsets, ///< [in] A sequence of (\p num_segments + 1) segment offsets + OutputIteratorT d_output, ///< [out] A sequence of \p num_segments segment totals + OffsetT num_values, ///< [in] Total number of values to reduce + OffsetT num_segments, ///< [in] Number of segments being reduced + Value identity, ///< [in] Identity value (for zero-length segments) + ReductionOp reduction_op, ///< [in] Reduction operator + cudaStream_t stream, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
+ { + cudaError error = cudaSuccess; + do + { + // Get PTX version + int ptx_version; + #if (CUB_PTX_ARCH == 0) + if (CubDebug(error = PtxVersion(ptx_version))) break; + #else + ptx_version = CUB_PTX_ARCH; + #endif + + // Get kernel kernel dispatch configurations + SegReduceKernelConfig seg_reduce_region_config; + SegReduceByKeyKernelConfig seg_reduce_region_by_key_config; + + InitConfigs(ptx_version, seg_reduce_region_config, seg_reduce_region_by_key_config); + + // Dispatch + if (CubDebug(error = Dispatch( + d_temp_storage, + temp_storage_bytes, + d_values, + d_segment_offsets, + d_output, + num_values, + num_segments, + identity, + reduction_op, + stream, + debug_synchronous, + ptx_version, // Use PTX version instead of SM version because, as a statically known quantity, this improves device-side launch dramatically but at the risk of imprecise occupancy calculation for mismatches + SegReducePartitionKernel, + SegReduceRegionKernel, + SegReduceRegionByKeyKernel, + seg_reduce_region_config, + seg_reduce_region_by_key_config))) break; + } + while (0); + + return error; + + } +}; + + + + +/****************************************************************************** + * DeviceSegReduce + *****************************************************************************/ + +/** + * \brief DeviceSegReduce provides operations for computing a device-wide, parallel segmented reduction across a sequence of data items residing within global memory. + * \ingroup DeviceModule + * + * \par Overview + * A reduction (or fold) + * uses a binary combining operator to compute a single aggregate from a list of input elements. + * + * \par Usage Considerations + * \cdp_class{DeviceReduce} + * + */ +struct DeviceSegReduce +{ + /** + * \brief Computes a device-wide segmented reduction using the specified binary \p reduction_op functor. + * + * \par + * Does not support non-commutative reduction operators. + * + * \devicestorage + * + * \cdp + * + * \iterator + * + * \tparam ValueIterator [inferred] Random-access input iterator type for reading values + * \tparam SegmentOffsetIterator [inferred] Random-access input iterator type for reading segment end-offsets + * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing segment reductions + * \tparam Value [inferred] Value type + * \tparam ReductionOp [inferred] Binary reduction operator type having member T operator()(const T &a, const T &b) + */ + template < + typename ValueIterator, + typename SegmentOffsetIterator, + typename OutputIteratorT, + typename Value, + typename ReductionOp> + __host__ __device__ __forceinline__ + static cudaError_t Reduce( + void* d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation. + ValueIterator d_values, ///< [in] A sequence of \p num_values data to reduce + SegmentOffsetIterator d_segment_offsets, ///< [in] A sequence of (\p num_segments + 1) segment offsets + OutputIteratorT d_output, ///< [out] A sequence of \p num_segments segment totals + int num_values, ///< [in] Total number of values to reduce + int num_segments, ///< [in] Number of segments being reduced + Value identity, ///< [in] Identity value (for zero-length segments) + ReductionOp reduction_op, ///< [in] Reduction operator + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. 
+ bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. + { + // Signed integer type for global offsets + typedef int OffsetT; + + typedef DeviceSegReduceDispatch< + ValueIterator, + SegmentOffsetIterator, + OutputIteratorT, + ReductionOp, + OffsetT> + DeviceSegReduceDispatch; + + return DeviceSegReduceDispatch::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_values, + d_segment_offsets, + d_output, + num_values, + num_segments, + identity, + reduction_op, + stream, + debug_synchronous); + } + + + /** + * \brief Computes a device-wide segmented sum using the addition ('+') operator. + * + * \par + * Does not support non-commutative summation. + * + * \devicestorage + * + * \cdp + * + * \iterator + * + * \tparam ValueIterator [inferred] Random-access input iterator type for reading values + * \tparam SegmentOffsetIterator [inferred] Random-access input iterator type for reading segment end-offsets + * \tparam OutputIteratorT [inferred] Random-access output iterator type for writing segment reductions + */ + template < + typename ValueIterator, + typename SegmentOffsetIterator, + typename OutputIteratorT> + __host__ __device__ __forceinline__ + static cudaError_t Sum( + void* d_temp_storage, ///< [in] %Device allocation of temporary storage. When NULL, the required allocation size is returned in \p temp_storage_bytes and no work is done. + size_t &temp_storage_bytes, ///< [in,out] Reference to size in bytes of \p d_temp_storage allocation. + ValueIterator d_values, ///< [in] A sequence of \p num_values data to reduce + SegmentOffsetIterator d_segment_offsets, ///< [in] A sequence of (\p num_segments + 1) segment offsets + OutputIteratorT d_output, ///< [out] A sequence of \p num_segments segment totals + int num_values, ///< [in] Total number of values to reduce + int num_segments, ///< [in] Number of segments being reduced + cudaStream_t stream = 0, ///< [in] [optional] CUDA stream to launch kernels within. Default is stream0. + bool debug_synchronous = false) ///< [in] [optional] Whether or not to synchronize the stream after every kernel launch to check for errors. Also causes launch configurations to be printed to the console. Default is \p false. 
+ { + // Signed integer type for global offsets + typedef int OffsetT; + + // Value type + typedef typename std::iterator_traits::value_type Value; + + Value identity = Value(); + cub::Sum reduction_op; + + typedef DeviceSegReduceDispatch< + ValueIterator, + SegmentOffsetIterator, + OutputIteratorT, + cub::Sum, + OffsetT> + DeviceSegReduceDispatch; + + return DeviceSegReduceDispatch::Dispatch( + d_temp_storage, + temp_storage_bytes, + d_values, + d_segment_offsets, + d_output, + num_values, + num_segments, + identity, + reduction_op, + stream, + debug_synchronous); + } +}; + + + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + +/** + * Initialize problem + */ +template +void Initialize( + GenMode gen_mode, + Value *h_values, + vector &segment_offsets, + int num_values, + int avg_segment_size) +{ + // Initialize values +// if (g_verbose) printf("Values: "); + for (int i = 0; i < num_values; ++i) + { + InitValue(gen_mode, h_values[i], i); +// if (g_verbose) std::cout << h_values[i] << ", "; + } +// if (g_verbose) printf("\n\n"); + + // Initialize segment lengths + const unsigned int MAX_INTEGER = -1u; + const unsigned int MAX_SEGMENT_LENGTH = avg_segment_size * 2; + const double SCALE_FACTOR = double(MAX_SEGMENT_LENGTH) / double(MAX_INTEGER); + + segment_offsets.push_back(0); + + OffsetT consumed = 0; + OffsetT remaining = num_values; + while (remaining > 0) + { + // Randomly sample a 32-bit unsigned int + unsigned int segment_length; + RandomBits(segment_length); + + // Scale to maximum segment length + segment_length = (unsigned int) (double(segment_length) * SCALE_FACTOR); + segment_length = CUB_MIN(segment_length, remaining); + + consumed += segment_length; + remaining -= segment_length; + + segment_offsets.push_back(consumed); + } +} + + +/** + * Compute reference answer + */ +template +void ComputeReference( + Value *h_values, + OffsetT *h_segment_offsets, + Value *h_reference, + int num_segments, + Value identity) +{ + if (g_verbose) printf("%d segment reductions: ", num_segments); + for (int segment = 0; segment < num_segments; ++segment) + { + h_reference[segment] = identity; + + for (int i = h_segment_offsets[segment]; i < h_segment_offsets[segment + 1]; ++i) + { + h_reference[segment] += h_values[i]; + } + if (g_verbose) std::cout << h_reference[segment] << ", "; + } + if (g_verbose) printf("\n\n"); +} + + +/** + * Simple test of device + */ +template < + bool CDP, + typename OffsetT, + typename Value, + typename ReductionOp> +void Test( + OffsetT num_values, + int avg_segment_size, + ReductionOp reduction_op, + Value identity, + char* type_string) +{ + Value *h_values = NULL; + Value *h_reference = NULL; + OffsetT *h_segment_offsets = NULL; + + printf("%d\n", num_values); + + // Initialize problem on host + h_values = new Value[num_values]; + vector segment_offsets; + Initialize(UNIFORM, h_values, segment_offsets, num_values, avg_segment_size); + + // Allocate simple offsets array and copy STL vector into it + h_segment_offsets = new OffsetT[segment_offsets.size()]; + for (int i = 0; i < segment_offsets.size(); ++i) + h_segment_offsets[i] = segment_offsets[i]; + + OffsetT num_segments = segment_offsets.size() - 1; + if (g_verbose) + { + printf("%d segment offsets: ", num_segments); + for (int i = 0; i < num_segments; ++i) + std::cout << h_segment_offsets[i] << "(" << h_segment_offsets[i + 1] - h_segment_offsets[i] << "), "; + if (g_verbose) std::cout << 
std::endl << std::endl; + } + + // Solve problem on host + h_reference = new Value[num_segments]; + ComputeReference(h_values, h_segment_offsets, h_reference, num_segments, identity); + + printf("\n\n%s cub::DeviceSegReduce::%s %d items (%d-byte %s), %d segments (%d-byte offset indices)\n", + (CDP) ? "CDP device invoked" : "Host-invoked", + (Equals::VALUE) ? "Sum" : "Reduce", + num_values, (int) sizeof(Value), type_string, + num_segments, (int) sizeof(OffsetT)); + fflush(stdout); + + // Allocate and initialize problem on device + Value *d_values = NULL; + OffsetT *d_segment_offsets = NULL; + Value *d_output = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_values, sizeof(Value) * num_values)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_segment_offsets, sizeof(OffsetT) * (num_segments + 1))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_output, sizeof(Value) * num_segments)); + CubDebugExit(cudaMemcpy(d_values, h_values, sizeof(Value) * num_values, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(d_segment_offsets, h_segment_offsets, sizeof(OffsetT) * (num_segments + 1), cudaMemcpyHostToDevice)); + + // Request and allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(DeviceSegReduce::Sum(d_temp_storage, temp_storage_bytes, d_values, d_segment_offsets, d_output, num_values, num_segments, 0, false)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Clear device output + CubDebugExit(cudaMemset(d_output, 0, sizeof(Value) * num_segments)); + + // Run warmup/correctness iteration + CubDebugExit(DeviceSegReduce::Sum(d_temp_storage, temp_storage_bytes, d_values, d_segment_offsets, d_output, num_values, num_segments, 0, true)); + + // Check for correctness (and display results, if specified) + int compare = CompareDeviceResults(h_reference, d_output, num_segments, true, g_verbose); + printf("\t%s", compare ? 
"FAIL" : "PASS"); + + // Flush any stdout/stderr + fflush(stdout); + fflush(stderr); + + // Performance + GpuTimer gpu_timer; + gpu_timer.Start(); + for (int i = 0; i < g_timing_iterations; ++i) + { + CubDebugExit(DeviceSegReduce::Sum(d_temp_storage, temp_storage_bytes, d_values, d_segment_offsets, d_output, num_values, num_segments, 0, false)); + } + gpu_timer.Stop(); + float elapsed_millis = gpu_timer.ElapsedMillis(); + + // Display performance + if (g_timing_iterations > 0) + { + float avg_millis = elapsed_millis / g_timing_iterations; + float giga_rate = float(num_values) / avg_millis / 1000.0 / 1000.0; + float giga_bandwidth = giga_rate * + printf(", %.3f avg ms, %.3f billion items/s, %.3f logical GB/s", avg_millis, giga_rate, giga_bandwidth); + } + + // Device cleanup + if (d_values) CubDebugExit(g_allocator.DeviceFree(d_values)); + if (d_segment_offsets) CubDebugExit(g_allocator.DeviceFree(d_segment_offsets)); + if (d_output) CubDebugExit(g_allocator.DeviceFree(d_output)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + // Host cleanup + if (h_values) delete[] h_values; + if (h_segment_offsets) delete[] h_segment_offsets; + if (h_reference) delete[] h_reference; +} + + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_values = 32 * 1024 * 1024; + int avg_segment_size = 500; + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_values); + args.GetCmdLineArgument("ss", avg_segment_size); + args.GetCmdLineArgument("i", g_timing_iterations); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=] " + "[--v] " + "[--i=] " + "[--n=]\n" + "[--ss=]\n" + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + Test((int) num_values, avg_segment_size, Sum(), (long long) 0, CUB_TYPE_STRING(long long)); + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/experimental/histogram/histogram_cub.h b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/histogram/histogram_cub.h new file mode 100644 index 0000000..07c2e4a --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/histogram/histogram_cub.h @@ -0,0 +1,109 @@ +/****************************************************************************** + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#include + +using namespace cub; + +template < + int NUM_CHANNELS, + int ACTIVE_CHANNELS, + int NUM_BINS, + typename PixelType> +double run_cub_histogram( + PixelType *d_image, + int width, + int height, + unsigned int *d_hist, + bool is_warmup) +{ + enum { + is_float = Equals::VALUE, + }; + + typedef typename If::Type SampleT; // Sample type + typedef typename If::Type LevelT; // Level type (uint32 for uchar) + + // Setup data structures + unsigned int* d_histogram[ACTIVE_CHANNELS]; + int num_levels[ACTIVE_CHANNELS]; ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT lower_level[ACTIVE_CHANNELS]; ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. + LevelT upper_level[ACTIVE_CHANNELS]; ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. + + for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) + { + d_histogram[CHANNEL] = d_hist + (CHANNEL * NUM_BINS); + num_levels[CHANNEL] = NUM_BINS + 1; + lower_level[CHANNEL] = 0; + upper_level[CHANNEL] = (is_float) ? 1 : 256; + } + + // Allocate temporary storage + size_t temp_storage_bytes = 0; + void *d_temp_storage = NULL; + + SampleT* d_image_samples = (SampleT*) d_image; + + // Get amount of temporary storage needed + DeviceHistogram::MultiHistogramEven( + d_temp_storage, + temp_storage_bytes, + d_image_samples, + d_histogram, + num_levels, + lower_level, + upper_level, + width * height, + (cudaStream_t) 0, + is_warmup); + + cudaMalloc(&d_temp_storage, temp_storage_bytes); + + GpuTimer gpu_timer; + gpu_timer.Start(); + + // Compute histogram + DeviceHistogram::MultiHistogramEven( + d_temp_storage, + temp_storage_bytes, + d_image_samples, + d_histogram, + num_levels, + lower_level, + upper_level, + width * height, + (cudaStream_t) 0, + is_warmup); + + gpu_timer.Stop(); + float elapsed_millis = gpu_timer.ElapsedMillis(); + + cudaFree(d_temp_storage); + + return elapsed_millis; +} + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/experimental/histogram/histogram_gmem_atomics.h b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/histogram/histogram_gmem_atomics.h new file mode 100644 index 0000000..3308a28 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/histogram/histogram_gmem_atomics.h @@ -0,0 +1,185 @@ +/****************************************************************************** + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
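run_cub_histogram above hands cub::DeviceHistogram::MultiHistogramEven one descriptor per active channel: NUM_BINS evenly spaced bins are described by NUM_BINS + 1 boundary levels, with the lower bound inclusive and the upper bound exclusive (0..256 for 8-bit samples, 0..1 for float samples). The single-channel sketch below shows the same convention and the usual two-call workspace pattern with the public DeviceHistogram::HistogramEven entry point; the name HistogramEvenSketch and the assumption that d_samples/d_histogram are already allocated and populated on the device are illustrative.

#include <cuda_runtime.h>
#include <cub/device/device_histogram.cuh>

// Minimal sketch: 256-bin histogram of 8-bit samples with evenly spaced bins.
void HistogramEvenSketch(
    const unsigned char *d_samples,     // device pointer, assumed already populated
    int                  num_samples,
    unsigned int        *d_histogram,   // device pointer, 256 counters
    cudaStream_t         stream)
{
    int num_levels  = 256 + 1;          // bins + 1 boundary levels
    int lower_level = 0;                // inclusive
    int upper_level = 256;              // exclusive

    // First call with a NULL workspace only reports the required size.
    void   *d_temp_storage     = NULL;
    size_t  temp_storage_bytes = 0;
    cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes,
        d_samples, d_histogram, num_levels, lower_level, upper_level,
        num_samples, stream);

    cudaMalloc(&d_temp_storage, temp_storage_bytes);

    // Second call performs the binning.
    cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes,
        d_samples, d_histogram, num_levels, lower_level, upper_level,
        num_samples, stream);

    cudaFree(d_temp_storage);
}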
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#include + +namespace histogram_gmem_atomics +{ + // Decode float4 pixel into bins + template + __device__ __forceinline__ void DecodePixel(float4 pixel, unsigned int (&bins)[ACTIVE_CHANNELS]) + { + float* samples = reinterpret_cast(&pixel); + + #pragma unroll + for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) + bins[CHANNEL] = (unsigned int) (samples[CHANNEL] * float(NUM_BINS)); + } + + // Decode uchar4 pixel into bins + template + __device__ __forceinline__ void DecodePixel(uchar4 pixel, unsigned int (&bins)[ACTIVE_CHANNELS]) + { + unsigned char* samples = reinterpret_cast(&pixel); + + #pragma unroll + for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) + bins[CHANNEL] = (unsigned int) (samples[CHANNEL]); + } + + // Decode uchar1 pixel into bins + template + __device__ __forceinline__ void DecodePixel(uchar1 pixel, unsigned int (&bins)[ACTIVE_CHANNELS]) + { + bins[0] = (unsigned int) pixel.x; + } + + // First-pass histogram kernel (binning into privatized counters) + template < + int NUM_PARTS, + int ACTIVE_CHANNELS, + int NUM_BINS, + typename PixelType> + __global__ void histogram_gmem_atomics( + const PixelType *in, + int width, + int height, + unsigned int *out) + { + // global position and size + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int nx = blockDim.x * gridDim.x; + int ny = blockDim.y * gridDim.y; + + // threads in workgroup + int t = threadIdx.x + threadIdx.y * blockDim.x; // thread index in workgroup, linear in 0..nt-1 + int nt = blockDim.x * blockDim.y; // total threads in workgroup + + // group index in 0..ngroups-1 + int g = blockIdx.x + blockIdx.y * gridDim.x; + + // initialize smem + unsigned int *gmem = out + g * NUM_PARTS; + for (int i = t; i < ACTIVE_CHANNELS * NUM_BINS; i += nt) + gmem[i] = 0; + __syncthreads(); + + // process pixels (updates our group's partial histogram in gmem) + for (int col = x; col < width; col += nx) + { + for (int row = y; row < height; row += ny) + { + PixelType pixel = in[row * width + col]; + + unsigned int bins[ACTIVE_CHANNELS]; + DecodePixel(pixel, bins); + + #pragma unroll + for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) + atomicAdd(&gmem[(NUM_BINS * CHANNEL) + bins[CHANNEL]], 1); + 
} + } + } + + // Second pass histogram kernel (accumulation) + template < + int NUM_PARTS, + int ACTIVE_CHANNELS, + int NUM_BINS> + __global__ void histogram_gmem_accum( + const unsigned int *in, + int n, + unsigned int *out) + { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i > ACTIVE_CHANNELS * NUM_BINS) + return; // out of range + + unsigned int total = 0; + for (int j = 0; j < n; j++) + total += in[i + NUM_PARTS * j]; + + out[i] = total; + } + + +} // namespace histogram_gmem_atomics + + +template < + int ACTIVE_CHANNELS, + int NUM_BINS, + typename PixelType> +double run_gmem_atomics( + PixelType *d_image, + int width, + int height, + unsigned int *d_hist, + bool warmup) +{ + enum + { + NUM_PARTS = 1024 + }; + + cudaDeviceProp props; + cudaGetDeviceProperties(&props, 0); + + dim3 block(32, 4); + dim3 grid(16, 16); + int total_blocks = grid.x * grid.y; + + // allocate partial histogram + unsigned int *d_part_hist; + cudaMalloc(&d_part_hist, total_blocks * NUM_PARTS * sizeof(unsigned int)); + + dim3 block2(128); + dim3 grid2((3 * NUM_BINS + block.x - 1) / block.x); + + GpuTimer gpu_timer; + gpu_timer.Start(); + + histogram_gmem_atomics::histogram_gmem_atomics<<>>( + d_image, + width, + height, + d_part_hist); + + histogram_gmem_atomics::histogram_gmem_accum<<>>( + d_part_hist, + total_blocks, + d_hist); + + gpu_timer.Stop(); + float elapsed_millis = gpu_timer.ElapsedMillis(); + + cudaFree(d_part_hist); + + return elapsed_millis; +} + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/experimental/histogram/histogram_smem_atomics.h b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/histogram/histogram_smem_atomics.h new file mode 100644 index 0000000..2c70702 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/histogram/histogram_smem_atomics.h @@ -0,0 +1,195 @@ +/****************************************************************************** + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
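The gmem-atomics variant above is two passes: the first kernel leaves one partial histogram per thread block, each block's counters spaced NUM_PARTS apart in the out array, and histogram_gmem_accum then sums counter i across all blocks. A host-side model of that accumulation, with illustrative names that are not part of the original sources, looks like:

// Sum counter i across all per-block partial histograms, which are laid
// out every num_parts counters in the partials array (cf. histogram_gmem_accum).
void AccumulatePartialHistograms(
    const unsigned int *partials,     // total_blocks * num_parts counters
    int                 total_blocks,
    int                 num_counters, // ACTIVE_CHANNELS * NUM_BINS
    int                 num_parts,    // stride between consecutive partial histograms
    unsigned int       *out)
{
    for (int i = 0; i < num_counters; ++i)
    {
        unsigned int total = 0;
        for (int j = 0; j < total_blocks; ++j)
            total += partials[i + num_parts * j];
        out[i] = total;
    }
}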
+ * + ******************************************************************************/ + +#include + +namespace histogram_smem_atomics +{ + // Decode float4 pixel into bins + template + __device__ __forceinline__ void DecodePixel(float4 pixel, unsigned int (&bins)[ACTIVE_CHANNELS]) + { + float* samples = reinterpret_cast(&pixel); + + #pragma unroll + for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) + bins[CHANNEL] = (unsigned int) (samples[CHANNEL] * float(NUM_BINS)); + } + + // Decode uchar4 pixel into bins + template + __device__ __forceinline__ void DecodePixel(uchar4 pixel, unsigned int (&bins)[ACTIVE_CHANNELS]) + { + unsigned char* samples = reinterpret_cast(&pixel); + + #pragma unroll + for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) + bins[CHANNEL] = (unsigned int) (samples[CHANNEL]); + } + + // Decode uchar1 pixel into bins + template + __device__ __forceinline__ void DecodePixel(uchar1 pixel, unsigned int (&bins)[ACTIVE_CHANNELS]) + { + bins[0] = (unsigned int) pixel.x; + } + + // First-pass histogram kernel (binning into privatized counters) + template < + int NUM_PARTS, + int ACTIVE_CHANNELS, + int NUM_BINS, + typename PixelType> + __global__ void histogram_smem_atomics( + const PixelType *in, + int width, + int height, + unsigned int *out) + { + // global position and size + int x = blockIdx.x * blockDim.x + threadIdx.x; + int y = blockIdx.y * blockDim.y + threadIdx.y; + int nx = blockDim.x * gridDim.x; + int ny = blockDim.y * gridDim.y; + + // threads in workgroup + int t = threadIdx.x + threadIdx.y * blockDim.x; // thread index in workgroup, linear in 0..nt-1 + int nt = blockDim.x * blockDim.y; // total threads in workgroup + + // group index in 0..ngroups-1 + int g = blockIdx.x + blockIdx.y * gridDim.x; + + // initialize smem + __shared__ unsigned int smem[ACTIVE_CHANNELS * NUM_BINS + 3]; + for (int i = t; i < ACTIVE_CHANNELS * NUM_BINS + 3; i += nt) + smem[i] = 0; + __syncthreads(); + + // process pixels + // updates our group's partial histogram in smem + for (int col = x; col < width; col += nx) + { + for (int row = y; row < height; row += ny) + { + PixelType pixel = in[row * width + col]; + + unsigned int bins[ACTIVE_CHANNELS]; + DecodePixel(pixel, bins); + + #pragma unroll + for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) + atomicAdd(&smem[(NUM_BINS * CHANNEL) + bins[CHANNEL] + CHANNEL], 1); + } + } + + __syncthreads(); + + // move to our workgroup's slice of output + out += g * NUM_PARTS; + + // store local output to global + for (int i = t; i < NUM_BINS; i += nt) + { + #pragma unroll + for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) + out[i + NUM_BINS * CHANNEL] = smem[i + NUM_BINS * CHANNEL + CHANNEL]; + } + } + + // Second pass histogram kernel (accumulation) + template < + int NUM_PARTS, + int ACTIVE_CHANNELS, + int NUM_BINS> + __global__ void histogram_smem_accum( + const unsigned int *in, + int n, + unsigned int *out) + { + int i = blockIdx.x * blockDim.x + threadIdx.x; + if (i > ACTIVE_CHANNELS * NUM_BINS) return; // out of range + unsigned int total = 0; + for (int j = 0; j < n; j++) + total += in[i + NUM_PARTS * j]; + out[i] = total; + } + +} // namespace histogram_smem_atomics + + +template < + int ACTIVE_CHANNELS, + int NUM_BINS, + typename PixelType> +double run_smem_atomics( + PixelType *d_image, + int width, + int height, + unsigned int *d_hist, + bool warmup) +{ + enum + { + NUM_PARTS = 1024 + }; + + cudaDeviceProp props; + cudaGetDeviceProperties(&props, 0); + + dim3 block(32, 4); + dim3 grid(16, 16); + int 
total_blocks = grid.x * grid.y; + + // allocate partial histogram + unsigned int *d_part_hist; + cudaMalloc(&d_part_hist, total_blocks * NUM_PARTS * sizeof(unsigned int)); + + dim3 block2(128); + dim3 grid2((ACTIVE_CHANNELS * NUM_BINS + block.x - 1) / block.x); + + GpuTimer gpu_timer; + gpu_timer.Start(); + + histogram_smem_atomics::histogram_smem_atomics<<>>( + d_image, + width, + height, + d_part_hist); + + histogram_smem_atomics::histogram_smem_accum<<>>( + d_part_hist, + total_blocks, + d_hist); + + gpu_timer.Stop(); + float elapsed_millis = gpu_timer.ElapsedMillis(); + + cudaFree(d_part_hist); + + return elapsed_millis; +} + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/experimental/histogram_compare.cu b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/histogram_compare.cu new file mode 100644 index 0000000..7ab66a1 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/histogram_compare.cu @@ -0,0 +1,635 @@ +/****************************************************************************** + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
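One detail worth calling out in histogram_smem_atomics above: each channel's counters are offset by an extra CHANNEL words, so bin b of channel c lives at c * NUM_BINS + b + c, which is why the shared array is padded to ACTIVE_CHANNELS * NUM_BINS + 3. Since NUM_BINS (256) is a multiple of the 32-bank shared-memory width, the same bin index in different channels would otherwise map to the same bank; the stagger is presumably there to spread those conflicting atomics across banks. A hypothetical helper (not in the original) makes the layout explicit:

// Padded shared-memory layout used by histogram_smem_atomics:
//   channel c, bin b  ->  c * num_bins + b + c.
// The extra "+ c" staggers channels across shared-memory banks when
// num_bins is a multiple of 32.
__host__ __device__ __forceinline__
int PaddedBinIndex(int channel, int bin, int num_bins)
{
    return channel * num_bins + bin + channel;
}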
+ * + ******************************************************************************/ + +#include +#include +#include +#include +#include +#include + +#include "histogram/histogram_gmem_atomics.h" +#include "histogram/histogram_smem_atomics.h" +#include "histogram/histogram_cub.h" + +#include +#include + +using namespace cub; + +//--------------------------------------------------------------------- +// Globals, constants, and type declarations +//--------------------------------------------------------------------- + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +bool g_verbose = false; // Whether to display input/output to console +bool g_report = false; // Whether to display a full report in CSV format +CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + +struct less_than_value +{ + inline bool operator()( + const std::pair &a, + const std::pair &b) + { + return a.second < b.second; + } +}; + + +//--------------------------------------------------------------------- +// Targa (.tga) image file parsing +//--------------------------------------------------------------------- + +/** + * TGA image header info + */ +struct TgaHeader +{ + char idlength; + char colormaptype; + char datatypecode; + short colormaporigin; + short colormaplength; + char colormapdepth; + short x_origin; + short y_origin; + short width; + short height; + char bitsperpixel; + char imagedescriptor; + + void Parse (FILE *fptr) + { + idlength = fgetc(fptr); + colormaptype = fgetc(fptr); + datatypecode = fgetc(fptr); + fread(&colormaporigin, 2, 1, fptr); + fread(&colormaplength, 2, 1, fptr); + colormapdepth = fgetc(fptr); + fread(&x_origin, 2, 1, fptr); + fread(&y_origin, 2, 1, fptr); + fread(&width, 2, 1, fptr); + fread(&height, 2, 1, fptr); + bitsperpixel = fgetc(fptr); + imagedescriptor = fgetc(fptr); + } + + void Display (FILE *fptr) + { + fprintf(fptr, "ID length: %d\n", idlength); + fprintf(fptr, "Color map type: %d\n", colormaptype); + fprintf(fptr, "Image type: %d\n", datatypecode); + fprintf(fptr, "Color map offset: %d\n", colormaporigin); + fprintf(fptr, "Color map length: %d\n", colormaplength); + fprintf(fptr, "Color map depth: %d\n", colormapdepth); + fprintf(fptr, "X origin: %d\n", x_origin); + fprintf(fptr, "Y origin: %d\n", y_origin); + fprintf(fptr, "Width: %d\n", width); + fprintf(fptr, "Height: %d\n", height); + fprintf(fptr, "Bits per pixel: %d\n", bitsperpixel); + fprintf(fptr, "Descriptor: %d\n", imagedescriptor); + } +}; + + +/** + * Decode image byte data into pixel + */ +void ParseTgaPixel(uchar4 &pixel, unsigned char *tga_pixel, int bytes) +{ + if (bytes == 4) + { + pixel.x = tga_pixel[2]; + pixel.y = tga_pixel[1]; + pixel.z = tga_pixel[0]; + pixel.w = tga_pixel[3]; + } + else if (bytes == 3) + { + pixel.x = tga_pixel[2]; + pixel.y = tga_pixel[1]; + pixel.z = tga_pixel[0]; + pixel.w = 0; + } + else if (bytes == 2) + { + pixel.x = (tga_pixel[1] & 0x7c) << 1; + pixel.y = ((tga_pixel[1] & 0x03) << 6) | ((tga_pixel[0] & 0xe0) >> 2); + pixel.z = (tga_pixel[0] & 0x1f) << 3; + pixel.w = (tga_pixel[1] & 0x80); + } +} + + +/** + * Reads a .tga image file + */ +void ReadTga(uchar4* &pixels, int &width, int &height, const char *filename) +{ + // Open the file + FILE *fptr; + if ((fptr = fopen(filename, "rb")) == NULL) + { + fprintf(stderr, "File open failed\n"); + exit(-1); + } + + // Parse header + TgaHeader header; + header.Parse(fptr); +// header.Display(stdout); + width = header.width; + height = header.height; + + // Verify compatibility + 
if (header.datatypecode != 2 && header.datatypecode != 10) + { + fprintf(stderr, "Can only handle image type 2 and 10\n"); + exit(-1); + } + if (header.bitsperpixel != 16 && header.bitsperpixel != 24 && header.bitsperpixel != 32) + { + fprintf(stderr, "Can only handle pixel depths of 16, 24, and 32\n"); + exit(-1); + } + if (header.colormaptype != 0 && header.colormaptype != 1) + { + fprintf(stderr, "Can only handle color map types of 0 and 1\n"); + exit(-1); + } + + // Skip unnecessary header info + int skip_bytes = header.idlength + (header.colormaptype * header.colormaplength); + fseek(fptr, skip_bytes, SEEK_CUR); + + // Read the image + int pixel_bytes = header.bitsperpixel / 8; + + // Allocate and initialize pixel data + size_t image_bytes = width * height * sizeof(uchar4); + if ((pixels == NULL) && ((pixels = (uchar4*) malloc(image_bytes)) == NULL)) + { + fprintf(stderr, "malloc of image failed\n"); + exit(-1); + } + memset(pixels, 0, image_bytes); + + // Parse pixels + unsigned char tga_pixel[5]; + int current_pixel = 0; + while (current_pixel < header.width * header.height) + { + if (header.datatypecode == 2) + { + // Uncompressed + if (fread(tga_pixel, 1, pixel_bytes, fptr) != pixel_bytes) + { + fprintf(stderr, "Unexpected end of file at pixel %d (uncompressed)\n", current_pixel); + exit(-1); + } + ParseTgaPixel(pixels[current_pixel], tga_pixel, pixel_bytes); + current_pixel++; + } + else if (header.datatypecode == 10) + { + // Compressed + if (fread(tga_pixel, 1, pixel_bytes + 1, fptr) != pixel_bytes + 1) + { + fprintf(stderr, "Unexpected end of file at pixel %d (compressed)\n", current_pixel); + exit(-1); + } + int run_length = tga_pixel[0] & 0x7f; + ParseTgaPixel(pixels[current_pixel], &(tga_pixel[1]), pixel_bytes); + current_pixel++; + + if (tga_pixel[0] & 0x80) + { + // RLE chunk + for (int i = 0; i < run_length; i++) + { + ParseTgaPixel(pixels[current_pixel], &(tga_pixel[1]), pixel_bytes); + current_pixel++; + } + } + else + { + // Normal chunk + for (int i = 0; i < run_length; i++) + { + if (fread(tga_pixel, 1, pixel_bytes, fptr) != pixel_bytes) + { + fprintf(stderr, "Unexpected end of file at pixel %d (normal)\n", current_pixel); + exit(-1); + } + ParseTgaPixel(pixels[current_pixel], tga_pixel, pixel_bytes); + current_pixel++; + } + } + } + } + + // Close file + fclose(fptr); +} + + + +//--------------------------------------------------------------------- +// Random image generation +//--------------------------------------------------------------------- + +/** + * Generate a random image with specified entropy + */ +void GenerateRandomImage(uchar4* &pixels, int width, int height, int entropy_reduction) +{ + int num_pixels = width * height; + size_t image_bytes = num_pixels * sizeof(uchar4); + if ((pixels == NULL) && ((pixels = (uchar4*) malloc(image_bytes)) == NULL)) + { + fprintf(stderr, "malloc of image failed\n"); + exit(-1); + } + + for (int i = 0; i < num_pixels; ++i) + { + RandomBits(pixels[i].x, entropy_reduction); + RandomBits(pixels[i].y, entropy_reduction); + RandomBits(pixels[i].z, entropy_reduction); + RandomBits(pixels[i].w, entropy_reduction); + } +} + + + +//--------------------------------------------------------------------- +// Histogram verification +//--------------------------------------------------------------------- + +// Decode float4 pixel into bins +template +void DecodePixelGold(float4 pixel, unsigned int (&bins)[ACTIVE_CHANNELS]) +{ + float* samples = reinterpret_cast(&pixel); + + for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) + 
bins[CHANNEL] = (unsigned int) (samples[CHANNEL] * float(NUM_BINS)); +} + +// Decode uchar4 pixel into bins +template +void DecodePixelGold(uchar4 pixel, unsigned int (&bins)[ACTIVE_CHANNELS]) +{ + unsigned char* samples = reinterpret_cast(&pixel); + + for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) + bins[CHANNEL] = (unsigned int) (samples[CHANNEL]); +} + +// Decode uchar1 pixel into bins +template +void DecodePixelGold(uchar1 pixel, unsigned int (&bins)[ACTIVE_CHANNELS]) +{ + bins[0] = (unsigned int) pixel.x; +} + + +// Compute reference histogram. Specialized for uchar4 +template < + int ACTIVE_CHANNELS, + int NUM_BINS, + typename PixelType> +void HistogramGold(PixelType *image, int width, int height, unsigned int* hist) +{ + memset(hist, 0, ACTIVE_CHANNELS * NUM_BINS * sizeof(unsigned int)); + + for (int i = 0; i < width; i++) + { + for (int j = 0; j < height; j++) + { + PixelType pixel = image[i + j * width]; + + unsigned int bins[ACTIVE_CHANNELS]; + DecodePixelGold(pixel, bins); + + for (int CHANNEL = 0; CHANNEL < ACTIVE_CHANNELS; ++CHANNEL) + { + hist[(NUM_BINS * CHANNEL) + bins[CHANNEL]]++; + } + } + } +} + + +//--------------------------------------------------------------------- +// Test execution +//--------------------------------------------------------------------- + +/** + * Run a specific histogram implementation + */ +template < + int ACTIVE_CHANNELS, + int NUM_BINS, + typename PixelType> +void RunTest( + std::vector >& timings, + PixelType* d_pixels, + const int width, + const int height, + unsigned int * d_hist, + unsigned int * h_hist, + int timing_iterations, + const char * long_name, + const char * short_name, + double (*f)(PixelType*, int, int, unsigned int*, bool)) +{ + if (!g_report) printf("%s ", long_name); fflush(stdout); + + // Run single test to verify (and code cache) + (*f)(d_pixels, width, height, d_hist, !g_report); + + int compare = CompareDeviceResults(h_hist, d_hist, ACTIVE_CHANNELS * NUM_BINS, true, g_verbose); + if (!g_report) printf("\t%s\n", compare ? 
"FAIL" : "PASS"); fflush(stdout); + + double elapsed_ms = 0; + for (int i = 0; i < timing_iterations; i++) + { + elapsed_ms += (*f)(d_pixels, width, height, d_hist, false); + } + double avg_us = (elapsed_ms / timing_iterations) * 1000; // average in us + timings.push_back(std::pair(short_name, avg_us)); + + if (!g_report) + { + printf("Avg time %.3f us (%d iterations)\n", avg_us, timing_iterations); fflush(stdout); + } + else + { + printf("%.3f, ", avg_us); fflush(stdout); + } + + AssertEquals(0, compare); +} + + +/** + * Evaluate corpus of histogram implementations + */ +template < + int NUM_CHANNELS, + int ACTIVE_CHANNELS, + int NUM_BINS, + typename PixelType> +void TestMethods( + PixelType* h_pixels, + int height, + int width, + int timing_iterations, + double bandwidth_GBs) +{ + // Copy data to gpu + PixelType* d_pixels; + size_t pixel_bytes = width * height * sizeof(PixelType); + CubDebugExit(g_allocator.DeviceAllocate((void**) &d_pixels, pixel_bytes)); + CubDebugExit(cudaMemcpy(d_pixels, h_pixels, pixel_bytes, cudaMemcpyHostToDevice)); + + if (g_report) printf("%.3f, ", double(pixel_bytes) / bandwidth_GBs / 1000); + + // Allocate results arrays on cpu/gpu + unsigned int *h_hist; + unsigned int *d_hist; + size_t histogram_bytes = NUM_BINS * ACTIVE_CHANNELS * sizeof(unsigned int); + h_hist = (unsigned int *) malloc(histogram_bytes); + g_allocator.DeviceAllocate((void **) &d_hist, histogram_bytes); + + // Compute reference cpu histogram + HistogramGold(h_pixels, width, height, h_hist); + + // Store timings + std::vector > timings; + + // Run experiments + RunTest(timings, d_pixels, width, height, d_hist, h_hist, timing_iterations, + "CUB", "CUB", run_cub_histogram); + RunTest(timings, d_pixels, width, height, d_hist, h_hist, timing_iterations, + "Shared memory atomics", "smem atomics", run_smem_atomics); + RunTest(timings, d_pixels, width, height, d_hist, h_hist, timing_iterations, + "Global memory atomics", "gmem atomics", run_gmem_atomics); + + // Report timings + if (!g_report) + { + std::sort(timings.begin(), timings.end(), less_than_value()); + printf("Timings (us):\n"); + for (int i = 0; i < timings.size(); i++) + { + double bandwidth = height * width * sizeof(PixelType) / timings[i].second / 1000; + printf("\t %.3f %s (%.3f GB/s, %.3f%% peak)\n", timings[i].second, timings[i].first.c_str(), bandwidth, bandwidth / bandwidth_GBs * 100); + } + printf("\n"); + } + + // Free data + CubDebugExit(g_allocator.DeviceFree(d_pixels)); + CubDebugExit(g_allocator.DeviceFree(d_hist)); + free(h_hist); +} + + +/** + * Test different problem genres + */ +void TestGenres( + uchar4* uchar4_pixels, + int height, + int width, + int timing_iterations, + double bandwidth_GBs) +{ + int num_pixels = width * height; + + { + if (!g_report) printf("1 channel uchar1 tests (256-bin):\n\n"); fflush(stdout); + + size_t image_bytes = num_pixels * sizeof(uchar1); + uchar1* uchar1_pixels = (uchar1*) malloc(image_bytes); + + // Convert to 1-channel (averaging first 3 channels) + for (int i = 0; i < num_pixels; ++i) + { + uchar1_pixels[i].x = (unsigned char) + (((unsigned int) uchar4_pixels[i].x + + (unsigned int) uchar4_pixels[i].y + + (unsigned int) uchar4_pixels[i].z) / 3); + } + + TestMethods<1, 1, 256>(uchar1_pixels, width, height, timing_iterations, bandwidth_GBs); + free(uchar1_pixels); + if (g_report) printf(", "); + } + + { + if (!g_report) printf("3/4 channel uchar4 tests (256-bin):\n\n"); fflush(stdout); + TestMethods<4, 3, 256>(uchar4_pixels, width, height, timing_iterations, bandwidth_GBs); + if 
(g_report) printf(", "); + } + + { + if (!g_report) printf("3/4 channel float4 tests (256-bin):\n\n"); fflush(stdout); + size_t image_bytes = num_pixels * sizeof(float4); + float4* float4_pixels = (float4*) malloc(image_bytes); + + // Convert to float4 with range [0.0, 1.0) + for (int i = 0; i < num_pixels; ++i) + { + float4_pixels[i].x = float(uchar4_pixels[i].x) / 256; + float4_pixels[i].y = float(uchar4_pixels[i].y) / 256; + float4_pixels[i].z = float(uchar4_pixels[i].z) / 256; + float4_pixels[i].w = float(uchar4_pixels[i].w) / 256; + } + TestMethods<4, 3, 256>(float4_pixels, width, height, timing_iterations, bandwidth_GBs); + free(float4_pixels); + if (g_report) printf("\n"); + } +} + + +/** + * Main + */ +int main(int argc, char **argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + if (args.CheckCmdLineFlag("help")) + { + printf( + "%s " + "[--device=] " + "[--v] " + "[--i=] " + "\n\t" + "--file=<.tga filename> " + "\n\t" + "--entropy=<-1 (0%), 0 (100%), 1 (81%), 2 (54%), 3 (34%), 4 (20%), ..." + "[--height=] " + "[--width=] " + "\n", argv[0]); + exit(0); + } + + std::string filename; + int timing_iterations = 100; + int entropy_reduction = 0; + int height = 1080; + int width = 1920; + + g_verbose = args.CheckCmdLineFlag("v"); + g_report = args.CheckCmdLineFlag("report"); + args.GetCmdLineArgument("i", timing_iterations); + args.GetCmdLineArgument("file", filename); + args.GetCmdLineArgument("height", height); + args.GetCmdLineArgument("width", width); + args.GetCmdLineArgument("entropy", entropy_reduction); + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Get GPU device bandwidth (GB/s) + int device_ordinal, bus_width, mem_clock_khz; + CubDebugExit(cudaGetDevice(&device_ordinal)); + CubDebugExit(cudaDeviceGetAttribute(&bus_width, cudaDevAttrGlobalMemoryBusWidth, device_ordinal)); + CubDebugExit(cudaDeviceGetAttribute(&mem_clock_khz, cudaDevAttrMemoryClockRate, device_ordinal)); + double bandwidth_GBs = double(bus_width) * mem_clock_khz * 2 / 8 / 1000 / 1000; + + // Run test(s) + uchar4* uchar4_pixels = NULL; + if (!g_report) + { + if (!filename.empty()) + { + // Parse targa file + ReadTga(uchar4_pixels, width, height, filename.c_str()); + printf("File %s: width(%d) height(%d)\n\n", filename.c_str(), width, height); fflush(stdout); + } + else + { + // Generate image + GenerateRandomImage(uchar4_pixels, width, height, entropy_reduction); + printf("Random image: entropy-reduction(%d) width(%d) height(%d)\n\n", entropy_reduction, width, height); fflush(stdout); + } + + TestGenres(uchar4_pixels, height, width, timing_iterations, bandwidth_GBs); + } + else + { + // Run test suite + printf("Test, MIN, RLE CUB, SMEM, GMEM, , MIN, RLE_CUB, SMEM, GMEM, , MIN, RLE_CUB, SMEM, GMEM\n"); + + // Entropy reduction tests + for (entropy_reduction = 0; entropy_reduction < 5; ++entropy_reduction) + { + printf("entropy reduction %d, ", entropy_reduction); + GenerateRandomImage(uchar4_pixels, width, height, entropy_reduction); + TestGenres(uchar4_pixels, height, width, timing_iterations, bandwidth_GBs); + } + printf("entropy reduction -1, "); + GenerateRandomImage(uchar4_pixels, width, height, -1); + TestGenres(uchar4_pixels, height, width, timing_iterations, bandwidth_GBs); + printf("\n"); + + // File image tests + std::vector file_tests; + file_tests.push_back("animals"); + file_tests.push_back("apples"); + file_tests.push_back("sunset"); + file_tests.push_back("cheetah"); + file_tests.push_back("nature"); + file_tests.push_back("operahouse"); + 
file_tests.push_back("austin"); + file_tests.push_back("cityscape"); + + for (int i = 0; i < file_tests.size(); ++i) + { + printf("%s, ", file_tests[i].c_str()); + std::string filename = std::string("histogram/benchmark/") + file_tests[i] + ".tga"; + ReadTga(uchar4_pixels, width, height, filename.c_str()); + TestGenres(uchar4_pixels, height, width, timing_iterations, bandwidth_GBs); + } + } + + free(uchar4_pixels); + + CubDebugExit(cudaDeviceSynchronize()); + printf("\n\n"); + + return 0; +} diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/experimental/sparse_matrix.h b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/sparse_matrix.h new file mode 100644 index 0000000..1fb5233 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/sparse_matrix.h @@ -0,0 +1,1244 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/****************************************************************************** + * Matrix data structures and parsing logic + ******************************************************************************/ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CUB_MKL + #include + #include +#endif + +using namespace std; + +/****************************************************************************** + * COO matrix type + ******************************************************************************/ + +struct GraphStats +{ + int num_rows; + int num_cols; + int num_nonzeros; + + double diag_dist_mean; // mean + double diag_dist_std_dev; // sample std dev + double pearson_r; // coefficient of variation + + double row_length_mean; // mean + double row_length_std_dev; // sample std_dev + double row_length_variation; // coefficient of variation + double row_length_skewness; // skewness + + void Display(bool show_labels = true) + { + if (show_labels) + printf("\n" + "\t num_rows: %d\n" + "\t num_cols: %d\n" + "\t num_nonzeros: %d\n" + "\t diag_dist_mean: %.2f\n" + "\t diag_dist_std_dev: %.2f\n" + "\t pearson_r: %f\n" + "\t row_length_mean: %.5f\n" + "\t row_length_std_dev: %.5f\n" + "\t row_length_variation: %.5f\n" + "\t row_length_skewness: %.5f\n", + num_rows, + num_cols, + num_nonzeros, + diag_dist_mean, + diag_dist_std_dev, + pearson_r, + row_length_mean, + row_length_std_dev, + row_length_variation, + row_length_skewness); + else + printf( + "%d, " + "%d, " + "%d, " + "%.2f, " + "%.2f, " + "%f, " + "%.5f, " + "%.5f, " + "%.5f, " + "%.5f, ", + num_rows, + num_cols, + num_nonzeros, + diag_dist_mean, + diag_dist_std_dev, + pearson_r, + row_length_mean, + row_length_std_dev, + row_length_variation, + row_length_skewness); + } +}; + + + +/****************************************************************************** + * COO matrix type + ******************************************************************************/ + + +/** + * COO matrix type. A COO matrix is just a vector of edge tuples. Tuples are sorted + * first by row, then by column. 
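 *
 * Illustrative example: the 3x3 matrix
 *
 *     [ 0  5  0 ]
 *     [ 7  0  2 ]
 *     [ 0  0  9 ]
 *
 * is stored as the tuple list (0,1,5) (1,0,7) (1,2,2) (2,2,9), i.e. already in
 * the row-major, column-minor order enforced by the CooTuple comparator below.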
+ */ +template +struct CooMatrix +{ + //--------------------------------------------------------------------- + // Type definitions and constants + //--------------------------------------------------------------------- + + // COO edge tuple + struct CooTuple + { + OffsetT row; + OffsetT col; + ValueT val; + + CooTuple() {} + CooTuple(OffsetT row, OffsetT col) : row(row), col(col) {} + CooTuple(OffsetT row, OffsetT col, ValueT val) : row(row), col(col), val(val) {} + + /** + * Comparator for sorting COO sparse format num_nonzeros + */ + bool operator<(const CooTuple &other) const + { + if ((row < other.row) || ((row == other.row) && (col < other.col))) + { + return true; + } + + return false; + } + }; + + + //--------------------------------------------------------------------- + // Data members + //--------------------------------------------------------------------- + + // Fields + int num_rows; + int num_cols; + int num_nonzeros; + CooTuple* coo_tuples; + + //--------------------------------------------------------------------- + // Methods + //--------------------------------------------------------------------- + + // Constructor + CooMatrix() : num_rows(0), num_cols(0), num_nonzeros(0), coo_tuples(NULL) {} + + + /** + * Clear + */ + void Clear() + { + if (coo_tuples) delete[] coo_tuples; + coo_tuples = NULL; + } + + + // Destructor + ~CooMatrix() + { + Clear(); + } + + + // Display matrix to stdout + void Display() + { + cout << "COO Matrix (" << num_rows << " rows, " << num_cols << " columns, " << num_nonzeros << " non-zeros):\n"; + cout << "Ordinal, Row, Column, Value\n"; + for (int i = 0; i < num_nonzeros; i++) + { + cout << '\t' << i << ',' << coo_tuples[i].row << ',' << coo_tuples[i].col << ',' << coo_tuples[i].val << "\n"; + } + } + + + /** + * Builds a symmetric COO sparse from an asymmetric CSR matrix. + */ + template + void InitCsrSymmetric(CsrMatrixT &csr_matrix) + { + if (coo_tuples) + { + fprintf(stderr, "Matrix already constructed\n"); + exit(1); + } + + num_rows = csr_matrix.num_cols; + num_cols = csr_matrix.num_rows; + num_nonzeros = csr_matrix.num_nonzeros * 2; + coo_tuples = new CooTuple[num_nonzeros]; + + for (OffsetT row = 0; row < csr_matrix.num_rows; ++row) + { + for (OffsetT nonzero = csr_matrix.row_offsets[row]; nonzero < csr_matrix.row_offsets[row + 1]; ++nonzero) + { + coo_tuples[nonzero].row = row; + coo_tuples[nonzero].col = csr_matrix.column_indices[nonzero]; + coo_tuples[nonzero].val = csr_matrix.values[nonzero]; + + coo_tuples[csr_matrix.num_nonzeros + nonzero].row = coo_tuples[nonzero].col; + coo_tuples[csr_matrix.num_nonzeros + nonzero].col = coo_tuples[nonzero].row; + coo_tuples[csr_matrix.num_nonzeros + nonzero].val = csr_matrix.values[nonzero]; + + } + } + + // Sort by rows, then columns + std::stable_sort(coo_tuples, coo_tuples + num_nonzeros); + } + + /** + * Builds a COO sparse from a relabeled CSR matrix. 
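 *
 * Every CSR nonzero (row, col, val) becomes the tuple
 * (relabel_indices[row], relabel_indices[col], val), and the tuple list is then
 * re-sorted, so the result is a symmetrically permuted but otherwise identical
 * matrix.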
+ */ + template + void InitCsrRelabel(CsrMatrixT &csr_matrix, OffsetT* relabel_indices) + { + if (coo_tuples) + { + fprintf(stderr, "Matrix already constructed\n"); + exit(1); + } + + num_rows = csr_matrix.num_rows; + num_cols = csr_matrix.num_cols; + num_nonzeros = csr_matrix.num_nonzeros; + coo_tuples = new CooTuple[num_nonzeros]; + + for (OffsetT row = 0; row < num_rows; ++row) + { + for (OffsetT nonzero = csr_matrix.row_offsets[row]; nonzero < csr_matrix.row_offsets[row + 1]; ++nonzero) + { + coo_tuples[nonzero].row = relabel_indices[row]; + coo_tuples[nonzero].col = relabel_indices[csr_matrix.column_indices[nonzero]]; + coo_tuples[nonzero].val = csr_matrix.values[nonzero]; + } + } + + // Sort by rows, then columns + std::stable_sort(coo_tuples, coo_tuples + num_nonzeros); + } + + + + /** + * Builds a METIS COO sparse from the given file. + */ + void InitMetis(const string &metis_filename) + { + if (coo_tuples) + { + fprintf(stderr, "Matrix already constructed\n"); + exit(1); + } + + // TODO + } + + + /** + * Builds a MARKET COO sparse from the given file. + */ + void InitMarket( + const string& market_filename, + ValueT default_value = 1.0, + bool verbose = false) + { + if (verbose) { + printf("Reading... "); fflush(stdout); + } + + if (coo_tuples) + { + fprintf(stderr, "Matrix already constructed\n"); + exit(1); + } + + std::ifstream ifs; + ifs.open(market_filename.c_str(), std::ifstream::in); + if (!ifs.good()) + { + fprintf(stderr, "Error opening file\n"); + exit(1); + } + + bool array = false; + bool symmetric = false; + bool skew = false; + int current_edge = -1; + char line[1024]; + + if (verbose) { + printf("Parsing... "); fflush(stdout); + } + + while (true) + { + ifs.getline(line, 1024); + if (!ifs.good()) + { + // Done + break; + } + + if (line[0] == '%') + { + // Comment + if (line[1] == '%') + { + // Banner + symmetric = (strstr(line, "symmetric") != NULL); + skew = (strstr(line, "skew") != NULL); + array = (strstr(line, "array") != NULL); + + if (verbose) { + printf("(symmetric: %d, skew: %d, array: %d) ", symmetric, skew, array); fflush(stdout); + } + } + } + else if (current_edge == -1) + { + // Problem description + int nparsed = sscanf(line, "%d %d %d", &num_rows, &num_cols, &num_nonzeros); + if ((!array) && (nparsed == 3)) + { + if (symmetric) + num_nonzeros *= 2; + + // Allocate coo matrix + coo_tuples = new CooTuple[num_nonzeros]; + current_edge = 0; + + } + else if (array && (nparsed == 2)) + { + // Allocate coo matrix + num_nonzeros = num_rows * num_cols; + coo_tuples = new CooTuple[num_nonzeros]; + current_edge = 0; + } + else + { + fprintf(stderr, "Error parsing MARKET matrix: invalid problem description: %s\n", line); + exit(1); + } + + } + else + { + // Edge + if (current_edge >= num_nonzeros) + { + fprintf(stderr, "Error parsing MARKET matrix: encountered more than %d num_nonzeros\n", num_nonzeros); + exit(1); + } + + int row, col; + double val; + + if (array) + { + if (sscanf(line, "%lf", &val) != 1) + { + fprintf(stderr, "Error parsing MARKET matrix: badly formed current_edge: '%s' at edge %d\n", line, current_edge); + exit(1); + } + col = (current_edge / num_rows); + row = (current_edge - (num_rows * col)); + + coo_tuples[current_edge] = CooTuple(row, col, val); // Convert indices to zero-based + } + else + { + // Parse nonzero (note: using strtol and strtod is 2x faster than sscanf or istream parsing) + char *l = line; + char *t = NULL; + + // parse row + row = strtol(l, &t, 0); + if (t == l) + { + fprintf(stderr, "Error parsing MARKET matrix: badly 
formed row at edge %d\n", current_edge); + exit(1); + } + l = t; + + // parse col + col = strtol(l, &t, 0); + if (t == l) + { + fprintf(stderr, "Error parsing MARKET matrix: badly formed col at edge %d\n", current_edge); + exit(1); + } + l = t; + + // parse val + val = strtod(l, &t); + if (t == l) + { + val = default_value; + } +/* + int nparsed = sscanf(line, "%d %d %lf", &row, &col, &val); + if (nparsed == 2) + { + // No value specified + val = default_value; + + } + else if (nparsed != 3) + { + fprintf(stderr, "Error parsing MARKET matrix 1: badly formed current_edge: %d parsed at edge %d\n", nparsed, current_edge); + exit(1); + } +*/ + + coo_tuples[current_edge] = CooTuple(row - 1, col - 1, val); // Convert indices to zero-based + + } + + current_edge++; + + if (symmetric && (row != col)) + { + coo_tuples[current_edge].row = coo_tuples[current_edge - 1].col; + coo_tuples[current_edge].col = coo_tuples[current_edge - 1].row; + coo_tuples[current_edge].val = coo_tuples[current_edge - 1].val * (skew ? -1 : 1); + current_edge++; + } + } + } + + // Adjust nonzero count (nonzeros along the diagonal aren't reversed) + num_nonzeros = current_edge; + + if (verbose) { + printf("done. Ordering..."); fflush(stdout); + } + + // Sort by rows, then columns + std::stable_sort(coo_tuples, coo_tuples + num_nonzeros); + + if (verbose) { + printf("done. "); fflush(stdout); + } + + ifs.close(); + } + + + /** + * Builds a dense matrix + */ + int InitDense( + OffsetT num_rows, + OffsetT num_cols, + ValueT default_value = 1.0, + bool verbose = false) + { + if (coo_tuples) + { + fprintf(stderr, "Matrix already constructed\n"); + exit(1); + } + + this->num_rows = num_rows; + this->num_cols = num_cols; + + num_nonzeros = num_rows * num_cols; + coo_tuples = new CooTuple[num_nonzeros]; + + for (OffsetT row = 0; row < num_rows; ++row) + { + for (OffsetT col = 0; col < num_cols; ++col) + { + coo_tuples[(row * num_cols) + col] = CooTuple(row, col, default_value); + } + } + + // Sort by rows, then columns + std::stable_sort(coo_tuples, coo_tuples + num_nonzeros); + + return 0; + } + + /** + * Builds a wheel COO sparse matrix having spokes spokes. + */ + int InitWheel( + OffsetT spokes, + ValueT default_value = 1.0, + bool verbose = false) + { + if (coo_tuples) + { + fprintf(stderr, "Matrix already constructed\n"); + exit(1); + } + + num_rows = spokes + 1; + num_cols = num_rows; + num_nonzeros = spokes * 2; + coo_tuples = new CooTuple[num_nonzeros]; + + // Add spoke num_nonzeros + int current_edge = 0; + for (OffsetT i = 0; i < spokes; i++) + { + coo_tuples[current_edge] = CooTuple(0, i + 1, default_value); + current_edge++; + } + + // Add rim + for (OffsetT i = 0; i < spokes; i++) + { + OffsetT dest = (i + 1) % spokes; + coo_tuples[current_edge] = CooTuple(i + 1, dest + 1, default_value); + current_edge++; + } + + // Sort by rows, then columns + std::stable_sort(coo_tuples, coo_tuples + num_nonzeros); + + return 0; + } + + + /** + * Builds a square 2D grid CSR matrix. Interior num_vertices have degree 5 when including + * a self-loop. + * + * Returns 0 on success, 1 on failure. 
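 *
 * For example, width = 3 gives 9 vertices: the single interior vertex
 * contributes 4 nonzeros, the 4 edge vertices contribute 3 each, and the 4
 * corners 2 each, for 4 + 12 + 8 = 24 nonzeros (33 once self-loops are added).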
+ */ + int InitGrid2d(OffsetT width, bool self_loop, ValueT default_value = 1.0) + { + if (coo_tuples) + { + fprintf(stderr, "Matrix already constructed\n"); + exit(1); + } + + int interior_nodes = (width - 2) * (width - 2); + int edge_nodes = (width - 2) * 4; + int corner_nodes = 4; + num_rows = width * width; + num_cols = num_rows; + num_nonzeros = (interior_nodes * 4) + (edge_nodes * 3) + (corner_nodes * 2); + + if (self_loop) + num_nonzeros += num_rows; + + coo_tuples = new CooTuple[num_nonzeros]; + int current_edge = 0; + + for (OffsetT j = 0; j < width; j++) + { + for (OffsetT k = 0; k < width; k++) + { + OffsetT me = (j * width) + k; + + // West + OffsetT neighbor = (j * width) + (k - 1); + if (k - 1 >= 0) { + coo_tuples[current_edge] = CooTuple(me, neighbor, default_value); + current_edge++; + } + + // East + neighbor = (j * width) + (k + 1); + if (k + 1 < width) { + coo_tuples[current_edge] = CooTuple(me, neighbor, default_value); + current_edge++; + } + + // North + neighbor = ((j - 1) * width) + k; + if (j - 1 >= 0) { + coo_tuples[current_edge] = CooTuple(me, neighbor, default_value); + current_edge++; + } + + // South + neighbor = ((j + 1) * width) + k; + if (j + 1 < width) { + coo_tuples[current_edge] = CooTuple(me, neighbor, default_value); + current_edge++; + } + + if (self_loop) + { + neighbor = me; + coo_tuples[current_edge] = CooTuple(me, neighbor, default_value); + current_edge++; + } + } + } + + // Sort by rows, then columns, update dims + std::stable_sort(coo_tuples, coo_tuples + num_nonzeros); + + return 0; + } + + + /** + * Builds a square 3D grid COO sparse matrix. Interior num_vertices have degree 7 when including + * a self-loop. Values are unintialized, coo_tuples are sorted. + */ + int InitGrid3d(OffsetT width, bool self_loop, ValueT default_value = 1.0) + { + if (coo_tuples) + { + fprintf(stderr, "Matrix already constructed\n"); + return -1; + } + + OffsetT interior_nodes = (width - 2) * (width - 2) * (width - 2); + OffsetT face_nodes = (width - 2) * (width - 2) * 6; + OffsetT edge_nodes = (width - 2) * 12; + OffsetT corner_nodes = 8; + num_cols = width * width * width; + num_rows = num_cols; + num_nonzeros = (interior_nodes * 6) + (face_nodes * 5) + (edge_nodes * 4) + (corner_nodes * 3); + + if (self_loop) + num_nonzeros += num_rows; + + coo_tuples = new CooTuple[num_nonzeros]; + int current_edge = 0; + + for (OffsetT i = 0; i < width; i++) + { + for (OffsetT j = 0; j < width; j++) + { + for (OffsetT k = 0; k < width; k++) + { + + OffsetT me = (i * width * width) + (j * width) + k; + + // Up + OffsetT neighbor = (i * width * width) + (j * width) + (k - 1); + if (k - 1 >= 0) { + coo_tuples[current_edge] = CooTuple(me, neighbor, default_value); + current_edge++; + } + + // Down + neighbor = (i * width * width) + (j * width) + (k + 1); + if (k + 1 < width) { + coo_tuples[current_edge] = CooTuple(me, neighbor, default_value); + current_edge++; + } + + // West + neighbor = (i * width * width) + ((j - 1) * width) + k; + if (j - 1 >= 0) { + coo_tuples[current_edge] = CooTuple(me, neighbor, default_value); + current_edge++; + } + + // East + neighbor = (i * width * width) + ((j + 1) * width) + k; + if (j + 1 < width) { + coo_tuples[current_edge] = CooTuple(me, neighbor, default_value); + current_edge++; + } + + // North + neighbor = ((i - 1) * width * width) + (j * width) + k; + if (i - 1 >= 0) { + coo_tuples[current_edge] = CooTuple(me, neighbor, default_value); + current_edge++; + } + + // South + neighbor = ((i + 1) * width * width) + (j * width) + k; + if (i + 1 < 
width) { + coo_tuples[current_edge] = CooTuple(me, neighbor, default_value); + current_edge++; + } + + if (self_loop) + { + neighbor = me; + coo_tuples[current_edge] = CooTuple(me, neighbor, default_value); + current_edge++; + } + } + } + } + + // Sort by rows, then columns, update dims + std::stable_sort(coo_tuples, coo_tuples + num_nonzeros); + + return 0; + } +}; + + + +/****************************************************************************** + * COO matrix type + ******************************************************************************/ + + +/** + * CSR sparse format matrix + */ +template< + typename ValueT, + typename OffsetT> +struct CsrMatrix +{ + int num_rows; + int num_cols; + int num_nonzeros; + OffsetT* row_offsets; + OffsetT* column_indices; + ValueT* values; + bool numa_malloc; + + /** + * Constructor + */ + CsrMatrix() : num_rows(0), num_cols(0), num_nonzeros(0), row_offsets(NULL), column_indices(NULL), values(NULL) + { +#ifdef CUB_MKL + numa_malloc = ((numa_available() >= 0) && (numa_num_task_nodes() > 1)); +#else + numa_malloc = false; +#endif + } + + + /** + * Clear + */ + void Clear() + { +#ifdef CUB_MKL + if (numa_malloc) + { + numa_free(row_offsets, sizeof(OffsetT) * (num_rows + 1)); + numa_free(values, sizeof(ValueT) * num_nonzeros); + numa_free(column_indices, sizeof(OffsetT) * num_nonzeros); + } + else + { + if (row_offsets) mkl_free(row_offsets); + if (column_indices) mkl_free(column_indices); + if (values) mkl_free(values); + } + +#else + if (row_offsets) delete[] row_offsets; + if (column_indices) delete[] column_indices; + if (values) delete[] values; +#endif + + row_offsets = NULL; + column_indices = NULL; + values = NULL; + } + + /** + * Destructor + */ + ~CsrMatrix() + { + Clear(); + } + + GraphStats Stats() + { + GraphStats stats; + stats.num_rows = num_rows; + stats.num_cols = num_cols; + stats.num_nonzeros = num_nonzeros; + + // + // Compute diag-distance statistics + // + + OffsetT samples = 0; + double mean = 0.0; + double ss_tot = 0.0; + + for (OffsetT row = 0; row < num_rows; ++row) + { + OffsetT nz_idx_start = row_offsets[row]; + OffsetT nz_idx_end = row_offsets[row + 1]; + + for (int nz_idx = nz_idx_start; nz_idx < nz_idx_end; ++nz_idx) + { + OffsetT col = column_indices[nz_idx]; + double x = (col > row) ? 
col - row : row - col; + + samples++; + double delta = x - mean; + mean = mean + (delta / samples); + ss_tot += delta * (x - mean); + } + } + stats.diag_dist_mean = mean; + double variance = ss_tot / samples; + stats.diag_dist_std_dev = sqrt(variance); + + + // + // Compute deming statistics + // + + samples = 0; + double mean_x = 0.0; + double mean_y = 0.0; + double ss_x = 0.0; + double ss_y = 0.0; + + for (OffsetT row = 0; row < num_rows; ++row) + { + OffsetT nz_idx_start = row_offsets[row]; + OffsetT nz_idx_end = row_offsets[row + 1]; + + for (int nz_idx = nz_idx_start; nz_idx < nz_idx_end; ++nz_idx) + { + OffsetT col = column_indices[nz_idx]; + + samples++; + double x = col; + double y = row; + double delta; + + delta = x - mean_x; + mean_x = mean_x + (delta / samples); + ss_x += delta * (x - mean_x); + + delta = y - mean_y; + mean_y = mean_y + (delta / samples); + ss_y += delta * (y - mean_y); + } + } + + samples = 0; + double s_xy = 0.0; + double s_xxy = 0.0; + double s_xyy = 0.0; + for (OffsetT row = 0; row < num_rows; ++row) + { + OffsetT nz_idx_start = row_offsets[row]; + OffsetT nz_idx_end = row_offsets[row + 1]; + + for (int nz_idx = nz_idx_start; nz_idx < nz_idx_end; ++nz_idx) + { + OffsetT col = column_indices[nz_idx]; + + samples++; + double x = col; + double y = row; + + double xy = (x - mean_x) * (y - mean_y); + double xxy = (x - mean_x) * (x - mean_x) * (y - mean_y); + double xyy = (x - mean_x) * (y - mean_y) * (y - mean_y); + double delta; + + delta = xy - s_xy; + s_xy = s_xy + (delta / samples); + + delta = xxy - s_xxy; + s_xxy = s_xxy + (delta / samples); + + delta = xyy - s_xyy; + s_xyy = s_xyy + (delta / samples); + } + } + + double s_xx = ss_x / num_nonzeros; + double s_yy = ss_y / num_nonzeros; + + double deming_slope = (s_yy - s_xx + sqrt(((s_yy - s_xx) * (s_yy - s_xx)) + (4 * s_xy * s_xy))) / (2 * s_xy); + + stats.pearson_r = (num_nonzeros * s_xy) / (sqrt(ss_x) * sqrt(ss_y)); + + + // + // Compute row-length statistics + // + + // Sample mean + stats.row_length_mean = double(num_nonzeros) / num_rows; + variance = 0.0; + stats.row_length_skewness = 0.0; + for (OffsetT row = 0; row < num_rows; ++row) + { + OffsetT length = row_offsets[row + 1] - row_offsets[row]; + double delta = double(length) - stats.row_length_mean; + variance += (delta * delta); + stats.row_length_skewness += (delta * delta * delta); + } + variance /= num_rows; + stats.row_length_std_dev = sqrt(variance); + stats.row_length_skewness = (stats.row_length_skewness / num_rows) / pow(stats.row_length_std_dev, 3.0); + stats.row_length_variation = stats.row_length_std_dev / stats.row_length_mean; + + return stats; + } + + /** + * Build CSR matrix from sorted COO matrix + */ + void FromCoo(const CooMatrix &coo_matrix) + { + num_rows = coo_matrix.num_rows; + num_cols = coo_matrix.num_cols; + num_nonzeros = coo_matrix.num_nonzeros; + +#ifdef CUB_MKL + + if (numa_malloc) + { + numa_set_strict(1); +// numa_set_bind_policy(1); + +// values = (ValueT*) numa_alloc_interleaved(sizeof(ValueT) * num_nonzeros); +// row_offsets = (OffsetT*) numa_alloc_interleaved(sizeof(OffsetT) * (num_rows + 1)); +// column_indices = (OffsetT*) numa_alloc_interleaved(sizeof(OffsetT) * num_nonzeros); + + row_offsets = (OffsetT*) numa_alloc_onnode(sizeof(OffsetT) * (num_rows + 1), 0); + column_indices = (OffsetT*) numa_alloc_onnode(sizeof(OffsetT) * num_nonzeros, 0); + values = (ValueT*) numa_alloc_onnode(sizeof(ValueT) * num_nonzeros, 1); + } + else + { + values = (ValueT*) mkl_malloc(sizeof(ValueT) * num_nonzeros, 4096); + 
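        // The CSR conversion relies on coo_tuples already being sorted by
        // (row, col).  A minimal sketch of the row_offsets fill performed
        // further down, written as a hypothetical standalone helper (int
        // offsets only, no error handling, assumes <vector>):
        //
        //   // rows = sorted row indices of the nonzeros, e.g. {0, 0, 1, 3}
        //   // yields offsets = {0, 2, 3, 3, 4} for num_rows = 4 (row 2 is empty)
        //   void BuildRowOffsets(const std::vector<int> &rows, int num_rows,
        //                        std::vector<int> &offsets)
        //   {
        //       offsets.assign(num_rows + 1, 0);
        //       int prev_row = -1;
        //       for (int e = 0; e < (int) rows.size(); ++e)
        //       {
        //           for (int r = prev_row + 1; r <= rows[e]; ++r)
        //               offsets[r] = e;                  // first nonzero of row r
        //           prev_row = rows[e];
        //       }
        //       for (int r = prev_row + 1; r <= num_rows; ++r)
        //           offsets[r] = (int) rows.size();      // trailing empty rows + end cap
        //   }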
row_offsets = (OffsetT*) mkl_malloc(sizeof(OffsetT) * (num_rows + 1), 4096); + column_indices = (OffsetT*) mkl_malloc(sizeof(OffsetT) * num_nonzeros, 4096); + + } + +#else + row_offsets = new OffsetT[num_rows + 1]; + column_indices = new OffsetT[num_nonzeros]; + values = new ValueT[num_nonzeros]; +#endif + + OffsetT prev_row = -1; + for (OffsetT current_edge = 0; current_edge < num_nonzeros; current_edge++) + { + OffsetT current_row = coo_matrix.coo_tuples[current_edge].row; + + // Fill in rows up to and including the current row + for (OffsetT row = prev_row + 1; row <= current_row; row++) + { + row_offsets[row] = current_edge; + } + prev_row = current_row; + + column_indices[current_edge] = coo_matrix.coo_tuples[current_edge].col; + values[current_edge] = coo_matrix.coo_tuples[current_edge].val; + } + + // Fill out any trailing edgeless vertices (and the end-of-list element) + for (OffsetT row = prev_row + 1; row <= num_rows; row++) + { + row_offsets[row] = num_nonzeros; + } + } + + + /** + * Display log-histogram to stdout + */ + void DisplayHistogram() + { + // Initialize + int log_counts[9]; + for (int i = 0; i < 9; i++) + { + log_counts[i] = 0; + } + + // Scan + int max_log_length = -1; + for (OffsetT row = 0; row < num_rows; row++) + { + OffsetT length = row_offsets[row + 1] - row_offsets[row]; + + int log_length = -1; + while (length > 0) + { + length /= 10; + log_length++; + } + if (log_length > max_log_length) + { + max_log_length = log_length; + } + + log_counts[log_length + 1]++; + } + printf("CSR matrix (%d rows, %d columns, %d non-zeros):\n", (int) num_rows, (int) num_cols, (int) num_nonzeros); + for (int i = -1; i < max_log_length + 1; i++) + { + printf("\tDegree 1e%d: \t%d (%.2f%%)\n", i, log_counts[i + 1], (float) log_counts[i + 1] * 100.0 / num_cols); + } + fflush(stdout); + } + + + /** + * Display matrix to stdout + */ + void Display() + { + printf("Input Matrix:\n"); + for (OffsetT row = 0; row < num_rows; row++) + { + printf("%d [@%d, #%d]: ", row, row_offsets[row], row_offsets[row + 1] - row_offsets[row]); + for (OffsetT current_edge = row_offsets[row]; current_edge < row_offsets[row + 1]; current_edge++) + { + printf("%d (%f), ", column_indices[current_edge], values[current_edge]); + } + printf("\n"); + } + fflush(stdout); + } + + +}; + + + +/****************************************************************************** + * Matrix transformations + ******************************************************************************/ + +// Comparator for ordering rows by degree (lowest first), then by row-id (lowest first) +template +struct OrderByLow +{ + OffsetT* row_degrees; + OrderByLow(OffsetT* row_degrees) : row_degrees(row_degrees) {} + + bool operator()(const OffsetT &a, const OffsetT &b) + { + if (row_degrees[a] < row_degrees[b]) + return true; + else if (row_degrees[a] > row_degrees[b]) + return false; + else + return (a < b); + } +}; + +// Comparator for ordering rows by degree (highest first), then by row-id (lowest first) +template +struct OrderByHigh +{ + OffsetT* row_degrees; + OrderByHigh(OffsetT* row_degrees) : row_degrees(row_degrees) {} + + bool operator()(const OffsetT &a, const OffsetT &b) + { + if (row_degrees[a] > row_degrees[b]) + return true; + else if (row_degrees[a] < row_degrees[b]) + return false; + else + return (a < b); + } +}; + + + +/** + * Reverse Cuthill-McKee + */ +template +void RcmRelabel( + CsrMatrix& matrix, + OffsetT* relabel_indices) +{ + // Initialize row degrees + OffsetT* row_degrees_in = new OffsetT[matrix.num_rows]; + OffsetT* 
row_degrees_out = new OffsetT[matrix.num_rows]; + for (OffsetT row = 0; row < matrix.num_rows; ++row) + { + row_degrees_in[row] = 0; + row_degrees_out[row] = matrix.row_offsets[row + 1] - matrix.row_offsets[row]; + } + for (OffsetT nonzero = 0; nonzero < matrix.num_nonzeros; ++nonzero) + { + row_degrees_in[matrix.column_indices[nonzero]]++; + } + + // Initialize unlabeled set + typedef std::set > UnlabeledSet; + typename UnlabeledSet::key_compare unlabeled_comp(row_degrees_in); + UnlabeledSet unlabeled(unlabeled_comp); + for (OffsetT row = 0; row < matrix.num_rows; ++row) + { + relabel_indices[row] = -1; + unlabeled.insert(row); + } + + // Initialize queue set + std::deque q; + + // Process unlabeled vertices (traverse connected components) + OffsetT relabel_idx = 0; + while (!unlabeled.empty()) + { + // Seed the unvisited frontier queue with the unlabeled vertex of lowest-degree + OffsetT vertex = *unlabeled.begin(); + q.push_back(vertex); + + while (!q.empty()) + { + vertex = q.front(); + q.pop_front(); + + if (relabel_indices[vertex] == -1) + { + // Update this vertex + unlabeled.erase(vertex); + relabel_indices[vertex] = relabel_idx; + relabel_idx++; + + // Sort neighbors by degree + OrderByLow neighbor_comp(row_degrees_in); + std::sort( + matrix.column_indices + matrix.row_offsets[vertex], + matrix.column_indices + matrix.row_offsets[vertex + 1], + neighbor_comp); + + // Inspect neighbors, adding to the out frontier if unlabeled + for (OffsetT neighbor_idx = matrix.row_offsets[vertex]; + neighbor_idx < matrix.row_offsets[vertex + 1]; + ++neighbor_idx) + { + OffsetT neighbor = matrix.column_indices[neighbor_idx]; + q.push_back(neighbor); + } + } + } + } + +/* + // Reverse labels + for (int row = 0; row < matrix.num_rows; ++row) + { + relabel_indices[row] = matrix.num_rows - relabel_indices[row] - 1; + } +*/ + + // Cleanup + if (row_degrees_in) delete[] row_degrees_in; + if (row_degrees_out) delete[] row_degrees_out; +} + + +/** + * Reverse Cuthill-McKee + */ +template +void RcmRelabel( + CsrMatrix& matrix, + bool verbose = false) +{ + // Do not process if not square + if (matrix.num_cols != matrix.num_rows) + { + if (verbose) { + printf("RCM transformation ignored (not square)\n"); fflush(stdout); + } + return; + } + + // Initialize relabel indices + OffsetT* relabel_indices = new OffsetT[matrix.num_rows]; + + if (verbose) { + printf("RCM relabeling... "); fflush(stdout); + } + + RcmRelabel(matrix, relabel_indices); + + if (verbose) { + printf("done. Reconstituting... "); fflush(stdout); + } + + // Create a COO matrix from the relabel indices + CooMatrix coo_matrix; + coo_matrix.InitCsrRelabel(matrix, relabel_indices); + + // Reconstitute the CSR matrix from the sorted COO tuples + if (relabel_indices) delete[] relabel_indices; + matrix.Clear(); + matrix.FromCoo(coo_matrix); + + if (verbose) { + printf("done. "); fflush(stdout); + } +} + + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/experimental/spmv_compare.cu b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/spmv_compare.cu new file mode 100644 index 0000000..b64297d --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/spmv_compare.cu @@ -0,0 +1,917 @@ +/****************************************************************************** + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIAeBILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +//--------------------------------------------------------------------- +// SpMV comparison tool +//--------------------------------------------------------------------- + +#include +#include +#include +#include +#include +#include + +#include + +#include "sparse_matrix.h" + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include +#include + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants, and type declarations +//--------------------------------------------------------------------- + +bool g_quiet = false; // Whether to display stats in CSV format +bool g_verbose = false; // Whether to display output to console +bool g_verbose2 = false; // Whether to display input to console +CachingDeviceAllocator g_allocator(true); // Caching allocator for device memory + + +//--------------------------------------------------------------------- +// SpMV verification +//--------------------------------------------------------------------- + +// Compute reference SpMV y = Ax +template < + typename ValueT, + typename OffsetT> +void SpmvGold( + CsrMatrix& a, + ValueT* vector_x, + ValueT* vector_y_in, + ValueT* vector_y_out, + ValueT alpha, + ValueT beta) +{ + for (OffsetT row = 0; row < a.num_rows; ++row) + { + ValueT partial = beta * vector_y_in[row]; + for ( + OffsetT offset = a.row_offsets[row]; + offset < a.row_offsets[row + 1]; + ++offset) + { + partial += alpha * a.values[offset] * vector_x[a.column_indices[offset]]; + } + vector_y_out[row] = partial; + } +} + + +//--------------------------------------------------------------------- +// GPU I/O proxy +//--------------------------------------------------------------------- + +/** + * Read every matrix nonzero value, read every corresponding vector value + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + typename ValueT, + typename OffsetT, + typename VectorItr> 
+__launch_bounds__ (int(BLOCK_THREADS)) +__global__ void NonZeroIoKernel( + SpmvParams params, + VectorItr d_vector_x) +{ + enum + { + TILE_ITEMS = BLOCK_THREADS * ITEMS_PER_THREAD, + }; + + + ValueT nonzero = 0.0; + + int tile_idx = blockIdx.x; + + OffsetT block_offset = tile_idx * TILE_ITEMS; + + OffsetT column_indices[ITEMS_PER_THREAD]; + ValueT values[ITEMS_PER_THREAD]; + + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + OffsetT nonzero_idx = block_offset + (ITEM * BLOCK_THREADS) + threadIdx.x; + + OffsetT* ci = params.d_column_indices + nonzero_idx; + ValueT*a = params.d_values + nonzero_idx; + + column_indices[ITEM] = (nonzero_idx < params.num_nonzeros) ? *ci : 0; + values[ITEM] = (nonzero_idx < params.num_nonzeros) ? *a : 0.0; + } + + __syncthreads(); + + // Read vector + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + ValueT vector_value = ThreadLoad(params.d_vector_x + column_indices[ITEM]); + nonzero += vector_value * values[ITEM]; + } + + __syncthreads(); + + if (block_offset < params.num_rows) + { + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + { + OffsetT row_idx = block_offset + (ITEM * BLOCK_THREADS) + threadIdx.x; + if (row_idx < params.num_rows) + { + OffsetT row_end_offset = ThreadLoad(params.d_row_end_offsets + row_idx); + + if ((row_end_offset >= 0) && (nonzero == nonzero)) + params.d_vector_y[row_idx] = nonzero; + } + } + } + +} + + +/** + * Run GPU I/O proxy + */ +template < + typename ValueT, + typename OffsetT> +float TestGpuCsrIoProxy( + SpmvParams& params, + int timing_iterations) +{ + enum { + BLOCK_THREADS = 128, + ITEMS_PER_THREAD = 7, + TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD, + }; + +// size_t smem = 1024 * 16; + size_t smem = 1024 * 0; + + unsigned int nonzero_blocks = (params.num_nonzeros + TILE_SIZE - 1) / TILE_SIZE; + unsigned int row_blocks = (params.num_rows + TILE_SIZE - 1) / TILE_SIZE; + unsigned int blocks = std::max(nonzero_blocks, row_blocks); + + typedef TexRefInputIterator TexItr; + TexItr x_itr; + CubDebugExit(x_itr.BindTexture(params.d_vector_x)); + + // Get device ordinal + int device_ordinal; + CubDebugExit(cudaGetDevice(&device_ordinal)); + + // Get device SM version + int sm_version; + CubDebugExit(SmVersion(sm_version, device_ordinal)); + + void (*kernel)(SpmvParams, TexItr) = NonZeroIoKernel; + + + int spmv_sm_occupancy; + CubDebugExit(MaxSmOccupancy(spmv_sm_occupancy, kernel, BLOCK_THREADS, smem)); + + if (!g_quiet) + printf("NonZeroIoKernel<%d,%d><<<%d, %d>>>, sm occupancy %d\n", BLOCK_THREADS, ITEMS_PER_THREAD, blocks, BLOCK_THREADS, spmv_sm_occupancy); + + // Warmup + NonZeroIoKernel<<>>(params, x_itr); + + // Check for failures + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(SyncStream(0)); + + // Timing + GpuTimer timer; + float elapsed_millis = 0.0; + timer.Start(); + for (int it = 0; it < timing_iterations; ++it) + { + NonZeroIoKernel<<>>(params, x_itr); + } + timer.Stop(); + elapsed_millis += timer.ElapsedMillis(); + + CubDebugExit(x_itr.UnbindTexture()); + + return elapsed_millis / timing_iterations; +} + + + +//--------------------------------------------------------------------- +// cuSparse HybMV +//--------------------------------------------------------------------- + +/** + * Run cuSparse HYB SpMV (specialized for fp32) + */ +template < + typename OffsetT> +float TestCusparseHybmv( + float* vector_y_in, + float* reference_vector_y_out, + SpmvParams& params, + int timing_iterations, + cusparseHandle_t cusparse) +{ + CpuTimer cpu_timer; 
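    // Note: cuSPARSE's HYB format stores the matrix as a regular ELL slab plus
    // a COO overflow part; CUSPARSE_HYB_PARTITION_AUTO lets the library choose
    // the split.  The timer started below measures only that one-time csr2hyb
    // conversion, which is reported as "HYB setup ms" separately from the
    // per-iteration SpMV timings.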
+ cpu_timer.Start(); + + // Construct Hyb matrix + cusparseMatDescr_t mat_desc; + cusparseHybMat_t hyb_desc; + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseCreateMatDescr(&mat_desc)); + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseCreateHybMat(&hyb_desc)); + cusparseStatus_t status = cusparseScsr2hyb( + cusparse, + params.num_rows, params.num_cols, + mat_desc, + params.d_values, params.d_row_end_offsets, params.d_column_indices, + hyb_desc, + 0, + CUSPARSE_HYB_PARTITION_AUTO); + AssertEquals(CUSPARSE_STATUS_SUCCESS, status); + + cudaDeviceSynchronize(); + cpu_timer.Stop(); + float elapsed_millis = cpu_timer.ElapsedMillis(); + printf("HYB setup ms, %.5f, ", elapsed_millis); + + // Reset input/output vector y + CubDebugExit(cudaMemcpy(params.d_vector_y, vector_y_in, sizeof(float) * params.num_rows, cudaMemcpyHostToDevice)); + + // Warmup + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseShybmv( + cusparse, + CUSPARSE_OPERATION_NON_TRANSPOSE, + ¶ms.alpha, mat_desc, + hyb_desc, + params.d_vector_x, ¶ms.beta, params.d_vector_y)); + + if (!g_quiet) + { + int compare = CompareDeviceResults(reference_vector_y_out, params.d_vector_y, params.num_rows, true, g_verbose); + printf("\t%s\n", compare ? "FAIL" : "PASS"); fflush(stdout); + } + + // Timing + elapsed_millis = 0.0; + GpuTimer timer; + + timer.Start(); + for(int it = 0; it < timing_iterations; ++it) + { + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseShybmv( + cusparse, + CUSPARSE_OPERATION_NON_TRANSPOSE, + ¶ms.alpha, mat_desc, + hyb_desc, + params.d_vector_x, ¶ms.beta, params.d_vector_y)); + } + timer.Stop(); + elapsed_millis += timer.ElapsedMillis(); + + // Cleanup + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseDestroyHybMat(hyb_desc)); + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseDestroyMatDescr(mat_desc)); + + return elapsed_millis / timing_iterations; +} + + +/** + * Run cuSparse HYB SpMV (specialized for fp64) + */ +template < + typename OffsetT> +float TestCusparseHybmv( + double* vector_y_in, + double* reference_vector_y_out, + SpmvParams& params, + int timing_iterations, + cusparseHandle_t cusparse) +{ + CpuTimer cpu_timer; + cpu_timer.Start(); + + // Construct Hyb matrix + cusparseMatDescr_t mat_desc; + cusparseHybMat_t hyb_desc; + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseCreateMatDescr(&mat_desc)); + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseCreateHybMat(&hyb_desc)); + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseDcsr2hyb( + cusparse, + params.num_rows, params.num_cols, + mat_desc, + params.d_values, params.d_row_end_offsets, params.d_column_indices, + hyb_desc, + 0, + CUSPARSE_HYB_PARTITION_AUTO)); + + cudaDeviceSynchronize(); + cpu_timer.Stop(); + float elapsed_millis = cpu_timer.ElapsedMillis(); + printf("HYB setup ms, %.5f, ", elapsed_millis); + + // Reset input/output vector y + CubDebugExit(cudaMemcpy(params.d_vector_y, vector_y_in, sizeof(float) * params.num_rows, cudaMemcpyHostToDevice)); + + // Warmup + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseDhybmv( + cusparse, + CUSPARSE_OPERATION_NON_TRANSPOSE, + ¶ms.alpha, mat_desc, + hyb_desc, + params.d_vector_x, ¶ms.beta, params.d_vector_y)); + + if (!g_quiet) + { + int compare = CompareDeviceResults(reference_vector_y_out, params.d_vector_y, params.num_rows, true, g_verbose); + printf("\t%s\n", compare ? 
"FAIL" : "PASS"); fflush(stdout); + } + + // Timing + elapsed_millis = 0.0; + GpuTimer timer; + + timer.Start(); + for(int it = 0; it < timing_iterations; ++it) + { + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseDhybmv( + cusparse, + CUSPARSE_OPERATION_NON_TRANSPOSE, + ¶ms.alpha, mat_desc, + hyb_desc, + params.d_vector_x, ¶ms.beta, params.d_vector_y)); + } + timer.Stop(); + elapsed_millis += timer.ElapsedMillis(); + + // Cleanup + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseDestroyHybMat(hyb_desc)); + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseDestroyMatDescr(mat_desc)); + + return elapsed_millis / timing_iterations; +} + + + +//--------------------------------------------------------------------- +// cuSparse CsrMV +//--------------------------------------------------------------------- + +/** + * Run cuSparse SpMV (specialized for fp32) + */ +template < + typename OffsetT> +float TestCusparseCsrmv( + float* vector_y_in, + float* reference_vector_y_out, + SpmvParams& params, + int timing_iterations, + cusparseHandle_t cusparse) +{ + cusparseMatDescr_t desc; + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseCreateMatDescr(&desc)); + + // Reset input/output vector y + CubDebugExit(cudaMemcpy(params.d_vector_y, vector_y_in, sizeof(float) * params.num_rows, cudaMemcpyHostToDevice)); + + // Warmup + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseScsrmv( + cusparse, CUSPARSE_OPERATION_NON_TRANSPOSE, + params.num_rows, params.num_cols, params.num_nonzeros, ¶ms.alpha, desc, + params.d_values, params.d_row_end_offsets, params.d_column_indices, + params.d_vector_x, ¶ms.beta, params.d_vector_y)); + + if (!g_quiet) + { + int compare = CompareDeviceResults(reference_vector_y_out, params.d_vector_y, params.num_rows, true, g_verbose); + printf("\t%s\n", compare ? "FAIL" : "PASS"); fflush(stdout); + } + + // Timing + float elapsed_millis = 0.0; + GpuTimer timer; + + timer.Start(); + for(int it = 0; it < timing_iterations; ++it) + { + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseScsrmv( + cusparse, CUSPARSE_OPERATION_NON_TRANSPOSE, + params.num_rows, params.num_cols, params.num_nonzeros, ¶ms.alpha, desc, + params.d_values, params.d_row_end_offsets, params.d_column_indices, + params.d_vector_x, ¶ms.beta, params.d_vector_y)); + } + timer.Stop(); + elapsed_millis += timer.ElapsedMillis(); + + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseDestroyMatDescr(desc)); + return elapsed_millis / timing_iterations; +} + + +/** + * Run cuSparse SpMV (specialized for fp64) + */ +template < + typename OffsetT> +float TestCusparseCsrmv( + double* vector_y_in, + double* reference_vector_y_out, + SpmvParams& params, + int timing_iterations, + cusparseHandle_t cusparse) +{ + cusparseMatDescr_t desc; + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseCreateMatDescr(&desc)); + + // Reset input/output vector y + CubDebugExit(cudaMemcpy(params.d_vector_y, vector_y_in, sizeof(float) * params.num_rows, cudaMemcpyHostToDevice)); + + // Warmup + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseDcsrmv( + cusparse, CUSPARSE_OPERATION_NON_TRANSPOSE, + params.num_rows, params.num_cols, params.num_nonzeros, ¶ms.alpha, desc, + params.d_values, params.d_row_end_offsets, params.d_column_indices, + params.d_vector_x, ¶ms.beta, params.d_vector_y)); + + if (!g_quiet) + { + int compare = CompareDeviceResults(reference_vector_y_out, params.d_vector_y, params.num_rows, true, g_verbose); + printf("\t%s\n", compare ? 
"FAIL" : "PASS"); fflush(stdout); + } + + // Timing + float elapsed_millis = 0.0; + GpuTimer timer; + timer.Start(); + for(int it = 0; it < timing_iterations; ++it) + { + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseDcsrmv( + cusparse, CUSPARSE_OPERATION_NON_TRANSPOSE, + params.num_rows, params.num_cols, params.num_nonzeros, ¶ms.alpha, desc, + params.d_values, params.d_row_end_offsets, params.d_column_indices, + params.d_vector_x, ¶ms.beta, params.d_vector_y)); + + } + timer.Stop(); + elapsed_millis += timer.ElapsedMillis(); + + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseDestroyMatDescr(desc)); + return elapsed_millis / timing_iterations; +} + +//--------------------------------------------------------------------- +// GPU Merge-based SpMV +//--------------------------------------------------------------------- + +/** + * Run CUB SpMV + */ +template < + typename ValueT, + typename OffsetT> +float TestGpuMergeCsrmv( + ValueT* vector_y_in, + ValueT* reference_vector_y_out, + SpmvParams& params, + int timing_iterations) +{ + // Allocate temporary storage + size_t temp_storage_bytes = 0; + void *d_temp_storage = NULL; + + // Get amount of temporary storage needed + CubDebugExit(DeviceSpmv::CsrMV( + d_temp_storage, temp_storage_bytes, + params.d_values, params.d_row_end_offsets, params.d_column_indices, + params.d_vector_x, params.d_vector_y, + params.num_rows, params.num_cols, params.num_nonzeros, +// params.alpha, params.beta, + (cudaStream_t) 0, false)); + + // Allocate + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Reset input/output vector y + CubDebugExit(cudaMemcpy(params.d_vector_y, vector_y_in, sizeof(ValueT) * params.num_rows, cudaMemcpyHostToDevice)); + + // Warmup + CubDebugExit(DeviceSpmv::CsrMV( + d_temp_storage, temp_storage_bytes, + params.d_values, params.d_row_end_offsets, params.d_column_indices, + params.d_vector_x, params.d_vector_y, + params.num_rows, params.num_cols, params.num_nonzeros, +// params.alpha, params.beta, + (cudaStream_t) 0, !g_quiet)); + + if (!g_quiet) + { + int compare = CompareDeviceResults(reference_vector_y_out, params.d_vector_y, params.num_rows, true, g_verbose); + printf("\t%s\n", compare ? 
"FAIL" : "PASS"); fflush(stdout); + } + + // Timing + GpuTimer timer; + float elapsed_millis = 0.0; + + timer.Start(); + for(int it = 0; it < timing_iterations; ++it) + { + CubDebugExit(DeviceSpmv::CsrMV( + d_temp_storage, temp_storage_bytes, + params.d_values, params.d_row_end_offsets, params.d_column_indices, + params.d_vector_x, params.d_vector_y, + params.num_rows, params.num_cols, params.num_nonzeros, +// params.alpha, params.beta, + (cudaStream_t) 0, false)); + } + timer.Stop(); + elapsed_millis += timer.ElapsedMillis(); + + return elapsed_millis / timing_iterations; +} + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + +/** + * Display perf + */ +template +void DisplayPerf( + float device_giga_bandwidth, + double avg_millis, + CsrMatrix& csr_matrix) +{ + double nz_throughput, effective_bandwidth; + size_t total_bytes = (csr_matrix.num_nonzeros * (sizeof(ValueT) * 2 + sizeof(OffsetT))) + + (csr_matrix.num_rows) * (sizeof(OffsetT) + sizeof(ValueT)); + + nz_throughput = double(csr_matrix.num_nonzeros) / avg_millis / 1.0e6; + effective_bandwidth = double(total_bytes) / avg_millis / 1.0e6; + + if (!g_quiet) + printf("fp%d: %.4f avg ms, %.5f gflops, %.3lf effective GB/s (%.2f%% peak)\n", + sizeof(ValueT) * 8, + avg_millis, + 2 * nz_throughput, + effective_bandwidth, + effective_bandwidth / device_giga_bandwidth * 100); + else + printf("%.5f, %.6f, %.3lf, %.2f%%, ", + avg_millis, + 2 * nz_throughput, + effective_bandwidth, + effective_bandwidth / device_giga_bandwidth * 100); + + fflush(stdout); +} + + + +/** + * Run tests + */ +template < + typename ValueT, + typename OffsetT> +void RunTest( + bool rcm_relabel, + ValueT alpha, + ValueT beta, + CooMatrix& coo_matrix, + int timing_iterations, + CommandLineArgs& args) +{ + // Adaptive timing iterations: run 16 billion nonzeros through + if (timing_iterations == -1) + timing_iterations = std::min(50000ull, std::max(100ull, ((16ull << 30) / coo_matrix.num_nonzeros))); + + if (!g_quiet) + printf("\t%d timing iterations\n", timing_iterations); + + // Convert to CSR + CsrMatrix csr_matrix; + csr_matrix.FromCoo(coo_matrix); + if (!args.CheckCmdLineFlag("csrmv")) + coo_matrix.Clear(); + + // Relabel + if (rcm_relabel) + { + if (!g_quiet) + { + csr_matrix.Stats().Display(); + printf("\n"); + csr_matrix.DisplayHistogram(); + printf("\n"); + if (g_verbose2) + csr_matrix.Display(); + printf("\n"); + } + + RcmRelabel(csr_matrix, !g_quiet); + + if (!g_quiet) printf("\n"); + } + + // Display matrix info + csr_matrix.Stats().Display(!g_quiet); + if (!g_quiet) + { + printf("\n"); + csr_matrix.DisplayHistogram(); + printf("\n"); + if (g_verbose2) + csr_matrix.Display(); + printf("\n"); + } + fflush(stdout); + + // Allocate input and output vectors + ValueT* vector_x = new ValueT[csr_matrix.num_cols]; + ValueT* vector_y_in = new ValueT[csr_matrix.num_rows]; + ValueT* vector_y_out = new ValueT[csr_matrix.num_rows]; + + for (int col = 0; col < csr_matrix.num_cols; ++col) + vector_x[col] = 1.0; + + for (int row = 0; row < csr_matrix.num_rows; ++row) + vector_y_in[row] = 1.0; + + // Compute reference answer + SpmvGold(csr_matrix, vector_x, vector_y_in, vector_y_out, alpha, beta); + + float avg_millis; + + if (g_quiet) { + printf("%s, %s, ", args.deviceProp.name, (sizeof(ValueT) > 4) ? 
"fp64" : "fp32"); fflush(stdout); + } + + // Get GPU device bandwidth (GB/s) + float device_giga_bandwidth = args.device_giga_bandwidth; + + // Allocate and initialize GPU problem + SpmvParams params; + + CubDebugExit(g_allocator.DeviceAllocate((void **) ¶ms.d_values, sizeof(ValueT) * csr_matrix.num_nonzeros)); + CubDebugExit(g_allocator.DeviceAllocate((void **) ¶ms.d_row_end_offsets, sizeof(OffsetT) * (csr_matrix.num_rows + 1))); + CubDebugExit(g_allocator.DeviceAllocate((void **) ¶ms.d_column_indices, sizeof(OffsetT) * csr_matrix.num_nonzeros)); + CubDebugExit(g_allocator.DeviceAllocate((void **) ¶ms.d_vector_x, sizeof(ValueT) * csr_matrix.num_cols)); + CubDebugExit(g_allocator.DeviceAllocate((void **) ¶ms.d_vector_y, sizeof(ValueT) * csr_matrix.num_rows)); + params.num_rows = csr_matrix.num_rows; + params.num_cols = csr_matrix.num_cols; + params.num_nonzeros = csr_matrix.num_nonzeros; + params.alpha = alpha; + params.beta = beta; + + CubDebugExit(cudaMemcpy(params.d_values, csr_matrix.values, sizeof(ValueT) * csr_matrix.num_nonzeros, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(params.d_row_end_offsets, csr_matrix.row_offsets, sizeof(OffsetT) * (csr_matrix.num_rows + 1), cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(params.d_column_indices, csr_matrix.column_indices, sizeof(OffsetT) * csr_matrix.num_nonzeros, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(params.d_vector_x, vector_x, sizeof(ValueT) * csr_matrix.num_cols, cudaMemcpyHostToDevice)); + + if (!g_quiet) printf("\n\n"); + printf("GPU CSR I/O Prox, "); fflush(stdout); + avg_millis = TestGpuCsrIoProxy(params, timing_iterations); + DisplayPerf(device_giga_bandwidth, avg_millis, csr_matrix); + + if (args.CheckCmdLineFlag("csrmv")) + { + if (!g_quiet) printf("\n\n"); + printf("CUB, "); fflush(stdout); + avg_millis = TestGpuMergeCsrmv(vector_y_in, vector_y_out, params, timing_iterations); + DisplayPerf(device_giga_bandwidth, avg_millis, csr_matrix); + } + + // Initialize cuSparse + cusparseHandle_t cusparse; + AssertEquals(CUSPARSE_STATUS_SUCCESS, cusparseCreate(&cusparse)); + + if (args.CheckCmdLineFlag("csrmv")) + { + if (!g_quiet) printf("\n\n"); + printf("Cusparse CsrMV, "); fflush(stdout); + avg_millis = TestCusparseCsrmv(vector_y_in, vector_y_out, params, timing_iterations, cusparse); + DisplayPerf(device_giga_bandwidth, avg_millis, csr_matrix); + } + + if (args.CheckCmdLineFlag("hybmv")) + { + if (!g_quiet) printf("\n\n"); + printf("Cusparse HybMV, "); fflush(stdout); + + avg_millis = TestCusparseHybmv(vector_y_in, vector_y_out, params, timing_iterations, cusparse); + DisplayPerf(device_giga_bandwidth, avg_millis, csr_matrix); + } + + + // Cleanup + if (params.d_values) CubDebugExit(g_allocator.DeviceFree(params.d_values)); + if (params.d_row_end_offsets) CubDebugExit(g_allocator.DeviceFree(params.d_row_end_offsets)); + if (params.d_column_indices) CubDebugExit(g_allocator.DeviceFree(params.d_column_indices)); + if (params.d_vector_x) CubDebugExit(g_allocator.DeviceFree(params.d_vector_x)); + if (params.d_vector_y) CubDebugExit(g_allocator.DeviceFree(params.d_vector_y)); + + if (vector_x) delete[] vector_x; + if (vector_y_in) delete[] vector_y_in; + if (vector_y_out) delete[] vector_y_out; +} + +/** + * Run tests + */ +template < + typename ValueT, + typename OffsetT> +void RunTests( + bool rcm_relabel, + ValueT alpha, + ValueT beta, + const std::string& mtx_filename, + int grid2d, + int grid3d, + int wheel, + int dense, + int timing_iterations, + CommandLineArgs& args) +{ + // Initialize matrix in COO form 
+ CooMatrix coo_matrix; + + if (!mtx_filename.empty()) + { + // Parse matrix market file + printf("%s, ", mtx_filename.c_str()); fflush(stdout); + coo_matrix.InitMarket(mtx_filename, 1.0, !g_quiet); + + if ((coo_matrix.num_rows == 1) || (coo_matrix.num_cols == 1) || (coo_matrix.num_nonzeros == 1)) + { + if (!g_quiet) printf("Trivial dataset\n"); + exit(0); + } + } + else if (grid2d > 0) + { + // Generate 2D lattice + printf("grid2d_%d, ", grid2d); fflush(stdout); + coo_matrix.InitGrid2d(grid2d, false); + } + else if (grid3d > 0) + { + // Generate 3D lattice + printf("grid3d_%d, ", grid3d); fflush(stdout); + coo_matrix.InitGrid3d(grid3d, false); + } + else if (wheel > 0) + { + // Generate wheel graph + printf("wheel_%d, ", grid2d); fflush(stdout); + coo_matrix.InitWheel(wheel); + } + else if (dense > 0) + { + // Generate dense graph + OffsetT size = 1 << 24; // 16M nnz + args.GetCmdLineArgument("size", size); + + OffsetT rows = size / dense; + printf("dense_%d_x_%d, ", rows, dense); fflush(stdout); + coo_matrix.InitDense(rows, dense); + } + else + { + fprintf(stderr, "No graph type specified.\n"); + exit(1); + } + + RunTest( + rcm_relabel, + alpha, + beta, + coo_matrix, + timing_iterations, + args); +} + + + +/** + * Main + */ +int main(int argc, char **argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + if (args.CheckCmdLineFlag("help")) + { + printf( + "%s " + "[--csrmv | --hybmv | --bsrmv ] " + "[--device=] " + "[--quiet] " + "[--v] " + "[--i=] " + "[--fp64] " + "[--rcm] " + "[--alpha=] " + "[--beta=] " + "\n\t" + "--mtx= " + "\n\t" + "--dense=" + "\n\t" + "--grid2d=" + "\n\t" + "--grid3d=" + "\n\t" + "--wheel=" + "\n", argv[0]); + exit(0); + } + + bool fp64; + bool rcm_relabel; + std::string mtx_filename; + int grid2d = -1; + int grid3d = -1; + int wheel = -1; + int dense = -1; + int timing_iterations = -1; + float alpha = 1.0; + float beta = 0.0; + + g_verbose = args.CheckCmdLineFlag("v"); + g_verbose2 = args.CheckCmdLineFlag("v2"); + g_quiet = args.CheckCmdLineFlag("quiet"); + fp64 = args.CheckCmdLineFlag("fp64"); + rcm_relabel = args.CheckCmdLineFlag("rcm"); + args.GetCmdLineArgument("i", timing_iterations); + args.GetCmdLineArgument("mtx", mtx_filename); + args.GetCmdLineArgument("grid2d", grid2d); + args.GetCmdLineArgument("grid3d", grid3d); + args.GetCmdLineArgument("wheel", wheel); + args.GetCmdLineArgument("dense", dense); + args.GetCmdLineArgument("alpha", alpha); + args.GetCmdLineArgument("beta", beta); + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Run test(s) + if (fp64) + { + RunTests(rcm_relabel, alpha, beta, mtx_filename, grid2d, grid3d, wheel, dense, timing_iterations, args); + } + else + { + RunTests(rcm_relabel, alpha, beta, mtx_filename, grid2d, grid3d, wheel, dense, timing_iterations, args); + } + + CubDebugExit(cudaDeviceSynchronize()); + printf("\n"); + + return 0; +} diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/experimental/spmv_script.sh b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/spmv_script.sh new file mode 100755 index 0000000..f432043 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/experimental/spmv_script.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +for i in 1 2 4 8 16 32 64 128 256 512 1024 2048 4096 8192 16384 32768 65536 131072 262144 524288 1048576 2097152 4194304 8388608 16777216 +do + echo `date`, `$1 --dense=$i $2 $3 $4 $5 $6 $7` +done + +echo +echo + +for i in `ls /home/dumerrill/graphs/spmv/*.mtx` +do + if [[ ( "`head -n 50 $i | grep complex`" = "" ) && ( "`head -n 50 $i | 
grep array`" = "" ) ]] + then + echo `date`, `$1 --mtx=$i $2 $3 $4 $5 $6 $7 2>/dev/null` + fi +done + +echo +echo + +for i in `ls /scratch/dumerrill/graphs/mtx/*.mtx` +#for i in `ls /cygdrive/w/Dev/UFget/mtx/*.mtx` +do + if [[ ( "`head -n 50 $i | grep complex`" = "" ) && ( "`head -n 50 $i | grep array`" = "" ) ]] + then + echo `date`, `$1 --mtx=$i $2 $3 $4 $5 $6 $7 2>/dev/null` + fi +done + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/.gitignore b/hash-graph-dehornetify/externals/cub-1.8.0/test/.gitignore new file mode 100644 index 0000000..978ba97 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/.gitignore @@ -0,0 +1,3 @@ +/bin +/link_main.obj +/dummy/ diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/Makefile b/hash-graph-dehornetify/externals/cub-1.8.0/test/Makefile new file mode 100644 index 0000000..958760a --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/Makefile @@ -0,0 +1,468 @@ +#/****************************************************************************** +# * Copyright (c) 2011, Duane Merrill. All rights reserved. +# * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. +# * +# * Redistribution and use in source and binary forms, with or without +# * modification, are permitted provided that the following conditions are met: +# * * Redistributions of source code must retain the above copyright +# * notice, this list of conditions and the following disclaimer. +# * * Redistributions in binary form must reproduce the above copyright +# * notice, this list of conditions and the following disclaimer in the +# * documentation and/or other materials provided with the distribution. +# * * Neither the name of the NVIDIA CORPORATION nor the +# * names of its contributors may be used to endorse or promote products +# * derived from this software without specific prior written permission. +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY +# * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# * +#******************************************************************************/ + + +#------------------------------------------------------------------------------- +# +# Makefile usage +# +# make [sm=] [cdp=<0|1>] [force32=<0|1>] [abi=<0|1>] [open64=<0|1>] [verbose=<0|1>] [keep=<0|1>] [quicktest=<0|1>] [quickertest=<0|1>] +# +#------------------------------------------------------------------------------- + +include ../common.mk + +#------------------------------------------------------------------------------- +# Commandline Options +#------------------------------------------------------------------------------- + +# Testing mode option (quick/thorough) +ifeq ($(quickertest), 1) + NVCCFLAGS += -DQUICKER_TEST + TEST_SUFFIX = quicker +else ifeq ($(quicktest), 1) + NVCCFLAGS += -DQUICK_TEST + TEST_SUFFIX = quick +else + TEST_SUFFIX = thorough + NPPI = +endif + + +# CUDA memcheck (enabled by default) +ifeq ($(memcheck), 0) + MEMCHECK = +else + MEMCHECK = cuda-memcheck +endif + + +#------------------------------------------------------------------------------- +# Compiler and compilation platform +#------------------------------------------------------------------------------- + +# Includes +INC += -I$(CUB_DIR) -I$(CUB_DIR)test + +# Suffix to append to each binary +SUFFIX = $(BIN_SUFFIX)_$(TEST_SUFFIX) + +# Define test arch +DEFINES += -DTEST_ARCH=$(TEST_ARCH) + + +#------------------------------------------------------------------------------- +# Dependency Lists +#------------------------------------------------------------------------------- + +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) + +DEPS = $(CUB_DEPS) \ + $(CUB_DIR)test/Makefile \ + $(CUB_DIR)test/test_util.h \ + $(CUB_DIR)test/mersenne.h \ + +BLOCK_REDUCE = test_block_reduce_raking \ + test_block_reduce_warp_reductions + + +BLOCK_SCAN = test_block_scan_raking \ + test_block_scan_raking_memoize \ + test_block_scan_warp_scans + + +BLOCK_RADIX_SORT = test_block_radix_sort_keys \ + test_block_radix_sort_pairs + +DEVICE_RADIX_SORT = test_device_radix_sort \ + test_device_radix_sort_segmented + +ALL = link \ + test_iterator \ + test_allocator \ + test_warp_scan \ + test_warp_reduce \ + $(BLOCK_REDUCE) \ + $(BLOCK_SCAN) \ + $(BLOCK_RADIX_SORT) \ + test_block_load_store \ + test_block_histogram \ + test_device_reduce \ + test_device_histogram \ + test_device_scan \ + $(DEVICE_RADIX_SORT) \ + test_device_reduce_by_key\ + test_device_run_length_encode\ + test_device_select_unique \ + test_device_select_if + +# test_grid_barrier \ fails on sm110 +# test_device_seg_reduce + + + +#------------------------------------------------------------------------------- +# make default +#------------------------------------------------------------------------------- + +default: + + +#------------------------------------------------------------------------------- +# make clean +#------------------------------------------------------------------------------- + +clean : + rm -f bin/*$(CPU_ARCH_SUFFIX)* + rm -f *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx *.hash *.cu.cpp *.o + + +#------------------------------------------------------------------------------- +# make all +#------------------------------------------------------------------------------- + +all : $(ALL) + + +#------------------------------------------------------------------------------- +# make run +#------------------------------------------------------------------------------- + +run : + for i in $(ALL); do $(MEMCHECK) 
./bin/$${i}_$(SUFFIX) --device=$(device) || exit 1; done + +run_block_reduce : + for i in $(BLOCK_REDUCE); do $(MEMCHECK) ./bin/$${i}_$(SUFFIX) --device=$(device) || exit 1; done + +run_block_scan : + for i in $(BLOCK_SCAN); do $(MEMCHECK) ./bin/$${i}_$(SUFFIX) --device=$(device) || exit 1; done + +run_block_radix_sort : + for i in $(BLOCK_RADIX_SORT); do $(MEMCHECK) ./bin/$${i}_$(SUFFIX) --device=$(device) || exit 1; done + +run_device_radix_sort : + for i in $(DEVICE_RADIX_SORT); do $(MEMCHECK) ./bin/$${i}_$(SUFFIX) --device=$(device) || exit 1; done + + +#------------------------------------------------------------------------------- +# make link +#------------------------------------------------------------------------------- + +link : bin/link_$(SUFFIX) + +bin/link_$(SUFFIX) : link_a.cu link_b.cu link_main.cpp $(DEPS) + mkdir -p bin + $(NVCC) $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(DEFINES) $(SM_TARGETS) link_a.cu -c -o bin/link_a.obj + $(NVCC) $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(DEFINES) $(SM_TARGETS) link_b.cu -c -o bin/link_b.obj + $(NVCC) $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(DEFINES) $(SM_TARGETS) link_main.cpp bin/link_a.obj bin/link_b.obj -o bin/link_$(SUFFIX) + + +#------------------------------------------------------------------------------- +# make test_iterator +#------------------------------------------------------------------------------- + +test_iterator: bin/test_iterator_$(SUFFIX) + +bin/test_iterator_$(SUFFIX) : test_iterator.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_iterator_$(SUFFIX) test_iterator.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_allocator +#------------------------------------------------------------------------------- + +test_allocator: bin/test_allocator_$(SUFFIX) + +bin/test_allocator_$(SUFFIX) : test_allocator.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_allocator_$(SUFFIX) test_allocator.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_grid_barrier +#------------------------------------------------------------------------------- + +test_grid_barrier: bin/test_grid_barrier_$(SUFFIX) + +bin/test_grid_barrier_$(SUFFIX) : test_grid_barrier.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_grid_barrier_$(SUFFIX) test_grid_barrier.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_warp_scan +#------------------------------------------------------------------------------- + +test_warp_scan: bin/test_warp_scan_$(SUFFIX) + +bin/test_warp_scan_$(SUFFIX) : test_warp_scan.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_warp_scan_$(SUFFIX) test_warp_scan.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_warp_reduce +#------------------------------------------------------------------------------- + +test_warp_reduce: bin/test_warp_reduce_$(SUFFIX) + +bin/test_warp_reduce_$(SUFFIX) : test_warp_reduce.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_warp_reduce_$(SUFFIX) test_warp_reduce.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_block_reduce_raking 
+#------------------------------------------------------------------------------- + +test_block_reduce_raking: bin/test_block_reduce_raking_$(SUFFIX) + +bin/test_block_reduce_raking_$(SUFFIX) : test_block_reduce.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) -DTEST_RAKING $(SM_TARGETS) -o bin/test_block_reduce_raking_$(SUFFIX) test_block_reduce.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_block_reduce_warp_reductions +#------------------------------------------------------------------------------- + +test_block_reduce_warp_reductions: bin/test_block_reduce_warp_reductions_$(SUFFIX) + +bin/test_block_reduce_warp_reductions_$(SUFFIX) : test_block_reduce.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) -DTEST_WARP_REDUCTIONS $(SM_TARGETS) -o bin/test_block_reduce_warp_reductions_$(SUFFIX) test_block_reduce.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_block_reduce +#------------------------------------------------------------------------------- + +test_block_reduce: $(BLOCK_REDUCE) + + +#------------------------------------------------------------------------------- +# make test_block_scan_raking +#------------------------------------------------------------------------------- + +test_block_scan_raking: bin/test_block_scan_raking_$(SUFFIX) + +bin/test_block_scan_raking_$(SUFFIX) : test_block_scan.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) -DTEST_RAKING $(SM_TARGETS) -o bin/test_block_scan_raking_$(SUFFIX) test_block_scan.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_block_scan_raking_memoize +#------------------------------------------------------------------------------- + +test_block_scan_raking_memoize: bin/test_block_scan_raking_memoize_$(SUFFIX) + +bin/test_block_scan_raking_memoize_$(SUFFIX) : test_block_scan.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) -DTEST_RAKING_MEMOIZE $(SM_TARGETS) -o bin/test_block_scan_raking_memoize_$(SUFFIX) test_block_scan.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_block_scan_warp_scans +#------------------------------------------------------------------------------- + +test_block_scan_warp_scans: bin/test_block_scan_warp_scans_$(SUFFIX) + +bin/test_block_scan_warp_scans_$(SUFFIX) : test_block_scan.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) -DTEST_WARP_SCANS $(SM_TARGETS) -o bin/test_block_scan_warp_scans_$(SUFFIX) test_block_scan.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_block_scan +#------------------------------------------------------------------------------- + +test_block_scan: $(BLOCK_SCAN) + + +#------------------------------------------------------------------------------- +# make test_block_load_store +#------------------------------------------------------------------------------- + +test_block_load_store: bin/test_block_load_store_$(SUFFIX) + +bin/test_block_load_store_$(SUFFIX) : test_block_load_store.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_block_load_store_$(SUFFIX) test_block_load_store.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + 
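# Example only (not part of the original Makefile): the block-scan rules above all compile the
# same test_block_scan.cu and differ only in the -DTEST_* define, so one source file yields
# several algorithm-specific binaries. Assuming sm and the binary suffix are configured by
# ../common.mk, a typical build/run of one variant might look like:
#
#   make test_block_scan_raking sm=700 quicktest=1
#   ./bin/test_block_scan_raking_<BIN_SUFFIX>_quick --device=0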
+#------------------------------------------------------------------------------- +# make test_block_radix_sort_keys +#------------------------------------------------------------------------------- + +test_block_radix_sort_keys: bin/test_block_radix_sort_keys_$(SUFFIX) + +bin/test_block_radix_sort_keys_$(SUFFIX) : test_block_radix_sort.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) -DTEST_KEYS_ONLY $(SM_TARGETS) -o bin/test_block_radix_sort_keys_$(SUFFIX) test_block_radix_sort.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + +#------------------------------------------------------------------------------- +# make test_block_radix_sort_pairs +#------------------------------------------------------------------------------- + +test_block_radix_sort_pairs: bin/test_block_radix_sort_pairs_$(SUFFIX) + +bin/test_block_radix_sort_pairs_$(SUFFIX) : test_block_radix_sort.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_block_radix_sort_pairs_$(SUFFIX) test_block_radix_sort.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_block_radix_sort +#------------------------------------------------------------------------------- + +test_block_radix_sort : $(BLOCK_RADIX_SORT) + + +#------------------------------------------------------------------------------- +# make test_block_histogram +#------------------------------------------------------------------------------- + +test_block_histogram: bin/test_block_histogram_$(SUFFIX) + +bin/test_block_histogram_$(SUFFIX) : test_block_histogram.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_block_histogram_$(SUFFIX) test_block_histogram.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_device_reduce +#------------------------------------------------------------------------------- + +test_device_reduce: bin/test_device_reduce_$(SUFFIX) + +bin/test_device_reduce_$(SUFFIX) : test_device_reduce.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_device_reduce_$(SUFFIX) test_device_reduce.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_device_histogram +#------------------------------------------------------------------------------- + +test_device_histogram: bin/test_device_histogram_$(SUFFIX) + +bin/test_device_histogram_$(SUFFIX) : test_device_histogram.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_device_histogram_$(SUFFIX) test_device_histogram.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) $(NPPI) -O3 + + +#------------------------------------------------------------------------------- +# make test_device_scan +#------------------------------------------------------------------------------- + +test_device_scan: bin/test_device_scan_$(SUFFIX) + +bin/test_device_scan_$(SUFFIX) : test_device_scan.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_device_scan_$(SUFFIX) test_device_scan.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_device_radix_sort +#------------------------------------------------------------------------------- + +test_device_radix_sort: bin/test_device_radix_sort_$(SUFFIX) + +bin/test_device_radix_sort_$(SUFFIX) : test_device_radix_sort.cu $(DEPS) + mkdir 
-p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_device_radix_sort_$(SUFFIX) test_device_radix_sort.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_device_radix_sort_segmented +#------------------------------------------------------------------------------- + +test_device_radix_sort_segmented: bin/test_device_radix_sort_segmented_$(SUFFIX) + +bin/test_device_radix_sort_segmented_$(SUFFIX) : test_device_radix_sort.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) -DSEGMENTED_SORT $(SM_TARGETS) -o bin/test_device_radix_sort_segmented_$(SUFFIX) test_device_radix_sort.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_device_select_unique +#------------------------------------------------------------------------------- + +test_device_select_unique: bin/test_device_select_unique_$(SUFFIX) + +bin/test_device_select_unique_$(SUFFIX) : test_device_select_unique.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_device_select_unique_$(SUFFIX) test_device_select_unique.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + +#------------------------------------------------------------------------------- +# make test_device_select_if +#------------------------------------------------------------------------------- + +test_device_select_if: bin/test_device_select_if_$(SUFFIX) + +bin/test_device_select_if_$(SUFFIX) : test_device_select_if.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_device_select_if_$(SUFFIX) test_device_select_if.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + +#------------------------------------------------------------------------------- +# make test_device_reduce_by_key +#------------------------------------------------------------------------------- + +test_device_reduce_by_key: bin/test_device_reduce_by_key_$(SUFFIX) + +bin/test_device_reduce_by_key_$(SUFFIX) : test_device_reduce_by_key.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_device_reduce_by_key_$(SUFFIX) test_device_reduce_by_key.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + +#------------------------------------------------------------------------------- +# make test_device_run_length_encode +#------------------------------------------------------------------------------- + +test_device_run_length_encode: bin/test_device_run_length_encode_$(SUFFIX) + +bin/test_device_run_length_encode_$(SUFFIX) : test_device_run_length_encode.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_device_run_length_encode_$(SUFFIX) test_device_run_length_encode.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + + + +#------------------------------------------------------------------------------- +# make test_device_seg_reduce +#------------------------------------------------------------------------------- +# +#test_device_seg_reduce: bin/test_device_seg_reduce_$(SUFFIX) +# +#bin/test_device_seg_reduce_$(SUFFIX) : test_device_seg_reduce.cu $(DEPS) +# mkdir -p bin +# $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/test_device_seg_reduce_$(SUFFIX) test_device_seg_reduce.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/half.h b/hash-graph-dehornetify/externals/cub-1.8.0/test/half.h new file mode 100644 index 0000000..e4fd65c --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/half.h @@ 
-0,0 +1,298 @@ +/****************************************************************************** + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are not permitted. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +#pragma once + +/** + * \file + * Utilities for interacting with the opaque CUDA __half type + */ + +#include +#include +#include + +#include + + +/****************************************************************************** + * half_t + ******************************************************************************/ + +/** + * Host-based fp16 data type compatible and convertible with __half + */ +struct half_t +{ + uint16_t __x; + + /// Constructor from __half + __host__ __device__ __forceinline__ + half_t(const __half &other) + { + __x = reinterpret_cast(other); + } + + /// Constructor from integer + __host__ __device__ __forceinline__ + half_t(int a) + { + *this = half_t(float(a)); + } + + /// Default constructor + __host__ __device__ __forceinline__ + half_t() : __x(0) + {} + + /// Constructor from float + __host__ __device__ __forceinline__ + half_t(float a) + { + // Stolen from Norbert Juffa + uint32_t ia = *reinterpret_cast(&a); + uint16_t ir; + + ir = (ia >> 16) & 0x8000; + + if ((ia & 0x7f800000) == 0x7f800000) + { + if ((ia & 0x7fffffff) == 0x7f800000) + { + ir |= 0x7c00; /* infinity */ + } + else + { + ir = 0x7fff; /* canonical NaN */ + } + } + else if ((ia & 0x7f800000) >= 0x33000000) + { + int32_t shift = (int32_t) ((ia >> 23) & 0xff) - 127; + if (shift > 15) + { + ir |= 0x7c00; /* infinity */ + } + else + { + ia = (ia & 0x007fffff) | 0x00800000; /* extract mantissa */ + if (shift < -14) + { /* denormal */ + ir |= ia >> (-1 - shift); + ia = ia << (32 - (-1 - shift)); + } + else + { /* normal */ + ir |= ia >> (24 - 11); + ia = ia << (32 - (24 - 11)); + ir = ir + ((14 + shift) << 10); + } + /* IEEE-754 round to nearest of even */ + if ((ia > 0x80000000) || ((ia == 0x80000000) && (ir & 1))) + { + ir++; + } + } + } + + this->__x = ir; + } + + /// Cast to __half + __host__ __device__ __forceinline__ + operator __half() const + { + return reinterpret_cast(__x); + } + + /// Cast to float + __host__ __device__ __forceinline__ + operator float() const + { + // Stolen from Andrew Kerr + + int sign = ((this->__x >> 15) & 1); + int exp = ((this->__x >> 10) & 0x1f); + int mantissa = (this->__x & 0x3ff); + uint32_t f = 0; + + if (exp > 0 && exp < 31) + { + // normal + exp += 112; + f = (sign << 31) | (exp << 23) | (mantissa << 13); + } + else if (exp == 0) + { + if (mantissa) + { + // subnormal + exp += 113; + while ((mantissa & (1 << 10)) == 0) + { + mantissa 
<<= 1; + exp--; + } + mantissa &= 0x3ff; + f = (sign << 31) | (exp << 23) | (mantissa << 13); + } + else if (sign) + { + f = 0x80000000; // negative zero + } + else + { + f = 0x0; // zero + } + } + else if (exp == 31) + { + if (mantissa) + { + f = 0x7fffffff; // not a number + } + else + { + f = (0xff << 23) | (sign << 31); // inf + } + } + return *reinterpret_cast(&f); + } + + + /// Get raw storage + __host__ __device__ __forceinline__ + uint16_t raw() + { + return this->__x; + } + + /// Equality + __host__ __device__ __forceinline__ + bool operator ==(const half_t &other) + { + return (this->__x == other.__x); + } + + /// Inequality + __host__ __device__ __forceinline__ + bool operator !=(const half_t &other) + { + return (this->__x != other.__x); + } + + /// Assignment by sum + __host__ __device__ __forceinline__ + half_t& operator +=(const half_t &rhs) + { + *this = half_t(float(*this) + float(rhs)); + return *this; + } + + /// Multiply + __host__ __device__ __forceinline__ + half_t operator*(const half_t &other) + { + return half_t(float(*this) * float(other)); + } + + /// Add + __host__ __device__ __forceinline__ + half_t operator+(const half_t &other) + { + return half_t(float(*this) + float(other)); + } + + /// Less-than + __host__ __device__ __forceinline__ + bool operator<(const half_t &other) const + { + return float(*this) < float(other); + } + + /// Less-than-equal + __host__ __device__ __forceinline__ + bool operator<=(const half_t &other) const + { + return float(*this) <= float(other); + } + + /// Greater-than + __host__ __device__ __forceinline__ + bool operator>(const half_t &other) const + { + return float(*this) > float(other); + } + + /// Greater-than-equal + __host__ __device__ __forceinline__ + bool operator>=(const half_t &other) const + { + return float(*this) >= float(other); + } + + /// numeric_traits::max + __host__ __device__ __forceinline__ + static half_t max() { + uint16_t max_word = 0x7BFF; + return reinterpret_cast(max_word); + } + + /// numeric_traits::lowest + __host__ __device__ __forceinline__ + static half_t lowest() { + uint16_t lowest_word = 0xFBFF; + return reinterpret_cast(lowest_word); + } +}; + + +/****************************************************************************** + * I/O stream overloads + ******************************************************************************/ + +/// Insert formatted \p half_t into the output stream +std::ostream& operator<<(std::ostream &out, const half_t &x) +{ + out << (float)x; + return out; +} + + +/// Insert formatted \p __half into the output stream +std::ostream& operator<<(std::ostream &out, const __half &x) +{ + return out << half_t(x); +} + + +/****************************************************************************** + * Traits overloads + ******************************************************************************/ + +template <> +struct cub::FpLimits +{ + static __host__ __device__ __forceinline__ half_t Max() { return half_t::max(); } + + static __host__ __device__ __forceinline__ half_t Lowest() { return half_t::lowest(); } +}; + +template <> struct cub::NumericTraits : cub::BaseTraits {}; + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/link_a.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/link_a.cu new file mode 100644 index 0000000..8a9b19f --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/link_a.cu @@ -0,0 +1,11 @@ +#include + +void a() +{ + printf("a() called\n"); + + cub::DoubleBuffer d_keys; + cub::DoubleBuffer d_values; + size_t 
temp_storage_bytes = 0; + cub::DeviceRadixSort::SortPairs(NULL, temp_storage_bytes, d_keys, d_values, 1024); +} diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/link_b.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/link_b.cu new file mode 100644 index 0000000..a19ec40 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/link_b.cu @@ -0,0 +1,11 @@ +#include + +void b() +{ + printf("b() called\n"); + + cub::DoubleBuffer d_keys; + cub::DoubleBuffer d_values; + size_t temp_storage_bytes = 0; + cub::DeviceRadixSort::SortPairs(NULL, temp_storage_bytes, d_keys, d_values, 1024); +} diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/link_main.cpp b/hash-graph-dehornetify/externals/cub-1.8.0/test/link_main.cpp new file mode 100644 index 0000000..ef677ee --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/link_main.cpp @@ -0,0 +1,10 @@ +#include + +extern void a(); +extern void b(); + +int main() +{ + printf("hello world\n"); + return 0; +} diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/mersenne.h b/hash-graph-dehornetify/externals/cub-1.8.0/test/mersenne.h new file mode 100644 index 0000000..76aae80 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/mersenne.h @@ -0,0 +1,160 @@ +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. 
+ http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) + */ + +#include + +namespace mersenne { + +/* Period parameters */ +const unsigned int N = 624; +const unsigned int M = 397; +const unsigned int MATRIX_A = 0x9908b0df; /* constant vector a */ +const unsigned int UPPER_MASK = 0x80000000; /* most significant w-r bits */ +const unsigned int LOWER_MASK = 0x7fffffff; /* least significant r bits */ + +static unsigned int mt[N]; /* the array for the state vector */ +static int mti = N + 1; /* mti==N+1 means mt[N] is not initialized */ + +/* initializes mt[N] with a seed */ +void init_genrand(unsigned int s) +{ + mt[0] = s & 0xffffffff; + for (mti = 1; mti < N; mti++) + { + mt[mti] = (1812433253 * (mt[mti - 1] ^ (mt[mti - 1] >> 30)) + mti); + + /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for mtiplier. */ + /* In the previous versions, MSBs of the seed affect */ + /* only MSBs of the array mt[]. */ + /* 2002/01/09 modified by Makoto Matsumoto */ + + mt[mti] &= 0xffffffff; + /* for >32 bit machines */ + } +} + +/* initialize by an array with array-length */ +/* init_key is the array for initializing keys */ +/* key_length is its length */ +/* slight change for C++, 2004/2/26 */ +void init_by_array(unsigned int init_key[], int key_length) +{ + int i, j, k; + init_genrand(19650218); + i = 1; + j = 0; + k = (N > key_length ? N : key_length); + for (; k; k--) + { + mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >> 30)) * 1664525)) + + init_key[j] + j; /* non linear */ + mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */ + i++; + j++; + if (i >= N) + { + mt[0] = mt[N - 1]; + i = 1; + } + if (j >= key_length) j = 0; + } + for (k = N - 1; k; k--) + { + mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >> 30)) * 1566083941)) - i; /* non linear */ + mt[i] &= 0xffffffff; /* for WORDSIZE > 32 machines */ + i++; + if (i >= N) + { + mt[0] = mt[N - 1]; + i = 1; + } + } + + mt[0] = 0x80000000; /* MSB is 1; assuring non-zero initial array */ +} + +/* generates a random number on [0,0xffffffff]-interval */ +unsigned int genrand_int32(void) +{ + unsigned int y; + static unsigned int mag01[2] = { 0x0, MATRIX_A }; + + /* mag01[x] = x * MATRIX_A for x=0,1 */ + + if (mti >= N) + { /* generate N words at one time */ + int kk; + + if (mti == N + 1) /* if init_genrand() has not been called, */ + init_genrand(5489); /* a defat initial seed is used */ + + for (kk = 0; kk < N - M; kk++) + { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + M] ^ (y >> 1) ^ mag01[y & 0x1]; + } + for (; kk < N - 1; kk++) + { + y = (mt[kk] & UPPER_MASK) | (mt[kk + 1] & LOWER_MASK); + mt[kk] = mt[kk + (M - N)] ^ (y >> 1) ^ mag01[y & 0x1]; + } + y = (mt[N - 1] & UPPER_MASK) | (mt[0] & LOWER_MASK); + mt[N - 1] = mt[M - 1] ^ (y >> 1) ^ mag01[y & 0x1]; + + mti = 0; + } + + y = mt[mti++]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680; + y ^= (y << 15) & 0xefc60000; + y ^= (y >> 18); + + return y; +} + + + +} // namespace mersenne diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_allocator.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_allocator.cu new file mode 100644 index 0000000..f771435 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_allocator.cu @@ -0,0 +1,459 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Test evaluation for caching allocator of device memory + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include + +#include +#include "test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=]" + "[--bytes=]" + "[--i=]" + "\n", argv[0]); + exit(0); + } + +#if (CUB_PTX_ARCH == 0) + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Get number of GPUs and current GPU + int num_gpus; + int initial_gpu; + int timing_iterations = 10000; + int timing_bytes = 1024 * 1024; + + if (CubDebug(cudaGetDeviceCount(&num_gpus))) exit(1); + if (CubDebug(cudaGetDevice(&initial_gpu))) exit(1); + args.GetCmdLineArgument("i", timing_iterations); + args.GetCmdLineArgument("bytes", timing_bytes); + + // Create default allocator (caches up to 6MB in device allocations per GPU) + CachingDeviceAllocator allocator; + allocator.debug = true; + + printf("Running single-gpu tests...\n"); fflush(stdout); + + // + // Test0 + // + + // Create a new stream + cudaStream_t other_stream; + CubDebugExit(cudaStreamCreate(&other_stream)); + + // Allocate 999 bytes on the current gpu in stream0 + char *d_999B_stream0_a; + char *d_999B_stream0_b; + CubDebugExit(allocator.DeviceAllocate((void **) &d_999B_stream0_a, 999, 0)); + + // Run some big kernel in stream 0 + EmptyKernel<<<32000, 512, 1024 * 8, 0>>>(); + + // Free d_999B_stream0_a + CubDebugExit(allocator.DeviceFree(d_999B_stream0_a)); + + // Allocate another 999 bytes in 
stream 0 + CubDebugExit(allocator.DeviceAllocate((void **) &d_999B_stream0_b, 999, 0)); + + // Check that that we have 1 live block on the initial GPU + AssertEquals(allocator.live_blocks.size(), 1); + + // Check that that we have no cached block on the initial GPU + AssertEquals(allocator.cached_blocks.size(), 0); + + // Run some big kernel in stream 0 + EmptyKernel<<<32000, 512, 1024 * 8, 0>>>(); + + // Free d_999B_stream0_b + CubDebugExit(allocator.DeviceFree(d_999B_stream0_b)); + + // Allocate 999 bytes on the current gpu in other_stream + char *d_999B_stream_other_a; + char *d_999B_stream_other_b; + allocator.DeviceAllocate((void **) &d_999B_stream_other_a, 999, other_stream); + + // Check that that we have 1 live blocks on the initial GPU (that we allocated a new one because d_999B_stream0_b is only available for stream 0 until it becomes idle) + AssertEquals(allocator.live_blocks.size(), 1); + + // Check that that we have one cached block on the initial GPU + AssertEquals(allocator.cached_blocks.size(), 1); + + // Run some big kernel in other_stream + EmptyKernel<<<32000, 512, 1024 * 8, other_stream>>>(); + + // Free d_999B_stream_other + CubDebugExit(allocator.DeviceFree(d_999B_stream_other_a)); + + // Check that we can now use both allocations in stream 0 after synchronizing the device + CubDebugExit(cudaDeviceSynchronize()); + CubDebugExit(allocator.DeviceAllocate((void **) &d_999B_stream0_a, 999, 0)); + CubDebugExit(allocator.DeviceAllocate((void **) &d_999B_stream0_b, 999, 0)); + + // Check that that we have 2 live blocks on the initial GPU + AssertEquals(allocator.live_blocks.size(), 2); + + // Check that that we have no cached block on the initial GPU + AssertEquals(allocator.cached_blocks.size(), 0); + + // Free d_999B_stream0_a and d_999B_stream0_b + CubDebugExit(allocator.DeviceFree(d_999B_stream0_a)); + CubDebugExit(allocator.DeviceFree(d_999B_stream0_b)); + + // Check that we can now use both allocations in other_stream + CubDebugExit(cudaDeviceSynchronize()); + CubDebugExit(allocator.DeviceAllocate((void **) &d_999B_stream_other_a, 999, other_stream)); + CubDebugExit(allocator.DeviceAllocate((void **) &d_999B_stream_other_b, 999, other_stream)); + + // Check that that we have 2 live blocks on the initial GPU + AssertEquals(allocator.live_blocks.size(), 2); + + // Check that that we have no cached block on the initial GPU + AssertEquals(allocator.cached_blocks.size(), 0); + + // Run some big kernel in other_stream + EmptyKernel<<<32000, 512, 1024 * 8, other_stream>>>(); + + // Free d_999B_stream_other_a and d_999B_stream_other_b + CubDebugExit(allocator.DeviceFree(d_999B_stream_other_a)); + CubDebugExit(allocator.DeviceFree(d_999B_stream_other_b)); + + // Check that we can now use both allocations in stream 0 after synchronizing the device and destroying the other stream + CubDebugExit(cudaDeviceSynchronize()); + CubDebugExit(cudaStreamDestroy(other_stream)); + CubDebugExit(allocator.DeviceAllocate((void **) &d_999B_stream0_a, 999, 0)); + CubDebugExit(allocator.DeviceAllocate((void **) &d_999B_stream0_b, 999, 0)); + + // Check that that we have 2 live blocks on the initial GPU + AssertEquals(allocator.live_blocks.size(), 2); + + // Check that that we have no cached block on the initial GPU + AssertEquals(allocator.cached_blocks.size(), 0); + + // Free d_999B_stream0_a and d_999B_stream0_b + CubDebugExit(allocator.DeviceFree(d_999B_stream0_a)); + CubDebugExit(allocator.DeviceFree(d_999B_stream0_b)); + + // Free all cached + CubDebugExit(allocator.FreeAllCached()); + + // 
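// Usage sketch (not part of the diff): beyond the unit tests above, the caching allocator is
// typically used to hold CUB temp storage -- size the request, allocate from the cache on the
// work stream, and return the block to the cache instead of calling cudaFree. The function name
// and the DeviceReduce::Sum call below are illustrative only.
#include <cub/util_allocator.cuh>
#include <cub/device/device_reduce.cuh>

cub::CachingDeviceAllocator g_temp_alloc;  // default configuration, as in Test0 above

cudaError_t SumWithCachedTemp(const int *d_in, int *d_out, int num_items, cudaStream_t stream)
{
    void       *d_temp     = NULL;
    size_t      temp_bytes = 0;
    cudaError_t error;

    // First call only computes the required temp storage size
    if ((error = cub::DeviceReduce::Sum(d_temp, temp_bytes, d_in, d_out, num_items, stream))) return error;

    // Served from the cache when a freed block of a suitable bin exists on this device
    if ((error = g_temp_alloc.DeviceAllocate(&d_temp, temp_bytes, stream))) return error;

    if ((error = cub::DeviceReduce::Sum(d_temp, temp_bytes, d_in, d_out, num_items, stream))) return error;

    // Returns the block to the cache; it stays associated with this stream until the stream is idle
    return g_temp_alloc.DeviceFree(d_temp);
}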
+ // Test1 + // + + // Allocate 5 bytes on the current gpu + char *d_5B; + CubDebugExit(allocator.DeviceAllocate((void **) &d_5B, 5)); + + // Check that that we have zero free bytes cached on the initial GPU + AssertEquals(allocator.cached_bytes[initial_gpu].free, 0); + + // Check that that we have 1 live block on the initial GPU + AssertEquals(allocator.live_blocks.size(), 1); + + // + // Test2 + // + + // Allocate 4096 bytes on the current gpu + char *d_4096B; + CubDebugExit(allocator.DeviceAllocate((void **) &d_4096B, 4096)); + + // Check that that we have 2 live blocks on the initial GPU + AssertEquals(allocator.live_blocks.size(), 2); + + // + // Test3 + // + + // DeviceFree d_5B + CubDebugExit(allocator.DeviceFree(d_5B)); + + // Check that that we have min_bin_bytes free bytes cached on the initial gpu + AssertEquals(allocator.cached_bytes[initial_gpu].free, allocator.min_bin_bytes); + + // Check that that we have 1 live block on the initial GPU + AssertEquals(allocator.live_blocks.size(), 1); + + // Check that that we have 1 cached block on the initial GPU + AssertEquals(allocator.cached_blocks.size(), 1); + + // + // Test4 + // + + // DeviceFree d_4096B + CubDebugExit(allocator.DeviceFree(d_4096B)); + + // Check that that we have the 4096 + min_bin free bytes cached on the initial gpu + AssertEquals(allocator.cached_bytes[initial_gpu].free, allocator.min_bin_bytes + 4096); + + // Check that that we have 0 live block on the initial GPU + AssertEquals(allocator.live_blocks.size(), 0); + + // Check that that we have 2 cached block on the initial GPU + AssertEquals(allocator.cached_blocks.size(), 2); + + // + // Test5 + // + + // Allocate 768 bytes on the current gpu + char *d_768B; + CubDebugExit(allocator.DeviceAllocate((void **) &d_768B, 768)); + + // Check that that we have the min_bin free bytes cached on the initial gpu (4096 was reused) + AssertEquals(allocator.cached_bytes[initial_gpu].free, allocator.min_bin_bytes); + + // Check that that we have 1 live block on the initial GPU + AssertEquals(allocator.live_blocks.size(), 1); + + // Check that that we have 1 cached block on the initial GPU + AssertEquals(allocator.cached_blocks.size(), 1); + + // + // Test6 + // + + // Allocate max_cached_bytes on the current gpu + char *d_max_cached; + CubDebugExit(allocator.DeviceAllocate((void **) &d_max_cached, allocator.max_cached_bytes)); + + // DeviceFree d_max_cached + CubDebugExit(allocator.DeviceFree(d_max_cached)); + + // Check that that we have the min_bin free bytes cached on the initial gpu (max cached was not returned because we went over) + AssertEquals(allocator.cached_bytes[initial_gpu].free, allocator.min_bin_bytes); + + // Check that that we have 1 live block on the initial GPU + AssertEquals(allocator.live_blocks.size(), 1); + + // Check that that we still have 1 cached block on the initial GPU + AssertEquals(allocator.cached_blocks.size(), 1); + + // + // Test7 + // + + // Free all cached blocks on all GPUs + CubDebugExit(allocator.FreeAllCached()); + + // Check that that we have 0 bytes cached on the initial GPU + AssertEquals(allocator.cached_bytes[initial_gpu].free, 0); + + // Check that that we have 0 cached blocks across all GPUs + AssertEquals(allocator.cached_blocks.size(), 0); + + // Check that that still we have 1 live block across all GPUs + AssertEquals(allocator.live_blocks.size(), 1); + + // + // Test8 + // + + // Allocate max cached bytes + 1 on the current gpu + char *d_max_cached_plus; + CubDebugExit(allocator.DeviceAllocate((void **) &d_max_cached_plus, 
allocator.max_cached_bytes + 1)); + + // DeviceFree max cached bytes + CubDebugExit(allocator.DeviceFree(d_max_cached_plus)); + + // DeviceFree d_768B + CubDebugExit(allocator.DeviceFree(d_768B)); + + unsigned int power; + size_t rounded_bytes; + allocator.NearestPowerOf(power, rounded_bytes, allocator.bin_growth, 768); + + // Check that that we have 4096 free bytes cached on the initial gpu + AssertEquals(allocator.cached_bytes[initial_gpu].free, rounded_bytes); + + // Check that that we have 1 cached blocks across all GPUs + AssertEquals(allocator.cached_blocks.size(), 1); + + // Check that that still we have 0 live block across all GPUs + AssertEquals(allocator.live_blocks.size(), 0); + +#ifndef CUB_CDP + // BUG: find out why these tests fail when one GPU is CDP compliant and the other is not + + if (num_gpus > 1) + { + printf("\nRunning multi-gpu tests...\n"); fflush(stdout); + + // + // Test9 + // + + // Allocate 768 bytes on the next gpu + int next_gpu = (initial_gpu + 1) % num_gpus; + char *d_768B_2; + CubDebugExit(allocator.DeviceAllocate(next_gpu, (void **) &d_768B_2, 768)); + + // DeviceFree d_768B on the next gpu + CubDebugExit(allocator.DeviceFree(next_gpu, d_768B_2)); + + // Re-allocate 768 bytes on the next gpu + CubDebugExit(allocator.DeviceAllocate(next_gpu, (void **) &d_768B_2, 768)); + + // Re-free d_768B on the next gpu + CubDebugExit(allocator.DeviceFree(next_gpu, d_768B_2)); + + // Check that that we have 4096 free bytes cached on the initial gpu + AssertEquals(allocator.cached_bytes[initial_gpu].free, rounded_bytes); + + // Check that that we have 4096 free bytes cached on the second gpu + AssertEquals(allocator.cached_bytes[next_gpu].free, rounded_bytes); + + // Check that that we have 2 cached blocks across all GPUs + AssertEquals(allocator.cached_blocks.size(), 2); + + // Check that that still we have 0 live block across all GPUs + AssertEquals(allocator.live_blocks.size(), 0); + } +#endif // CUB_CDP + + // + // Performance + // + + printf("\nCPU Performance (%d timing iterations, %d bytes):\n", timing_iterations, timing_bytes); + fflush(stdout); fflush(stderr); + + // CPU performance comparisons vs cached. Allocate and free a 1MB block 2000 times + CpuTimer cpu_timer; + char *d_1024MB = NULL; + allocator.debug = false; + + // Prime the caching allocator and the kernel + CubDebugExit(allocator.DeviceAllocate((void **) &d_1024MB, timing_bytes)); + CubDebugExit(allocator.DeviceFree(d_1024MB)); + cub::EmptyKernel<<<1, 32>>>(); + + // CUDA + cpu_timer.Start(); + for (int i = 0; i < timing_iterations; ++i) + { + CubDebugExit(cudaMalloc((void **) &d_1024MB, timing_bytes)); + CubDebugExit(cudaFree(d_1024MB)); + } + cpu_timer.Stop(); + float cuda_malloc_elapsed_millis = cpu_timer.ElapsedMillis(); + + // CUB + cpu_timer.Start(); + for (int i = 0; i < timing_iterations; ++i) + { + CubDebugExit(allocator.DeviceAllocate((void **) &d_1024MB, timing_bytes)); + CubDebugExit(allocator.DeviceFree(d_1024MB)); + } + cpu_timer.Stop(); + float cub_calloc_elapsed_millis = cpu_timer.ElapsedMillis(); + + printf("\t CUB CachingDeviceAllocator allocation CPU speedup: %.2f (avg cudaMalloc %.4f ms vs. avg DeviceAllocate %.4f ms)\n", + cuda_malloc_elapsed_millis / cub_calloc_elapsed_millis, + cuda_malloc_elapsed_millis / timing_iterations, + cub_calloc_elapsed_millis / timing_iterations); + + // GPU performance comparisons. 
Allocate and free a 1MB block 2000 times + GpuTimer gpu_timer; + + printf("\nGPU Performance (%d timing iterations, %d bytes):\n", timing_iterations, timing_bytes); + fflush(stdout); fflush(stderr); + + // Kernel-only + gpu_timer.Start(); + for (int i = 0; i < timing_iterations; ++i) + { + cub::EmptyKernel<<<1, 32>>>(); + } + gpu_timer.Stop(); + float cuda_empty_elapsed_millis = gpu_timer.ElapsedMillis(); + + // CUDA + gpu_timer.Start(); + for (int i = 0; i < timing_iterations; ++i) + { + CubDebugExit(cudaMalloc((void **) &d_1024MB, timing_bytes)); + cub::EmptyKernel<<<1, 32>>>(); + CubDebugExit(cudaFree(d_1024MB)); + } + gpu_timer.Stop(); + cuda_malloc_elapsed_millis = gpu_timer.ElapsedMillis() - cuda_empty_elapsed_millis; + + // CUB + gpu_timer.Start(); + for (int i = 0; i < timing_iterations; ++i) + { + CubDebugExit(allocator.DeviceAllocate((void **) &d_1024MB, timing_bytes)); + cub::EmptyKernel<<<1, 32>>>(); + CubDebugExit(allocator.DeviceFree(d_1024MB)); + } + gpu_timer.Stop(); + cub_calloc_elapsed_millis = gpu_timer.ElapsedMillis() - cuda_empty_elapsed_millis; + + printf("\t CUB CachingDeviceAllocator allocation GPU speedup: %.2f (avg cudaMalloc %.4f ms vs. avg DeviceAllocate %.4f ms)\n", + cuda_malloc_elapsed_millis / cub_calloc_elapsed_millis, + cuda_malloc_elapsed_millis / timing_iterations, + cub_calloc_elapsed_millis / timing_iterations); + + +#endif + + printf("Success\n"); + + return 0; +} + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_histogram.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_histogram.cu new file mode 100644 index 0000000..b76466f --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_histogram.cu @@ -0,0 +1,310 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/****************************************************************************** + * Test of BlockHistogram utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; +int g_timing_iterations = 0; +int g_repeat = 0; +CachingDeviceAllocator g_allocator(true); + + +//--------------------------------------------------------------------- +// Test kernels +//--------------------------------------------------------------------- + +/** + * BlockHistogram test kernel. + */ +template < + int BINS, + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + BlockHistogramAlgorithm ALGORITHM, + typename T, + typename HistoCounter> +__global__ void BlockHistogramKernel( + T *d_samples, + HistoCounter *d_histogram) +{ + // Parameterize BlockHistogram type for our thread block + typedef BlockHistogram BlockHistogram; + + // Allocate temp storage in shared memory + __shared__ typename BlockHistogram::TempStorage temp_storage; + + // Per-thread tile data + T data[ITEMS_PER_THREAD]; + LoadDirectStriped(threadIdx.x, d_samples, data); + + // Test histo (writing directly to histogram buffer in global) + BlockHistogram(temp_storage).Histogram(data, d_histogram); +} + + +/** + * Initialize problem (and solution) + */ +template < + int BINS, + typename SampleT> +void Initialize( + GenMode gen_mode, + SampleT *h_samples, + int *h_histograms_linear, + int num_samples) +{ + // Init bins + for (int bin = 0; bin < BINS; ++bin) + { + h_histograms_linear[bin] = 0; + } + + if (g_verbose) printf("Samples: \n"); + + // Initialize interleaved channel samples and histogram them correspondingly + for (int i = 0; i < num_samples; ++i) + { + InitValue(gen_mode, h_samples[i], i); + h_samples[i] %= BINS; + + if (g_verbose) std::cout << CoutCast(h_samples[i]) << ", "; + + h_histograms_linear[h_samples[i]]++; + } + + if (g_verbose) printf("\n\n"); +} + + +/** + * Test BlockHistogram + */ +template < + typename SampleT, + int BINS, + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + BlockHistogramAlgorithm ALGORITHM> +void Test( + GenMode gen_mode) +{ + int num_samples = BLOCK_THREADS * ITEMS_PER_THREAD; + + printf("cub::BlockHistogram %s %d %s samples (%dB), %d bins, %d threads, gen-mode %s\n", + (ALGORITHM == BLOCK_HISTO_SORT) ? "BLOCK_HISTO_SORT" : "BLOCK_HISTO_ATOMIC", + num_samples, + typeid(SampleT).name(), + (int) sizeof(SampleT), + BINS, + BLOCK_THREADS, + (gen_mode == RANDOM) ? "RANDOM" : (gen_mode == INTEGER_SEED) ? 
"SEQUENTIAL" : "HOMOGENOUS"); + fflush(stdout); + + // Allocate host arrays + SampleT *h_samples = new SampleT[num_samples]; + int *h_reference = new int[BINS]; + + // Initialize problem + Initialize(gen_mode, h_samples, h_reference, num_samples); + + // Allocate problem device arrays + SampleT *d_samples = NULL; + int *d_histogram = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_samples, sizeof(SampleT) * num_samples)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_histogram, sizeof(int) * BINS)); + + // Initialize/clear device arrays + CubDebugExit(cudaMemcpy(d_samples, h_samples, sizeof(SampleT) * num_samples, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemset(d_histogram, 0, sizeof(int) * BINS)); + + // Run kernel + BlockHistogramKernel<<<1, BLOCK_THREADS>>>( + d_samples, + d_histogram); + + // Check for correctness (and display results, if specified) + int compare = CompareDeviceResults((int*) h_reference, d_histogram, BINS, g_verbose, g_verbose); + printf("\t%s\n\n", compare ? "FAIL" : "PASS"); + + // Flush any stdout/stderr + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + fflush(stdout); + fflush(stderr); + + // Cleanup + if (h_samples) delete[] h_samples; + if (h_reference) delete[] h_reference; + if (d_samples) CubDebugExit(g_allocator.DeviceFree(d_samples)); + if (d_histogram) CubDebugExit(g_allocator.DeviceFree(d_histogram)); + + // Correctness asserts + AssertEquals(0, compare); +} + + +/** + * Test different sample distributions + */ +template < + typename SampleT, + int BINS, + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + BlockHistogramAlgorithm ALGORITHM> +void Test() +{ + Test(UNIFORM); + Test(INTEGER_SEED); + Test(RANDOM); +} + + +/** + * Test different ALGORITHM + */ +template < + typename SampleT, + int BINS, + int BLOCK_THREADS, + int ITEMS_PER_THREAD> +void Test() +{ + Test(); + Test(); +} + + +/** + * Test different ITEMS_PER_THREAD + */ +template < + typename SampleT, + int BINS, + int BLOCK_THREADS> +void Test() +{ + Test(); + Test(); +} + + +/** + * Test different BLOCK_THREADS + */ +template < + typename SampleT, + int BINS> +void Test() +{ + Test(); + Test(); + Test(); +} + + + + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("repeat", g_repeat); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--device=] " + "[--repeat=]" + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + +#ifdef QUICK_TEST + + // Compile/run quick tests + Test(RANDOM); + Test(RANDOM); + +#else + + // Compile/run thorough tests + for (int i = 0; i <= g_repeat; ++i) + { + Test(); + Test(); + Test(); + } + +#endif + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_load_store.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_load_store.cu new file mode 100644 index 0000000..f1a0bf3 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_load_store.cu @@ -0,0 +1,549 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Test of BlockLoad and BlockStore utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "test_util.h" + +using namespace cub; + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; +CachingDeviceAllocator g_allocator(true); + + +//--------------------------------------------------------------------- +// Test kernels +//--------------------------------------------------------------------- + + +/** + * Test load/store kernel. + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + BlockLoadAlgorithm LOAD_ALGORITHM, + BlockStoreAlgorithm STORE_ALGORITHM, + typename InputIteratorT, + typename OutputIteratorT> +__launch_bounds__ (BLOCK_THREADS, 1) +__global__ void Kernel( + InputIteratorT d_in, + OutputIteratorT d_out_unguarded, + OutputIteratorT d_out_guarded, + int num_items) +{ + enum + { + TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD + }; + + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... 
else the output iterator's value type + + // Threadblock load/store abstraction types + typedef BlockLoad BlockLoad; + typedef BlockStore BlockStore; + + // Shared memory type for this thread block + union TempStorage + { + typename BlockLoad::TempStorage load; + typename BlockStore::TempStorage store; + }; + + // Allocate temp storage in shared memory + __shared__ TempStorage temp_storage; + + // Threadblock work bounds + int block_offset = blockIdx.x * TILE_SIZE; + int guarded_elements = num_items - block_offset; + + // Tile of items + OutputT data[ITEMS_PER_THREAD]; + + // Load data + BlockLoad(temp_storage.load).Load(d_in + block_offset, data); + + __syncthreads(); + + // Store data + BlockStore(temp_storage.store).Store(d_out_unguarded + block_offset, data); + + __syncthreads(); + + // reset data + #pragma unroll + for (int ITEM = 0; ITEM < ITEMS_PER_THREAD; ++ITEM) + data[ITEM] = OutputT(); + + __syncthreads(); + + // Load data + BlockLoad(temp_storage.load).Load(d_in + block_offset, data, guarded_elements); + + __syncthreads(); + + // Store data + BlockStore(temp_storage.store).Store(d_out_guarded + block_offset, data, guarded_elements); +} + + +//--------------------------------------------------------------------- +// Host testing subroutines +//--------------------------------------------------------------------- + + +/** + * Test load/store variants + */ +template < + typename T, + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + BlockLoadAlgorithm LOAD_ALGORITHM, + BlockStoreAlgorithm STORE_ALGORITHM, + typename InputIteratorT, + typename OutputIteratorT> +void TestKernel( + T *h_in, + InputIteratorT d_in, + OutputIteratorT d_out_unguarded_itr, + OutputIteratorT d_out_guarded_itr, + T *d_out_unguarded_ptr, + T *d_out_guarded_ptr, + int grid_size, + int guarded_elements) +{ + int compare; + + int unguarded_elements = grid_size * BLOCK_THREADS * ITEMS_PER_THREAD; + + // Test with discard output iterator + typedef typename std::iterator_traits::difference_type OffsetT; + DiscardOutputIterator discard_itr; + + Kernel + <<>>( + d_in, + discard_itr, + discard_itr, + guarded_elements); + + // Test with regular output iterator + Kernel + <<>>( + d_in, + d_out_unguarded_itr, + d_out_guarded_itr, + guarded_elements); + + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + + // Check results + compare = CompareDeviceResults(h_in, d_out_guarded_ptr, guarded_elements, g_verbose, g_verbose); + printf("\tGuarded: %s\n", (compare) ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Check results + compare = CompareDeviceResults(h_in, d_out_unguarded_ptr, unguarded_elements, g_verbose, g_verbose); + printf("\tUnguarded: %s\n", (compare) ? "FAIL" : "PASS"); + AssertEquals(0, compare); +} + + +/** + * Test native pointer. 
Specialized for sufficient resources + */ +template < + typename T, + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + BlockLoadAlgorithm LOAD_ALGORITHM, + BlockStoreAlgorithm STORE_ALGORITHM> +void TestNative( + int grid_size, + float fraction_valid, + Int2Type sufficient_resources) +{ + int unguarded_elements = grid_size * BLOCK_THREADS * ITEMS_PER_THREAD; + int guarded_elements = int(fraction_valid * float(unguarded_elements)); + + // Allocate host arrays + T *h_in = (T*) malloc(unguarded_elements * sizeof(T)); + + // Allocate device arrays + T *d_in = NULL; + T *d_out_unguarded = NULL; + T *d_out_guarded = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(T) * unguarded_elements)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out_unguarded, sizeof(T) * unguarded_elements)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out_guarded, sizeof(T) * guarded_elements)); + CubDebugExit(cudaMemset(d_out_unguarded, 0, sizeof(T) * unguarded_elements)); + CubDebugExit(cudaMemset(d_out_guarded, 0, sizeof(T) * guarded_elements)); + + // Initialize problem on host and device + for (int i = 0; i < unguarded_elements; ++i) + { + InitValue(INTEGER_SEED, h_in[i], i); + } + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(T) * unguarded_elements, cudaMemcpyHostToDevice)); + + printf("TestNative " + "grid_size(%d) " + "guarded_elements(%d) " + "unguarded_elements(%d) " + "BLOCK_THREADS(%d) " + "ITEMS_PER_THREAD(%d) " + "LOAD_ALGORITHM(%d) " + "STORE_ALGORITHM(%d) " + "sizeof(T)(%d)\n", + grid_size, guarded_elements, unguarded_elements, BLOCK_THREADS, ITEMS_PER_THREAD, LOAD_ALGORITHM, STORE_ALGORITHM, (int) sizeof(T)); + + TestKernel( + h_in, + (T const *) d_in, // Test const + d_out_unguarded, + d_out_guarded, + d_out_unguarded, + d_out_guarded, + grid_size, + guarded_elements); + + // Cleanup + if (h_in) free(h_in); + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_out_unguarded) CubDebugExit(g_allocator.DeviceFree(d_out_unguarded)); + if (d_out_guarded) CubDebugExit(g_allocator.DeviceFree(d_out_guarded)); +} + + +/** + * Test native pointer. Specialized for insufficient resources + */ +template < + typename T, + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + BlockLoadAlgorithm LOAD_ALGORITHM, + BlockStoreAlgorithm STORE_ALGORITHM> +void TestNative( + int grid_size, + float fraction_valid, + Int2Type sufficient_resources) +{} + + +/** + * Test iterator. Specialized for sufficient resources. 
+ */ +template < + typename T, + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + BlockLoadAlgorithm LOAD_ALGORITHM, + BlockStoreAlgorithm STORE_ALGORITHM, + CacheLoadModifier LOAD_MODIFIER, + CacheStoreModifier STORE_MODIFIER> +void TestIterator( + int grid_size, + float fraction_valid, + Int2Type sufficient_resources) +{ + int unguarded_elements = grid_size * BLOCK_THREADS * ITEMS_PER_THREAD; + int guarded_elements = int(fraction_valid * float(unguarded_elements)); + + // Allocate host arrays + T *h_in = (T*) malloc(unguarded_elements * sizeof(T)); + + // Allocate device arrays + T *d_in = NULL; + T *d_out_unguarded = NULL; + T *d_out_guarded = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(T) * unguarded_elements)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out_unguarded, sizeof(T) * unguarded_elements)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out_guarded, sizeof(T) * guarded_elements)); + CubDebugExit(cudaMemset(d_out_unguarded, 0, sizeof(T) * unguarded_elements)); + CubDebugExit(cudaMemset(d_out_guarded, 0, sizeof(T) * guarded_elements)); + + // Initialize problem on host and device + for (int i = 0; i < unguarded_elements; ++i) + { + InitValue(INTEGER_SEED, h_in[i], i); + } + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(T) * unguarded_elements, cudaMemcpyHostToDevice)); + + printf("TestIterator " + "grid_size(%d) " + "guarded_elements(%d) " + "unguarded_elements(%d) " + "BLOCK_THREADS(%d) " + "ITEMS_PER_THREAD(%d) " + "LOAD_ALGORITHM(%d) " + "STORE_ALGORITHM(%d) " + "LOAD_MODIFIER(%d) " + "STORE_MODIFIER(%d) " + "sizeof(T)(%d)\n", + grid_size, guarded_elements, unguarded_elements, BLOCK_THREADS, ITEMS_PER_THREAD, LOAD_ALGORITHM, STORE_ALGORITHM, LOAD_MODIFIER, STORE_MODIFIER, (int) sizeof(T)); + + TestKernel( + h_in, + CacheModifiedInputIterator(d_in), + CacheModifiedOutputIterator(d_out_unguarded), + CacheModifiedOutputIterator(d_out_guarded), + d_out_unguarded, + d_out_guarded, + grid_size, + guarded_elements); + + // Cleanup + if (h_in) free(h_in); + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_out_unguarded) CubDebugExit(g_allocator.DeviceFree(d_out_unguarded)); + if (d_out_guarded) CubDebugExit(g_allocator.DeviceFree(d_out_guarded)); +} + +/** + * Test iterator. Specialized for insufficient resources. 
+ */ +template < + typename T, + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + BlockLoadAlgorithm LOAD_ALGORITHM, + BlockStoreAlgorithm STORE_ALGORITHM, + CacheLoadModifier LOAD_MODIFIER, + CacheStoreModifier STORE_MODIFIER> +void TestIterator( + int grid_size, + float fraction_valid, + Int2Type sufficient_resources) +{} + + +/** + * Evaluate different pointer access types + */ +template < + typename T, + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + BlockLoadAlgorithm LOAD_ALGORITHM, + BlockStoreAlgorithm STORE_ALGORITHM> +void TestPointerType( + int grid_size, + float fraction_valid) +{ + // Threadblock load/store abstraction types + typedef BlockLoad BlockLoad; + typedef BlockStore BlockStore; + +#if defined(SM100) || defined(SM110) || defined(SM130) + static const bool sufficient_load_smem = sizeof(typename BlockLoad::TempStorage) <= 1024 * 16; + static const bool sufficient_store_smem = sizeof(typename BlockStore::TempStorage) <= 1024 * 16; + static const bool sufficient_threads = BLOCK_THREADS <= 512; +#else + static const bool sufficient_load_smem = sizeof(typename BlockLoad::TempStorage) <= 1024 * 48; + static const bool sufficient_store_smem = sizeof(typename BlockStore::TempStorage) <= 1024 * 48; + static const bool sufficient_threads = BLOCK_THREADS <= 1024; +#endif + + static const bool sufficient_resources = sufficient_load_smem && sufficient_store_smem && sufficient_threads; + + TestNative(grid_size, fraction_valid, Int2Type()); + TestIterator(grid_size, fraction_valid, Int2Type()); +} + + +/** + * Evaluate different time-slicing strategies + */ +template < + typename T, + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + BlockLoadAlgorithm LOAD_ALGORITHM, + BlockStoreAlgorithm STORE_ALGORITHM> +void TestSlicedStrategy( + int grid_size, + float fraction_valid) +{ + TestPointerType(grid_size, fraction_valid); + TestPointerType(grid_size, fraction_valid); +} + + + +/** + * Evaluate different load/store strategies (specialized for block sizes that are not a multiple of 32) + */ +template < + typename T, + int BLOCK_THREADS, + int ITEMS_PER_THREAD> +void TestStrategy( + int grid_size, + float fraction_valid, + Int2Type is_warp_multiple) +{ + TestPointerType(grid_size, fraction_valid); + TestPointerType(grid_size, fraction_valid); + TestPointerType(grid_size, fraction_valid); +} + + +/** + * Evaluate different load/store strategies (specialized for block sizes that are a multiple of 32) + */ +template < + typename T, + int BLOCK_THREADS, + int ITEMS_PER_THREAD> +void TestStrategy( + int grid_size, + float fraction_valid, + Int2Type is_warp_multiple) +{ + TestStrategy(grid_size, fraction_valid, Int2Type()); + TestPointerType(grid_size, fraction_valid); + TestPointerType(grid_size, fraction_valid); +} + + +/** + * Evaluate different register blocking + */ +template < + typename T, + int BLOCK_THREADS> +void TestItemsPerThread( + int grid_size, + float fraction_valid) +{ + Int2Type is_warp_multiple; + + TestStrategy(grid_size, fraction_valid, is_warp_multiple); + TestStrategy(grid_size, fraction_valid, is_warp_multiple); + TestStrategy(grid_size, fraction_valid, is_warp_multiple); + TestStrategy(grid_size, fraction_valid, is_warp_multiple); +} + + +/** + * Evaluate different thread block sizes + */ +template +void TestThreads( + int grid_size, + float fraction_valid) +{ + TestItemsPerThread(grid_size, fraction_valid); + TestItemsPerThread(grid_size, fraction_valid); + TestItemsPerThread(grid_size, fraction_valid); + TestItemsPerThread(grid_size, fraction_valid); + 
TestItemsPerThread(grid_size, fraction_valid); +} + + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Get ptx version + int ptx_version; + CubDebugExit(PtxVersion(ptx_version)); + +#ifdef QUICK_TEST + + // Compile/run quick tests + TestNative< int, 64, 2, BLOCK_LOAD_WARP_TRANSPOSE, BLOCK_STORE_WARP_TRANSPOSE>(1, 0.8f, Int2Type()); + TestIterator< int, 64, 2, BLOCK_LOAD_WARP_TRANSPOSE, BLOCK_STORE_WARP_TRANSPOSE, LOAD_DEFAULT, STORE_DEFAULT>(1, 0.8f, Int2Type()); + +#else + + // Compile/run thorough tests + TestThreads(2, 0.8f); + TestThreads(2, 0.8f); + TestThreads(2, 0.8f); + TestThreads(2, 0.8f); + + if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted + TestThreads(2, 0.8f); + TestThreads(2, 0.8f); + TestThreads(2, 0.8f); + +#endif + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_radix_sort.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_radix_sort.cu new file mode 100644 index 0000000..959018b --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_radix_sort.cu @@ -0,0 +1,717 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
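The load/store harness above pairs cub::BlockLoad with cub::BlockStore across the different tile algorithms and the guarded/unguarded paths. A minimal sketch of that pairing, assuming a 128-thread / 4-items-per-thread WARP_TRANSPOSE configuration and a hypothetical kernel name:

#include <cub/cub.cuh>

// Hypothetical sketch: load a guarded tile of 128*4 ints, then store it back.
__global__ void CopyTileSketch(int *d_in, int *d_out, int num_valid)
{
    typedef cub::BlockLoad<int, 128, 4, cub::BLOCK_LOAD_WARP_TRANSPOSE>   BlockLoadT;
    typedef cub::BlockStore<int, 128, 4, cub::BLOCK_STORE_WARP_TRANSPOSE> BlockStoreT;

    // Union mirrors the test's TempStorage reuse between the load and store phases
    __shared__ union {
        typename BlockLoadT::TempStorage  load;
        typename BlockStoreT::TempStorage store;
    } temp_storage;

    int items[4];
    BlockLoadT(temp_storage.load).Load(d_in, items, num_valid);      // guarded load
    __syncthreads();                                                 // required before reusing shared storage
    BlockStoreT(temp_storage.store).Store(d_out, items, num_valid);  // guarded store
}

Dropping the num_valid argument gives the unguarded variants the test also exercises.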
+ * + ******************************************************************************/ + +/****************************************************************************** + * Test of BlockRadixSort utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include +#include + +#include "test_util.h" + +using namespace cub; + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; +CachingDeviceAllocator g_allocator(true); + + +//--------------------------------------------------------------------- +// Test kernels +//--------------------------------------------------------------------- + + +/// Specialized descending, blocked -> blocked +template +__device__ __forceinline__ void TestBlockSort( + typename BlockRadixSort::TempStorage &temp_storage, + Key (&keys)[ITEMS_PER_THREAD], + Value (&values)[ITEMS_PER_THREAD], + Key *d_keys, + Value *d_values, + int begin_bit, + int end_bit, + clock_t &stop, + Int2Type is_descending, + Int2Type is_blocked_output) +{ + BlockRadixSort(temp_storage).SortDescending(keys, values, begin_bit, end_bit); + stop = clock(); + StoreDirectBlocked(threadIdx.x, d_keys, keys); + StoreDirectBlocked(threadIdx.x, d_values, values); +} + +/// Specialized descending, blocked -> striped +template +__device__ __forceinline__ void TestBlockSort( + typename BlockRadixSort::TempStorage &temp_storage, + Key (&keys)[ITEMS_PER_THREAD], + Value (&values)[ITEMS_PER_THREAD], + Key *d_keys, + Value *d_values, + int begin_bit, + int end_bit, + clock_t &stop, + Int2Type is_descending, + Int2Type is_blocked_output) +{ + BlockRadixSort(temp_storage).SortDescendingBlockedToStriped(keys, values, begin_bit, end_bit); + stop = clock(); + StoreDirectStriped(threadIdx.x, d_keys, keys); + StoreDirectStriped(threadIdx.x, d_values, values); +} + +/// Specialized ascending, blocked -> blocked +template +__device__ __forceinline__ void TestBlockSort( + typename BlockRadixSort::TempStorage &temp_storage, + Key (&keys)[ITEMS_PER_THREAD], + Value (&values)[ITEMS_PER_THREAD], + Key *d_keys, + Value *d_values, + int begin_bit, + int end_bit, + clock_t &stop, + Int2Type is_descending, + Int2Type is_blocked_output) +{ + BlockRadixSort(temp_storage).Sort(keys, values, begin_bit, end_bit); + stop = clock(); + StoreDirectBlocked(threadIdx.x, d_keys, keys); + StoreDirectBlocked(threadIdx.x, d_values, values); +} + +/// Specialized ascending, blocked -> striped +template +__device__ __forceinline__ void TestBlockSort( + typename BlockRadixSort::TempStorage &temp_storage, + Key (&keys)[ITEMS_PER_THREAD], + Value (&values)[ITEMS_PER_THREAD], + Key *d_keys, + Value *d_values, + int begin_bit, + int end_bit, + clock_t &stop, + Int2Type is_descending, + Int2Type is_blocked_output) +{ + BlockRadixSort(temp_storage).SortBlockedToStriped(keys, values, begin_bit, end_bit); + stop = clock(); + StoreDirectStriped(threadIdx.x, d_keys, keys); + StoreDirectStriped(threadIdx.x, d_values, values); +} + + + +/** + * BlockRadixSort kernel + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + int RADIX_BITS, + bool MEMOIZE_OUTER_SCAN, + BlockScanAlgorithm INNER_SCAN_ALGORITHM, + cudaSharedMemConfig SMEM_CONFIG, + int DESCENDING, + int BLOCKED_OUTPUT, + typename Key, + typename Value> +__launch_bounds__ 
(BLOCK_THREADS, 1) +__global__ void Kernel( + Key *d_keys, + Value *d_values, + int begin_bit, + int end_bit, + clock_t *d_elapsed) +{ + // Threadblock load/store abstraction types + typedef BlockRadixSort< + Key, + BLOCK_THREADS, + ITEMS_PER_THREAD, + Value, + RADIX_BITS, + MEMOIZE_OUTER_SCAN, + INNER_SCAN_ALGORITHM, + SMEM_CONFIG> + BlockRadixSortT; + + // Allocate temp storage in shared memory + __shared__ typename BlockRadixSortT::TempStorage temp_storage; + + // Items per thread + Key keys[ITEMS_PER_THREAD]; + Value values[ITEMS_PER_THREAD]; + + LoadDirectBlocked(threadIdx.x, d_keys, keys); + LoadDirectBlocked(threadIdx.x, d_values, values); + + // Start cycle timer + clock_t stop; + clock_t start = clock(); + + TestBlockSort( + temp_storage, keys, values, d_keys, d_values, begin_bit, end_bit, stop, Int2Type(), Int2Type()); + + // Store time + if (threadIdx.x == 0) + *d_elapsed = (start > stop) ? start - stop : stop - start; +} + + + +//--------------------------------------------------------------------- +// Host testing subroutines +//--------------------------------------------------------------------- + + +/** + * Simple key-value pairing + */ +template < + typename Key, + typename Value, + bool IS_FLOAT = (Traits::CATEGORY == FLOATING_POINT)> +struct Pair +{ + Key key; + Value value; + + bool operator<(const Pair &b) const + { + return (key < b.key); + } +}; + +/** + * Simple key-value pairing (specialized for floating point types) + */ +template +struct Pair +{ + Key key; + Value value; + + bool operator<(const Pair &b) const + { + if (key < b.key) + return true; + + if (key > b.key) + return false; + + // Key in unsigned bits + typedef typename Traits::UnsignedBits UnsignedBits; + + // Return true if key is negative zero and b.key is positive zero + UnsignedBits key_bits = *reinterpret_cast(const_cast(&key)); + UnsignedBits b_key_bits = *reinterpret_cast(const_cast(&b.key)); + UnsignedBits HIGH_BIT = Traits::HIGH_BIT; + + return ((key_bits & HIGH_BIT) != 0) && ((b_key_bits & HIGH_BIT) == 0); + } +}; + + +/** + * Initialize key-value sorting problem. 
+ */ +template +void Initialize( + GenMode gen_mode, + Key *h_keys, + Value *h_values, + Key *h_reference_keys, + Value *h_reference_values, + int num_items, + int entropy_reduction, + int begin_bit, + int end_bit) +{ + Pair *h_pairs = new Pair[num_items]; + + for (int i = 0; i < num_items; ++i) + { + InitValue(gen_mode, h_keys[i], i); + + RandomBits(h_values[i]); + + // Mask off unwanted portions + int num_bits = end_bit - begin_bit; + if ((begin_bit > 0) || (end_bit < sizeof(Key) * 8)) + { + unsigned long long base = 0; + memcpy(&base, &h_keys[i], sizeof(Key)); + base &= ((1ull << num_bits) - 1) << begin_bit; + memcpy(&h_keys[i], &base, sizeof(Key)); + } + + h_pairs[i].key = h_keys[i]; + h_pairs[i].value = h_values[i]; + } + + if (DESCENDING) std::reverse(h_pairs, h_pairs + num_items); + std::stable_sort(h_pairs, h_pairs + num_items); + if (DESCENDING) std::reverse(h_pairs, h_pairs + num_items); + + for (int i = 0; i < num_items; ++i) + { + h_reference_keys[i] = h_pairs[i].key; + h_reference_values[i] = h_pairs[i].value; + } + + delete[] h_pairs; +} + + + + +/** + * Test BlockRadixSort kernel + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + int RADIX_BITS, + bool MEMOIZE_OUTER_SCAN, + BlockScanAlgorithm INNER_SCAN_ALGORITHM, + cudaSharedMemConfig SMEM_CONFIG, + bool DESCENDING, + bool BLOCKED_OUTPUT, + typename Key, + typename Value> +void TestDriver( + GenMode gen_mode, + int entropy_reduction, + int begin_bit, + int end_bit) +{ + enum + { + TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD, + KEYS_ONLY = Equals::VALUE, + }; + + // Allocate host arrays + Key *h_keys = new Key[TILE_SIZE]; + Key *h_reference_keys = new Key[TILE_SIZE]; + Value *h_values = new Value[TILE_SIZE]; + Value *h_reference_values = new Value[TILE_SIZE]; + + // Allocate device arrays + Key *d_keys = NULL; + Value *d_values = NULL; + clock_t *d_elapsed = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_keys, sizeof(Key) * TILE_SIZE)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_values, sizeof(Value) * TILE_SIZE)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_elapsed, sizeof(clock_t))); + + // Initialize problem and solution on host + Initialize(gen_mode, h_keys, h_values, h_reference_keys, h_reference_values, + TILE_SIZE, entropy_reduction, begin_bit, end_bit); + + // Copy problem to device + CubDebugExit(cudaMemcpy(d_keys, h_keys, sizeof(Key) * TILE_SIZE, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(d_values, h_values, sizeof(Value) * TILE_SIZE, cudaMemcpyHostToDevice)); + + printf("%s " + "BLOCK_THREADS(%d) " + "ITEMS_PER_THREAD(%d) " + "RADIX_BITS(%d) " + "MEMOIZE_OUTER_SCAN(%d) " + "INNER_SCAN_ALGORITHM(%d) " + "SMEM_CONFIG(%d) " + "DESCENDING(%d) " + "BLOCKED_OUTPUT(%d) " + "sizeof(Key)(%d) " + "sizeof(Value)(%d) " + "gen_mode(%d), " + "entropy_reduction(%d) " + "begin_bit(%d) " + "end_bit(%d), " + "samples(%d)\n", + ((KEYS_ONLY) ? 
"Keys-only" : "Key-value"), + BLOCK_THREADS, + ITEMS_PER_THREAD, + RADIX_BITS, + MEMOIZE_OUTER_SCAN, + INNER_SCAN_ALGORITHM, + SMEM_CONFIG, + DESCENDING, + BLOCKED_OUTPUT, + (int) sizeof(Key), + (int) sizeof(Value), + gen_mode, + entropy_reduction, + begin_bit, + end_bit, + g_num_rand_samples); + + // Set shared memory config + cudaDeviceSetSharedMemConfig(SMEM_CONFIG); + + // Run kernel + Kernel<<<1, BLOCK_THREADS>>>( + d_keys, d_values, begin_bit, end_bit, d_elapsed); + + // Flush kernel output / errors + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + + // Check keys results + printf("\tKeys: "); + int compare = CompareDeviceResults(h_reference_keys, d_keys, TILE_SIZE, g_verbose, g_verbose); + printf("%s\n", compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Check value results + if (!KEYS_ONLY) + { + printf("\tValues: "); + int compare = CompareDeviceResults(h_reference_values, d_values, TILE_SIZE, g_verbose, g_verbose); + printf("%s\n", compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + } + printf("\n"); + + printf("\tElapsed clocks: "); + DisplayDeviceResults(d_elapsed, 1); + printf("\n"); + + // Cleanup + if (h_keys) delete[] h_keys; + if (h_reference_keys) delete[] h_reference_keys; + if (h_values) delete[] h_values; + if (h_reference_values) delete[] h_reference_values; + if (d_keys) CubDebugExit(g_allocator.DeviceFree(d_keys)); + if (d_values) CubDebugExit(g_allocator.DeviceFree(d_values)); + if (d_elapsed) CubDebugExit(g_allocator.DeviceFree(d_elapsed)); +} + + +/** + * Test driver (valid tile size <= MAX_SMEM_BYTES) + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + int RADIX_BITS, + bool MEMOIZE_OUTER_SCAN, + BlockScanAlgorithm INNER_SCAN_ALGORITHM, + cudaSharedMemConfig SMEM_CONFIG, + bool DESCENDING, + bool BLOCKED_OUTPUT, + typename Key, + typename Value> +void TestValid(Int2Type fits_smem_capacity) +{ + // Iterate begin_bit + for (int begin_bit = 0; begin_bit <= 1; begin_bit++) + { + // Iterate end bit + for (int end_bit = begin_bit + 1; end_bit <= sizeof(Key) * 8; end_bit = end_bit * 2 + begin_bit) + { + // Uniform key distribution + TestDriver( + UNIFORM, 0, begin_bit, end_bit); + + // Sequential key distribution + TestDriver( + INTEGER_SEED, 0, begin_bit, end_bit); + + // Iterate random with entropy_reduction + for (int entropy_reduction = 0; entropy_reduction <= 9; entropy_reduction += 3) + { + TestDriver( + RANDOM, entropy_reduction, begin_bit, end_bit); + } + } + } +} + + +/** + * Test driver (invalid tile size) + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + int RADIX_BITS, + bool MEMOIZE_OUTER_SCAN, + BlockScanAlgorithm INNER_SCAN_ALGORITHM, + cudaSharedMemConfig SMEM_CONFIG, + bool DESCENDING, + bool BLOCKED_OUTPUT, + typename Key, + typename Value> +void TestValid(Int2Type fits_smem_capacity) +{} + + +/** + * Test ascending/descending and to-blocked/to-striped + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + int RADIX_BITS, + bool MEMOIZE_OUTER_SCAN, + BlockScanAlgorithm INNER_SCAN_ALGORITHM, + cudaSharedMemConfig SMEM_CONFIG, + typename Key, + typename Value> +void Test() +{ + // Check size of smem storage for the target arch to make sure it will fit + typedef BlockRadixSort BlockRadixSortT; + +#if defined(SM100) || defined(SM110) || defined(SM130) + Int2Type fits_smem_capacity; +#else + Int2Type<(sizeof(typename BlockRadixSortT::TempStorage) <= 48 * 1024)> fits_smem_capacity; +#endif + + // Sort-ascending, to-striped + TestValid(fits_smem_capacity); + + // 
Sort-descending, to-blocked + TestValid(fits_smem_capacity); + + // Not necessary +// TestValid(fits_smem_capacity); +// TestValid(fits_smem_capacity); +} + + +/** + * Test value type and smem config + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + int RADIX_BITS, + bool MEMOIZE_OUTER_SCAN, + BlockScanAlgorithm INNER_SCAN_ALGORITHM, + typename Key> +void TestKeys() +{ + // Test keys-only sorting with both smem configs + Test(); // Keys-only (4-byte smem bank config) +#if !defined(SM100) && !defined(SM110) && !defined(SM130) && !defined(SM200) + Test(); // Keys-only (8-byte smem bank config) +#endif +} + + +/** + * Test value type and smem config + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + int RADIX_BITS, + bool MEMOIZE_OUTER_SCAN, + BlockScanAlgorithm INNER_SCAN_ALGORITHM, + typename Key> +void TestKeysAndPairs() +{ + // Test pairs sorting with only 4-byte configs + Test(); // With small-values + Test(); // With same-values + Test(); // With large values +} + + +/** + * Test key type + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + int RADIX_BITS, + bool MEMOIZE_OUTER_SCAN, + BlockScanAlgorithm INNER_SCAN_ALGORITHM> +void Test() +{ + // Get ptx version + int ptx_version; + CubDebugExit(PtxVersion(ptx_version)); + +#ifdef TEST_KEYS_ONLY + + // Test unsigned types with keys-only + TestKeys(); + TestKeys(); + TestKeys(); + TestKeys(); + TestKeys(); + +#else + + // Test signed and fp types with paired values + TestKeysAndPairs(); + TestKeysAndPairs(); + TestKeysAndPairs(); + TestKeysAndPairs(); + TestKeysAndPairs(); + TestKeysAndPairs(); + if (ptx_version > 120) + { + // Don't check doubles on PTX120 or below because they're down-converted + TestKeysAndPairs(); + } + +#endif +} + + +/** + * Test inner scan algorithm + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + int RADIX_BITS, + bool MEMOIZE_OUTER_SCAN> +void Test() +{ + Test(); + Test(); +} + + +/** + * Test outer scan algorithm + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + int RADIX_BITS> +void Test() +{ + Test(); + Test(); +} + + +/** + * Test radix bits + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD> +void Test() +{ + Test(); + Test(); + Test(); +} + + +/** + * Test items per thread + */ +template +void Test() +{ + Test(); +#if defined(SM100) || defined(SM110) || defined(SM130) + // Open64 compiler can't handle the number of test cases +#else + Test(); +#endif + Test(); +} + + + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + +#ifdef QUICK_TEST + + { + typedef float T; + TestDriver<32, 4, 4, true, BLOCK_SCAN_WARP_SCANS, cudaSharedMemBankSizeFourByte, false, false, T, NullType>(INTEGER_SEED, 0, 0, sizeof(T) * 8); + } +/* + // Compile/run quick tests + typedef unsigned int T; + TestDriver<64, 17, 4, true, BLOCK_SCAN_WARP_SCANS, cudaSharedMemBankSizeFourByte, false, false, T, NullType>(RANDOM, 0, 0, sizeof(T) * 8); + TestDriver<96, 8, 4, true, BLOCK_SCAN_WARP_SCANS, cudaSharedMemBankSizeFourByte, false, false, T, NullType>(RANDOM, 0, 0, sizeof(T) * 8); + TestDriver<128, 2, 4, true, BLOCK_SCAN_WARP_SCANS, cudaSharedMemBankSizeFourByte, false, false, T, NullType>(RANDOM, 0, 0, sizeof(T) * 8); +*/ + +#else + + // Compile/run thorough tests + 
Test<32>(); + Test<64>(); + Test<160>(); + + +#endif // QUICK_TEST + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_reduce.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_reduce.cu new file mode 100644 index 0000000..c8df4bc --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_reduce.cu @@ -0,0 +1,822 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
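The radix-sort harness above dispatches to the Sort/SortDescending and blocked/striped-output variants of cub::BlockRadixSort. A keys-only sketch with an assumed 128-thread / 4-keys-per-thread configuration (kernel name and sizes are illustrative, not the test's):

#include <cub/cub.cuh>

// Hypothetical sketch: sort a blocked arrangement of 128*4 keys per thread block.
__global__ void SortTileSketch(int *d_keys)
{
    typedef cub::BlockRadixSort<int, 128, 4> BlockRadixSortT;
    __shared__ typename BlockRadixSortT::TempStorage temp_storage;

    int keys[4];
    cub::LoadDirectBlocked(threadIdx.x, d_keys, keys);

    // Ascending sort with blocked output; SortDescending() and
    // SortBlockedToStriped() cover the other variants tested above.
    BlockRadixSortT(temp_storage).Sort(keys);

    cub::StoreDirectBlocked(threadIdx.x, d_keys, keys);
}

Key-value sorting adds a value type as the fourth template argument and passes a second per-thread array to Sort(), as the TestBlockSort specializations above do.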
+ * + ******************************************************************************/ + +/****************************************************************************** + * Test of BlockReduce utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include + +#include +#include + +#include +#include +#include +#include +#include + +#include "test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; +int g_repeat = 0; +CachingDeviceAllocator g_allocator(true); + + + +//--------------------------------------------------------------------- +// Test kernels +//--------------------------------------------------------------------- + + +/// Generic reduction (full, 1) +template +__device__ __forceinline__ T DeviceTest( + BlockReduceT &block_reduce, T (&data)[1], ReductionOp &reduction_op) +{ + return block_reduce.Reduce(data[0], reduction_op); +} + +/// Generic reduction (full, ITEMS_PER_THREAD) +template +__device__ __forceinline__ T DeviceTest( + BlockReduceT &block_reduce, T (&data)[ITEMS_PER_THREAD], ReductionOp &reduction_op) +{ + return block_reduce.Reduce(data, reduction_op); +} + +/// Generic reduction (partial, 1) +template +__device__ __forceinline__ T DeviceTest( + BlockReduceT &block_reduce, T &data, ReductionOp &reduction_op, int valid_threads) +{ + return block_reduce.Reduce(data, reduction_op, valid_threads); +} + +/// Sum reduction (full, 1) +template +__device__ __forceinline__ T DeviceTest( + BlockReduceT &block_reduce, T (&data)[1], Sum &reduction_op) +{ + return block_reduce.Sum(data[0]); +} + +/// Sum reduction (full, ITEMS_PER_THREAD) +template +__device__ __forceinline__ T DeviceTest( + BlockReduceT &block_reduce, T (&data)[ITEMS_PER_THREAD], Sum &reduction_op) +{ + return block_reduce.Sum(data); +} + +/// Sum reduction (partial, 1) +template +__device__ __forceinline__ T DeviceTest( + BlockReduceT &block_reduce, T &data, Sum &reduction_op, int valid_threads) +{ + return block_reduce.Sum(data, valid_threads); +} + + +/** + * Test full-tile reduction kernel (where num_items is an even + * multiple of BLOCK_THREADS) + */ +template < + BlockReduceAlgorithm ALGORITHM, + int BLOCK_DIM_X, + int BLOCK_DIM_Y, + int BLOCK_DIM_Z, + int ITEMS_PER_THREAD, + typename T, + typename ReductionOp> +__launch_bounds__ (BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z) +__global__ void FullTileReduceKernel( + T *d_in, + T *d_out, + ReductionOp reduction_op, + int tiles, + clock_t *d_elapsed) +{ + const int BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z; + const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD; + + // Cooperative thread block reduction utility type (returns aggregate in thread 0) + typedef BlockReduce BlockReduceT; + + // Allocate temp storage in shared memory + __shared__ typename BlockReduceT::TempStorage temp_storage; + + int linear_tid = RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z); + + // Per-thread tile data + T data[ITEMS_PER_THREAD]; + + // Load first tile of data + int block_offset = 0; + + if (block_offset < TILE_SIZE * tiles) + { + LoadDirectBlocked(linear_tid, d_in + block_offset, data); + block_offset += TILE_SIZE; + + // Start cycle timer + clock_t start = clock(); + + // Cooperative reduce first tile + BlockReduceT block_reduce(temp_storage) ; + T 
block_aggregate = DeviceTest(block_reduce, data, reduction_op); + + // Stop cycle timer + #if CUB_PTX_ARCH == 100 + // Bug: recording stop clock causes mis-write of running prefix value + clock_t stop = 0; +#else + clock_t stop = clock(); +#endif // CUB_PTX_ARCH == 100 + clock_t elapsed = (start > stop) ? start - stop : stop - start; + + // Loop over input tiles + while (block_offset < TILE_SIZE * tiles) + { + // TestBarrier between thread block reductions + __syncthreads(); + + // Load tile of data + LoadDirectBlocked(linear_tid, d_in + block_offset, data); + block_offset += TILE_SIZE; + + // Start cycle timer + clock_t start = clock(); + + // Cooperatively reduce the tile's aggregate + BlockReduceT block_reduce(temp_storage) ; + T tile_aggregate = DeviceTest(block_reduce, data, reduction_op); + + // Stop cycle timer +#if CUB_PTX_ARCH == 100 + // Bug: recording stop clock causes mis-write of running prefix value + clock_t stop = 0; +#else + clock_t stop = clock(); +#endif // CUB_PTX_ARCH == 100 + elapsed += (start > stop) ? start - stop : stop - start; + + // Reduce thread block aggregate + block_aggregate = reduction_op(block_aggregate, tile_aggregate); + } + + // Store data + if (linear_tid == 0) + { + d_out[0] = block_aggregate; + *d_elapsed = elapsed; + } + } +} + + + +/** + * Test partial-tile reduction kernel (where num_items < BLOCK_THREADS) + */ +template < + BlockReduceAlgorithm ALGORITHM, + int BLOCK_DIM_X, + int BLOCK_DIM_Y, + int BLOCK_DIM_Z, + typename T, + typename ReductionOp> +__launch_bounds__ (BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z) +__global__ void PartialTileReduceKernel( + T *d_in, + T *d_out, + int num_items, + ReductionOp reduction_op, + clock_t *d_elapsed) +{ + // Cooperative thread block reduction utility type (returns aggregate only in thread-0) + typedef BlockReduce BlockReduceT; + + // Allocate temp storage in shared memory + __shared__ typename BlockReduceT::TempStorage temp_storage; + + int linear_tid = RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z); + + // Per-thread tile data + T partial; + + // Load partial tile data + if (linear_tid < num_items) + { + partial = d_in[linear_tid]; + } + + // Start cycle timer + clock_t start = clock(); + + // Cooperatively reduce the tile's aggregate + BlockReduceT block_reduce(temp_storage) ; + T tile_aggregate = DeviceTest(block_reduce, partial, reduction_op, num_items); + + // Stop cycle timer +#if CUB_PTX_ARCH == 100 + // Bug: recording stop clock causes mis-write of running prefix value + clock_t stop = 0; +#else + clock_t stop = clock(); +#endif // CUB_PTX_ARCH == 100 + + clock_t elapsed = (start > stop) ? 
start - stop : stop - start; + + // Store data + if (linear_tid == 0) + { + d_out[0] = tile_aggregate; + *d_elapsed = elapsed; + } +} + + +//--------------------------------------------------------------------- +// Host utility subroutines +//--------------------------------------------------------------------- + +/** + * Initialize problem (and solution) + */ +template < + typename T, + typename ReductionOp> +void Initialize( + GenMode gen_mode, + T *h_in, + T h_reference[1], + ReductionOp reduction_op, + int num_items) +{ + for (int i = 0; i < num_items; ++i) + { + InitValue(gen_mode, h_in[i], i); + if (i == 0) + h_reference[0] = h_in[0]; + else + h_reference[0] = reduction_op(h_reference[0], h_in[i]); + } + + if (g_verbose) + { + printf("Input:\n"); + DisplayResults(h_in, num_items); + printf("\n"); + } +} + + +//--------------------------------------------------------------------- +// Full tile test generation +//--------------------------------------------------------------------- + + +/** + * Test full-tile reduction. (Specialized for sufficient resources) + */ +template < + BlockReduceAlgorithm ALGORITHM, + int BLOCK_DIM_X, + int BLOCK_DIM_Y, + int BLOCK_DIM_Z, + int ITEMS_PER_THREAD, + typename T, + typename ReductionOp> +void TestFullTile( + GenMode gen_mode, + int tiles, + ReductionOp reduction_op, + Int2Type sufficient_resources) +{ + const int BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z; + const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD; + + int num_items = TILE_SIZE * tiles; + + // Allocate host arrays + T *h_in = new T[num_items]; + T h_reference[1]; + + // Initialize problem + Initialize(gen_mode, h_in, h_reference, reduction_op, num_items); + + // Initialize/clear device arrays + T *d_in = NULL; + T *d_out = NULL; + clock_t *d_elapsed = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_elapsed, sizeof(unsigned long long))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(T) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(T) * 1)); + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(T) * num_items, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemset(d_out, 0, sizeof(T) * 1)); + + // Test multi-tile (unguarded) + printf("TestFullTile %s, %s, gen-mode %d, num_items(%d), BLOCK_THREADS(%d) (%d,%d,%d), ITEMS_PER_THREAD(%d), tiles(%d), %s (%d bytes) elements:\n", + Equals::VALUE ? "Sum" : "Max", + (ALGORITHM == BLOCK_REDUCE_RAKING) ? "BLOCK_REDUCE_RAKING" : (ALGORITHM == BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY) ? "BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY" : "BLOCK_REDUCE_WARP_REDUCTIONS", + gen_mode, + num_items, + BLOCK_THREADS, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, + ITEMS_PER_THREAD, + tiles, + typeid(T).name(), + (int) sizeof(T)); + fflush(stdout); + + dim3 block_dims(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z); + FullTileReduceKernel<<<1, block_dims>>>( + d_in, + d_out, + reduction_op, + tiles, + d_elapsed); + + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + + // Copy out and display results + printf("\tReduction results: "); + int compare = CompareDeviceResults(h_reference, d_out, 1, g_verbose, g_verbose); + printf("%s\n", compare ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + + printf("\tElapsed clocks: "); + DisplayDeviceResults(d_elapsed, 1); + + // Cleanup + if (h_in) delete[] h_in; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_elapsed) CubDebugExit(g_allocator.DeviceFree(d_elapsed)); +} + + +/** + * Test full-tile reduction. (Specialized for insufficient resources) + */ +template < + BlockReduceAlgorithm ALGORITHM, + int BLOCK_DIM_X, + int BLOCK_DIM_Y, + int BLOCK_DIM_Z, + int ITEMS_PER_THREAD, + typename T, + typename ReductionOp> +void TestFullTile( + GenMode gen_mode, + int tiles, + ReductionOp reduction_op, + Int2Type sufficient_resources) +{} + + +/** + * Test full-tile reduction. + */ +template < + BlockReduceAlgorithm ALGORITHM, + int BLOCK_DIM_X, + int BLOCK_DIM_Y, + int BLOCK_DIM_Z, + int ITEMS_PER_THREAD, + typename T, + typename ReductionOp> +void TestFullTile( + GenMode gen_mode, + int tiles, + ReductionOp reduction_op) +{ + // Check size of smem storage for the target arch to make sure it will fit + typedef BlockReduce BlockReduceT; + + enum + { +#if defined(SM100) || defined(SM110) || defined(SM130) + sufficient_smem = (sizeof(typename BlockReduceT::TempStorage) <= 16 * 1024), + sufficient_threads = ((BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z) <= 512), +#else + sufficient_smem = (sizeof(typename BlockReduceT::TempStorage) <= 48 * 1024), + sufficient_threads = ((BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z) <= 1024), +#endif + }; + + TestFullTile(gen_mode, tiles, reduction_op, Int2Type()); +} + + +/** + * Run battery of tests for different thread block dimensions + */ +template < + BlockReduceAlgorithm ALGORITHM, + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + typename T, + typename ReductionOp> +void TestFullTile( + GenMode gen_mode, + int tiles, + ReductionOp reduction_op) +{ + TestFullTile(gen_mode, tiles, reduction_op); + TestFullTile(gen_mode, tiles, reduction_op); +} + +/** + * Run battery of tests for different thread items + */ +template < + BlockReduceAlgorithm ALGORITHM, + int BLOCK_THREADS, + typename T, + typename ReductionOp> +void TestFullTile( + GenMode gen_mode, + int tiles, + ReductionOp reduction_op) +{ + TestFullTile(gen_mode, tiles, reduction_op); + TestFullTile(gen_mode, tiles, reduction_op); +} + + +/** + * Run battery of full-tile tests for different numbers of tiles + */ +template < + BlockReduceAlgorithm ALGORITHM, + int BLOCK_THREADS, + typename T, + typename ReductionOp> +void TestFullTile( + GenMode gen_mode, + ReductionOp reduction_op) +{ + for (int tiles = 1; tiles < 3; tiles++) + { + TestFullTile(gen_mode, tiles, reduction_op); + } +} + + +//--------------------------------------------------------------------- +// Partial-tile test generation +//--------------------------------------------------------------------- + +/** + * Test partial-tile reduction. 
(Specialized for sufficient resources) + */ +template < + BlockReduceAlgorithm ALGORITHM, + int BLOCK_DIM_X, + int BLOCK_DIM_Y, + int BLOCK_DIM_Z, + typename T, + typename ReductionOp> +void TestPartialTile( + GenMode gen_mode, + int num_items, + ReductionOp reduction_op, + Int2Type sufficient_resources) +{ + const int BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z; + const int TILE_SIZE = BLOCK_THREADS; + + // Allocate host arrays + T *h_in = new T[num_items]; + T h_reference[1]; + + // Initialize problem + Initialize(gen_mode, h_in, h_reference, reduction_op, num_items); + + // Initialize/clear device arrays + T *d_in = NULL; + T *d_out = NULL; + clock_t *d_elapsed = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_elapsed, sizeof(unsigned long long))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(T) * TILE_SIZE)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(T) * 1)); + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(T) * num_items, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemset(d_out, 0, sizeof(T) * 1)); + + printf("TestPartialTile %s, gen-mode %d, num_items(%d), BLOCK_THREADS(%d) (%d,%d,%d), %s (%d bytes) elements:\n", + (ALGORITHM == BLOCK_REDUCE_RAKING) ? "BLOCK_REDUCE_RAKING" : (ALGORITHM == BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY) ? "BLOCK_REDUCE_RAKING_COMMUTATIVE_ONLY" : "BLOCK_REDUCE_WARP_REDUCTIONS", + gen_mode, + num_items, + BLOCK_THREADS, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, + typeid(T).name(), + (int) sizeof(T)); + fflush(stdout); + + dim3 block_dims(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z); + PartialTileReduceKernel<<<1, block_dims>>>( + d_in, + d_out, + num_items, + reduction_op, + d_elapsed); + + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + + // Copy out and display results + printf("\tReduction results: "); + int compare = CompareDeviceResults(h_reference, d_out, 1, g_verbose, g_verbose); + printf("%s\n", compare ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + + printf("\tElapsed clocks: "); + DisplayDeviceResults(d_elapsed, 1); + + // Cleanup + if (h_in) delete[] h_in; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_elapsed) CubDebugExit(g_allocator.DeviceFree(d_elapsed)); +} + + + +/** + * Test partial-tile reduction (specialized for insufficient resources) + */ +template < + BlockReduceAlgorithm ALGORITHM, + int BLOCK_DIM_X, + int BLOCK_DIM_Y, + int BLOCK_DIM_Z, + typename T, + typename ReductionOp> +void TestPartialTile( + GenMode gen_mode, + int num_items, + ReductionOp reduction_op, + Int2Type sufficient_resources) +{} + + +/** + * Run battery of partial-tile tests for different numbers of effective threads and thread dimensions + */ +template < + BlockReduceAlgorithm ALGORITHM, + int BLOCK_DIM_X, + int BLOCK_DIM_Y, + int BLOCK_DIM_Z, + typename T, + typename ReductionOp> +void TestPartialTile( + GenMode gen_mode, + int num_items, + ReductionOp reduction_op) +{ + // Check size of smem storage for the target arch to make sure it will fit + typedef BlockReduce BlockReduceT; + + enum + { +#if defined(SM100) || defined(SM110) || defined(SM130) + sufficient_smem = sizeof(typename BlockReduceT::TempStorage) <= 16 * 1024, + sufficient_threads = (BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z) <= 512, +#else + sufficient_smem = sizeof(typename BlockReduceT::TempStorage) <= 48 * 1024, + sufficient_threads = (BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z) <= 1024, +#endif + }; + + TestPartialTile(gen_mode, num_items, reduction_op, Int2Type()); +} + + + +/** + * Run battery of partial-tile tests for different numbers of effective threads and thread dimensions + */ +template < + BlockReduceAlgorithm ALGORITHM, + int BLOCK_THREADS, + typename T, + typename ReductionOp> +void TestPartialTile( + GenMode gen_mode, + ReductionOp reduction_op) +{ + for ( + int num_items = 1; + num_items < BLOCK_THREADS; + num_items += CUB_MAX(1, BLOCK_THREADS / 5)) + { + TestPartialTile(gen_mode, num_items, reduction_op); + TestPartialTile(gen_mode, num_items, reduction_op); + } +} + + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Run battery of full-tile tests for different gen modes + */ +template < + BlockReduceAlgorithm ALGORITHM, + int BLOCK_THREADS, + typename T, + typename ReductionOp> +void Test( + ReductionOp reduction_op) +{ + TestFullTile(UNIFORM, reduction_op); + TestPartialTile(UNIFORM, reduction_op); + + TestFullTile(INTEGER_SEED, reduction_op); + TestPartialTile(INTEGER_SEED, reduction_op); + + if (Traits::CATEGORY != FLOATING_POINT) + { + // Don't test randomly-generated floats b/c of stability + TestFullTile(RANDOM, reduction_op); + TestPartialTile(RANDOM, reduction_op); + } +} + + +/** + * Run battery of tests for different block-reduction algorithmic variants + */ +template < + int BLOCK_THREADS, + typename T, + typename ReductionOp> +void Test( + ReductionOp reduction_op) +{ +#ifdef TEST_RAKING + Test(reduction_op); + Test(reduction_op); +#endif +#ifdef TEST_WARP_REDUCTIONS + Test(reduction_op); +#endif +} + + +/** + * Run battery of tests for different block sizes + */ +template < + typename T, + typename ReductionOp> +void Test( + ReductionOp reduction_op) +{ + Test<7, T>(reduction_op); + Test<32, T>(reduction_op); + Test<63, T>(reduction_op); + Test<97, T>(reduction_op); + Test<128, T>(reduction_op); + Test<238, 
T>(reduction_op); +} + + +/** + * Run battery of tests for different block sizes + */ +template +void Test() +{ + Test(Sum()); + Test(Max()); +} + + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("repeat", g_repeat); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=] " + "[--repeat=]" + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Get ptx version + int ptx_version; + CubDebugExit(PtxVersion(ptx_version)); + +#ifdef QUICK_TEST + + // Compile/run quick tests + + + printf("\n full tile ------------------------\n\n"); + + TestFullTile(RANDOM, 1, Sum()); + TestFullTile(RANDOM, 1, Sum()); + TestFullTile(RANDOM, 1, Sum()); + + TestFullTile(RANDOM, 1, Sum()); + TestFullTile(RANDOM, 1, Sum()); + TestFullTile(RANDOM, 1, Sum()); + + printf("\n partial tile ------------------------\n\n"); + + TestPartialTile(RANDOM, 7, Sum()); + TestPartialTile(RANDOM, 7, Sum()); + TestPartialTile(RANDOM, 7, Sum()); + +#else + + // Compile/run thorough tests + for (int i = 0; i <= g_repeat; ++i) + { + // primitives + Test(); + Test(); + Test(); + Test(); + if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted + Test(); + + Test(); + + // vector types + Test(); + Test(); + Test(); + Test(); + + Test(); + Test(); + Test(); + Test(); + + // Complex types + Test(); + Test(); + } + +#endif + + return 0; +} + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_scan.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_scan.cu new file mode 100644 index 0000000..192fb51 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_block_scan.cu @@ -0,0 +1,929 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
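The reduction harness above covers full tiles (multiple items per thread) and partial tiles (a valid-thread count) for each BlockReduce algorithm. A minimal full-tile sum sketch, assuming 128 threads and 4 items per thread (kernel name and sizes are illustrative):

#include <cub/cub.cuh>

// Hypothetical sketch: sum-reduce 128*4 ints; the aggregate is valid in thread 0 only.
__global__ void SumTileSketch(int *d_in, int *d_out)
{
    typedef cub::BlockReduce<int, 128> BlockReduceT;
    __shared__ typename BlockReduceT::TempStorage temp_storage;

    int items[4];
    cub::LoadDirectBlocked(threadIdx.x, d_in, items);

    // Full-tile sum; partial tiles use the single-item overload with a
    // valid-thread count, e.g. Sum(thread_item, num_valid).
    int aggregate = BlockReduceT(temp_storage).Sum(items);

    if (threadIdx.x == 0)
        d_out[0] = aggregate;
}

Generic operators go through Reduce() with a binary functor, which is what the Max() battery in the test exercises.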
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Test of BlockScan utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "test_util.h" + + +using namespace cub; + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; +int g_repeat = 0; +CachingDeviceAllocator g_allocator(true); + + +/** + * Primitive variant to test + */ +enum TestMode +{ + BASIC, + AGGREGATE, + PREFIX, +}; + + +/** + * Scan mode to test + */ +enum ScanMode +{ + EXCLUSIVE, + INCLUSIVE +}; + + +/** + * \brief WrapperFunctor (for precluding test-specialized dispatch to *Sum variants) + */ +template +struct WrapperFunctor +{ + OpT op; + + WrapperFunctor(OpT op) : op(op) {} + + template + __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const + { + return op(a, b); + } +}; + + +/** + * Stateful prefix functor + */ +template < + typename T, + typename ScanOpT> +struct BlockPrefixCallbackOp +{ + int linear_tid; + T prefix; + ScanOpT scan_op; + + __device__ __forceinline__ + BlockPrefixCallbackOp(int linear_tid, T prefix, ScanOpT scan_op) : + linear_tid(linear_tid), + prefix(prefix), + scan_op(scan_op) + {} + + __device__ __forceinline__ + T operator()(T block_aggregate) + { + // For testing purposes + T retval = (linear_tid == 0) ? 
prefix : T(); + prefix = scan_op(prefix, block_aggregate); + return retval; + } +}; + + +//--------------------------------------------------------------------- +// Exclusive scan +//--------------------------------------------------------------------- + +/// Exclusive scan (BASIC, 1) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[1], T &initial_value, ScanOpT &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, IsPrimitiveT is_primitive) +{ + block_scan.ExclusiveScan(data[0], data[0], initial_value, scan_op); +} + +/// Exclusive scan (BASIC, ITEMS_PER_THREAD) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[ITEMS_PER_THREAD], T &initial_value, ScanOpT &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, IsPrimitiveT is_primitive) +{ + block_scan.ExclusiveScan(data, data, initial_value, scan_op); +} + +/// Exclusive scan (AGGREGATE, 1) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[1], T &initial_value, ScanOpT &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, IsPrimitiveT is_primitive) +{ + block_scan.ExclusiveScan(data[0], data[0], initial_value, scan_op, block_aggregate); +} + +/// Exclusive scan (AGGREGATE, ITEMS_PER_THREAD) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[ITEMS_PER_THREAD], T &initial_value, ScanOpT &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, IsPrimitiveT is_primitive) +{ + block_scan.ExclusiveScan(data, data, initial_value, scan_op, block_aggregate); +} + +/// Exclusive scan (PREFIX, 1) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[1], T &initial_value, ScanOpT &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, IsPrimitiveT is_primitive) +{ + block_scan.ExclusiveScan(data[0], data[0], scan_op, prefix_op); +} + +/// Exclusive scan (PREFIX, ITEMS_PER_THREAD) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[ITEMS_PER_THREAD], T &initial_value, ScanOpT &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, IsPrimitiveT is_primitive) +{ + block_scan.ExclusiveScan(data, data, scan_op, prefix_op); +} + + +//--------------------------------------------------------------------- +// Exclusive sum +//--------------------------------------------------------------------- + +/// Exclusive sum (BASIC, 1) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[1], T &initial_value, Sum &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, Int2Type is_primitive) +{ + block_scan.ExclusiveSum(data[0], data[0]); +} + +/// Exclusive sum (BASIC, ITEMS_PER_THREAD) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[ITEMS_PER_THREAD], T &initial_value, Sum &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, Int2Type is_primitive) +{ + block_scan.ExclusiveSum(data, data); +} + +/// Exclusive sum (AGGREGATE, 1) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[1], T &initial_value, Sum &scan_op, T &block_aggregate, PrefixCallbackOp 
&prefix_op, + Int2Type scan_mode, Int2Type test_mode, Int2Type is_primitive) +{ + block_scan.ExclusiveSum(data[0], data[0], block_aggregate); +} + +/// Exclusive sum (AGGREGATE, ITEMS_PER_THREAD) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[ITEMS_PER_THREAD], T &initial_value, Sum &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, Int2Type is_primitive) +{ + block_scan.ExclusiveSum(data, data, block_aggregate); +} + +/// Exclusive sum (PREFIX, 1) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[1], T &initial_value, Sum &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, Int2Type is_primitive) +{ + block_scan.ExclusiveSum(data[0], data[0], prefix_op); +} + +/// Exclusive sum (PREFIX, ITEMS_PER_THREAD) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[ITEMS_PER_THREAD], T &initial_value, Sum &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, Int2Type is_primitive) +{ + block_scan.ExclusiveSum(data, data, prefix_op); +} + + +//--------------------------------------------------------------------- +// Inclusive scan +//--------------------------------------------------------------------- + +/// Inclusive scan (BASIC, 1) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[1], T &initial_value, ScanOpT &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, IsPrimitiveT is_primitive) +{ + block_scan.InclusiveScan(data[0], data[0], scan_op); +} + +/// Inclusive scan (BASIC, ITEMS_PER_THREAD) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[ITEMS_PER_THREAD], T &initial_value, ScanOpT &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, IsPrimitiveT is_primitive) +{ + block_scan.InclusiveScan(data, data, scan_op); +} + +/// Inclusive scan (AGGREGATE, 1) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[1], T &initial_value, ScanOpT &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, IsPrimitiveT is_primitive) +{ + block_scan.InclusiveScan(data[0], data[0], scan_op, block_aggregate); +} + +/// Inclusive scan (AGGREGATE, ITEMS_PER_THREAD) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[ITEMS_PER_THREAD], T &initial_value, ScanOpT &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, IsPrimitiveT is_primitive) +{ + block_scan.InclusiveScan(data, data, scan_op, block_aggregate); +} + +/// Inclusive scan (PREFIX, 1) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[1], T &initial_value, ScanOpT &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, IsPrimitiveT is_primitive) +{ + block_scan.InclusiveScan(data[0], data[0], scan_op, prefix_op); +} + +/// Inclusive scan (PREFIX, ITEMS_PER_THREAD) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[ITEMS_PER_THREAD], T &initial_value, ScanOpT &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, IsPrimitiveT is_primitive) +{ + block_scan.InclusiveScan(data, data, 
scan_op, prefix_op); +} + + +//--------------------------------------------------------------------- +// Inclusive sum +//--------------------------------------------------------------------- + +/// Inclusive sum (BASIC, 1) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[1], T &initial_value, Sum &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, Int2Type is_primitive) +{ + block_scan.InclusiveSum(data[0], data[0]); +} + +/// Inclusive sum (BASIC, ITEMS_PER_THREAD) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[ITEMS_PER_THREAD], T &initial_value, Sum &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, Int2Type is_primitive) +{ + block_scan.InclusiveSum(data, data); +} + +/// Inclusive sum (AGGREGATE, 1) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[1], T &initial_value, Sum &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, Int2Type is_primitive) +{ + block_scan.InclusiveSum(data[0], data[0], block_aggregate); +} + +/// Inclusive sum (AGGREGATE, ITEMS_PER_THREAD) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[ITEMS_PER_THREAD], T &initial_value, Sum &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, Int2Type is_primitive) +{ + block_scan.InclusiveSum(data, data, block_aggregate); +} + +/// Inclusive sum (PREFIX, 1) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[1], T &initial_value, Sum &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, Int2Type is_primitive) +{ + block_scan.InclusiveSum(data[0], data[0], prefix_op); +} + +/// Inclusive sum (PREFIX, ITEMS_PER_THREAD) +template +__device__ __forceinline__ void DeviceTest( + BlockScanT &block_scan, T (&data)[ITEMS_PER_THREAD], T &initial_value, Sum &scan_op, T &block_aggregate, PrefixCallbackOp &prefix_op, + Int2Type scan_mode, Int2Type test_mode, Int2Type is_primitive) +{ + block_scan.InclusiveSum(data, data, prefix_op); +} + + + +//--------------------------------------------------------------------- +// Test kernels +//--------------------------------------------------------------------- + +/** + * BlockScan test kernel. 
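+ * Descriptive note (added): each thread loads ITEMS_PER_THREAD items with
+ * LoadDirectBlocked, performs one BlockScan invocation (selected by
+ * SCAN_MODE/TEST_MODE via the DeviceTest overloads above), and then stores the
+ * scanned tile with StoreDirectBlocked, the block aggregate (non-BASIC modes),
+ * the running prefix (PREFIX mode), and the elapsed clock count.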
+ */ +template < + int BLOCK_DIM_X, + int BLOCK_DIM_Y, + int BLOCK_DIM_Z, + int ITEMS_PER_THREAD, + ScanMode SCAN_MODE, + TestMode TEST_MODE, + BlockScanAlgorithm ALGORITHM, + typename T, + typename ScanOpT> +__launch_bounds__ (BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z) +__global__ void BlockScanKernel( + T *d_in, + T *d_out, + T *d_aggregate, + ScanOpT scan_op, + T initial_value, + clock_t *d_elapsed) +{ + const int BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z; + const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD; + + // Parameterize BlockScan type for our thread block + typedef BlockScan BlockScanT; + + // Allocate temp storage in shared memory + __shared__ typename BlockScanT::TempStorage temp_storage; + + int linear_tid = RowMajorTid(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z); + + // Per-thread tile data + T data[ITEMS_PER_THREAD]; + LoadDirectBlocked(linear_tid, d_in, data); + + __threadfence_block(); // workaround to prevent clock hoisting + clock_t start = clock(); + __threadfence_block(); // workaround to prevent clock hoisting + + // Test scan + T block_aggregate; + BlockScanT block_scan(temp_storage); + BlockPrefixCallbackOp prefix_op(linear_tid, initial_value, scan_op); + + DeviceTest(block_scan, data, initial_value, scan_op, block_aggregate, prefix_op, + Int2Type(), Int2Type(), Int2Type::PRIMITIVE>()); + + // Stop cycle timer + __threadfence_block(); // workaround to prevent clock hoisting + clock_t stop = clock(); + __threadfence_block(); // workaround to prevent clock hoisting + + // Store output + StoreDirectBlocked(linear_tid, d_out, data); + + // Store block_aggregate + if (TEST_MODE != BASIC) + d_aggregate[linear_tid] = block_aggregate; + + // Store prefix + if (TEST_MODE == PREFIX) + { + if (linear_tid == 0) + d_out[TILE_SIZE] = prefix_op.prefix; + } + + // Store time + if (linear_tid == 0) + *d_elapsed = (start > stop) ? start - stop : stop - start; +} + + + +//--------------------------------------------------------------------- +// Host utility subroutines +//--------------------------------------------------------------------- + +/** + * Initialize exclusive-scan problem (and solution) + */ +template +T Initialize( + GenMode gen_mode, + T *h_in, + T *h_reference, + int num_items, + ScanOpT scan_op, + T initial_value, + Int2Type) +{ + InitValue(gen_mode, h_in[0], 0); + + T block_aggregate = h_in[0]; + h_reference[0] = initial_value; + T inclusive = scan_op(initial_value, h_in[0]); + + for (int i = 1; i < num_items; ++i) + { + InitValue(gen_mode, h_in[i], i); + h_reference[i] = inclusive; + inclusive = scan_op(inclusive, h_in[i]); + block_aggregate = scan_op(block_aggregate, h_in[i]); + } + + return block_aggregate; +} + + +/** + * Initialize inclusive-scan problem (and solution) + */ +template +T Initialize( + GenMode gen_mode, + T *h_in, + T *h_reference, + int num_items, + ScanOpT scan_op, + T initial_value, + Int2Type) +{ + InitValue(gen_mode, h_in[0], 0); + + T block_aggregate = h_in[0]; + T inclusive = scan_op(initial_value, h_in[0]); + h_reference[0] = inclusive; + + for (int i = 1; i < num_items; ++i) + { + InitValue(gen_mode, h_in[i], i); + inclusive = scan_op(inclusive, h_in[i]); + block_aggregate = scan_op(block_aggregate, h_in[i]); + h_reference[i] = inclusive; + } + + return block_aggregate; +} + + +/** + * Test thread block scan. 
(Specialized for sufficient resources) + */ +template < + int BLOCK_DIM_X, + int BLOCK_DIM_Y, + int BLOCK_DIM_Z, + int ITEMS_PER_THREAD, + ScanMode SCAN_MODE, + TestMode TEST_MODE, + BlockScanAlgorithm ALGORITHM, + typename ScanOpT, + typename T> +void Test( + GenMode gen_mode, + ScanOpT scan_op, + T initial_value, + Int2Type sufficient_resources) +{ + const int BLOCK_THREADS = BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z; + const int TILE_SIZE = BLOCK_THREADS * ITEMS_PER_THREAD; + + // Allocate host arrays + T *h_in = new T[TILE_SIZE]; + T *h_reference = new T[TILE_SIZE]; + T *h_aggregate = new T[BLOCK_THREADS]; + + // Initialize problem + T block_aggregate = Initialize( + gen_mode, + h_in, + h_reference, + TILE_SIZE, + scan_op, + initial_value, + Int2Type()); + + // Test reference block_aggregate is returned in all threads + for (int i = 0; i < BLOCK_THREADS; ++i) + { + h_aggregate[i] = block_aggregate; + } + + // Run kernel + printf("Test-mode %d, gen-mode %d, policy %d, %s %s BlockScan, %d (%d,%d,%d) thread block threads, %d items per thread, %d tile size, %s (%d bytes) elements:\n", + TEST_MODE, gen_mode, ALGORITHM, + (SCAN_MODE == INCLUSIVE) ? "Inclusive" : "Exclusive", typeid(ScanOpT).name(), + BLOCK_THREADS, BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z, + ITEMS_PER_THREAD, TILE_SIZE, + typeid(T).name(), (int) sizeof(T)); + fflush(stdout); + + // Initialize/clear device arrays + T *d_in = NULL; + T *d_out = NULL; + T *d_aggregate = NULL; + clock_t *d_elapsed = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_elapsed, sizeof(unsigned long long))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(T) * TILE_SIZE)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(T) * (TILE_SIZE + 2))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_aggregate, sizeof(T) * BLOCK_THREADS)); + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(T) * TILE_SIZE, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemset(d_out, 0, sizeof(T) * (TILE_SIZE + 1))); + CubDebugExit(cudaMemset(d_aggregate, 0, sizeof(T) * BLOCK_THREADS)); + + // Display input problem data + if (g_verbose) + { + printf("Input data: "); + for (int i = 0; i < TILE_SIZE; i++) + { + std::cout << CoutCast(h_in[i]) << ", "; + } + printf("\n\n"); + } + + // Run block_aggregate/prefix kernel + dim3 block_dims(BLOCK_DIM_X, BLOCK_DIM_Y, BLOCK_DIM_Z); + BlockScanKernel<<<1, block_dims>>>( + d_in, + d_out, + d_aggregate, + scan_op, + initial_value, + d_elapsed); + + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + + // Copy out and display results + printf("\tScan results: "); + int compare = CompareDeviceResults(h_reference, d_out, TILE_SIZE, g_verbose, g_verbose); + printf("%s\n", compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + if (TEST_MODE == AGGREGATE) + { + // Copy out and display block_aggregate + printf("\tScan block aggregate: "); + compare = CompareDeviceResults(h_aggregate, d_aggregate, BLOCK_THREADS, g_verbose, g_verbose); + printf("%s\n", compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + } + + if (TEST_MODE == PREFIX) + { + // Copy out and display updated prefix + printf("\tScan running total: "); + T running_total = scan_op(initial_value, block_aggregate); + compare = CompareDeviceResults(&running_total, d_out + TILE_SIZE, 1, g_verbose, g_verbose); + printf("%s\n", compare ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + } + + printf("\tElapsed clocks: "); + DisplayDeviceResults(d_elapsed, 1); + + // Cleanup + if (h_in) delete[] h_in; + if (h_reference) delete[] h_reference; + if (h_aggregate) delete[] h_aggregate; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_aggregate) CubDebugExit(g_allocator.DeviceFree(d_aggregate)); + if (d_elapsed) CubDebugExit(g_allocator.DeviceFree(d_elapsed)); +} + + +/** + * Test thread block scan. (Specialized for insufficient resources) + */ +template < + int BLOCK_DIM_X, + int BLOCK_DIM_Y, + int BLOCK_DIM_Z, + int ITEMS_PER_THREAD, + ScanMode SCAN_MODE, + TestMode TEST_MODE, + BlockScanAlgorithm ALGORITHM, + typename ScanOpT, + typename T> +void Test( + GenMode gen_mode, + ScanOpT scan_op, + T initial_value, + Int2Type sufficient_resources) +{} + + +/** + * Test thread block scan. + */ +template < + int BLOCK_DIM_X, + int BLOCK_DIM_Y, + int BLOCK_DIM_Z, + int ITEMS_PER_THREAD, + ScanMode SCAN_MODE, + TestMode TEST_MODE, + BlockScanAlgorithm ALGORITHM, + typename ScanOpT, + typename T> +void Test( + GenMode gen_mode, + ScanOpT scan_op, + T initial_value) +{ + // Check size of smem storage for the target arch to make sure it will fit + typedef BlockScan BlockScanT; + + enum + { +#if defined(SM100) || defined(SM110) || defined(SM130) + sufficient_smem = (sizeof(typename BlockScanT::TempStorage) <= 16 * 1024), + sufficient_threads = ((BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z) <= 512), +#else + sufficient_smem = (sizeof(typename BlockScanT::TempStorage) <= 16 * 1024), + sufficient_threads = ((BLOCK_DIM_X * BLOCK_DIM_Y * BLOCK_DIM_Z) <= 1024), +#endif + +#if defined(_WIN32) || defined(_WIN64) + // Accommodate ptxas crash bug (access violation) on Windows + special_skip = ((TEST_ARCH <= 130) && (Equals::VALUE) && (BLOCK_DIM_Z > 1)), +#else + special_skip = false, +#endif + sufficient_resources = (sufficient_smem && sufficient_threads && !special_skip), + }; + + Test( + gen_mode, scan_op, initial_value, Int2Type()); +} + + + +/** + * Run test for different thread block dimensions + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + ScanMode SCAN_MODE, + TestMode TEST_MODE, + BlockScanAlgorithm ALGORITHM, + typename ScanOpT, + typename T> +void Test( + GenMode gen_mode, + ScanOpT scan_op, + T initial_value) +{ + Test(gen_mode, scan_op, initial_value); + Test(gen_mode, scan_op, initial_value); +} + + +/** + * Run test for different policy types + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + ScanMode SCAN_MODE, + TestMode TEST_MODE, + typename ScanOpT, + typename T> +void Test( + GenMode gen_mode, + ScanOpT scan_op, + T initial_value) +{ +#ifdef TEST_RAKING + Test(gen_mode, scan_op, initial_value); +#endif +#ifdef TEST_RAKING_MEMOIZE + Test(gen_mode, scan_op, initial_value); +#endif +#ifdef TEST_WARP_SCANS + Test(gen_mode, scan_op, initial_value); +#endif +} + + +/** + * Run tests for different primitive variants + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + typename ScanOpT, + typename T> +void Test( + GenMode gen_mode, + ScanOpT scan_op, + T identity, + T initial_value) +{ + // Exclusive (use identity as initial value because it will dispatch to *Sum variants that don't take initial values) + Test(gen_mode, scan_op, identity); + Test(gen_mode, scan_op, identity); + Test(gen_mode, scan_op, identity); + + // Exclusive (non-specialized, so we can use initial-value) + Test(gen_mode, WrapperFunctor(scan_op), initial_value); 
+ Test(gen_mode, WrapperFunctor(scan_op), initial_value); + Test(gen_mode, WrapperFunctor(scan_op), initial_value); + + // Inclusive + Test(gen_mode, scan_op, identity); // This scan doesn't take an initial value + Test(gen_mode, scan_op, identity); // This scan doesn't take an initial value + Test(gen_mode, scan_op, initial_value); +} + + +/** + * Run tests for different problem-generation options + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + typename ScanOpT, + typename T> +void Test( + ScanOpT scan_op, + T identity, + T initial_value) +{ + Test(UNIFORM, scan_op, identity, initial_value); + Test(INTEGER_SEED, scan_op, identity, initial_value); + + // Don't test randomly-generated floats b/c of stability + if (Traits::CATEGORY != FLOATING_POINT) + Test(RANDOM, scan_op, identity, initial_value); +} + + +/** + * Run tests for different data types and scan ops + */ +template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD> +void Test() +{ + // Get ptx version + int ptx_version; + CubDebugExit(PtxVersion(ptx_version)); + + // primitive + Test(Sum(), (unsigned char) 0, (unsigned char) 99); + Test(Sum(), (unsigned short) 0, (unsigned short) 99); + Test(Sum(), (unsigned int) 0, (unsigned int) 99); + Test(Sum(), (unsigned long long) 0, (unsigned long long) 99); + Test(Sum(), (float) 0, (float) 99); + + // primitive (alternative scan op) + Test(Max(), std::numeric_limits::min(), (char) 99); + Test(Max(), std::numeric_limits::min(), (short) 99); + Test(Max(), std::numeric_limits::min(), (int) 99); + Test(Max(), std::numeric_limits::min(), (long long) 99); + + if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted + Test(Max(), std::numeric_limits::max() * -1, (double) 99); + + // vec-1 + Test(Sum(), make_uchar1(0), make_uchar1(17)); + + // vec-2 + Test(Sum(), make_uchar2(0, 0), make_uchar2(17, 21)); + Test(Sum(), make_ushort2(0, 0), make_ushort2(17, 21)); + Test(Sum(), make_uint2(0, 0), make_uint2(17, 21)); + Test(Sum(), make_ulonglong2(0, 0), make_ulonglong2(17, 21)); + + // vec-4 + Test(Sum(), make_char4(0, 0, 0, 0), make_char4(17, 21, 32, 85)); + Test(Sum(), make_short4(0, 0, 0, 0), make_short4(17, 21, 32, 85)); + Test(Sum(), make_int4(0, 0, 0, 0), make_int4(17, 21, 32, 85)); + Test(Sum(), make_longlong4(0, 0, 0, 0), make_longlong4(17, 21, 32, 85)); + + // complex + Test(Sum(), TestFoo::MakeTestFoo(0, 0, 0, 0), TestFoo::MakeTestFoo(17, 21, 32, 85)); + Test(Sum(), TestBar(0, 0), TestBar(17, 21)); + +} + + +/** + * Run tests for different items per thread + */ +template +void Test() +{ + Test(); + Test(); + Test(); +} + + + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("repeat", g_repeat); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=] " + "[--repeat=]" + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + +#ifdef QUICK_TEST + + Test<128, 1, 1, 1, EXCLUSIVE, AGGREGATE, BLOCK_SCAN_WARP_SCANS>(UNIFORM, Sum(), int(0)); + + // Compile/run quick tests + Test<128, 1, 1, 4, EXCLUSIVE, AGGREGATE, BLOCK_SCAN_WARP_SCANS>(UNIFORM, Sum(), int(0)); + Test<128, 1, 1, 4, EXCLUSIVE, AGGREGATE, BLOCK_SCAN_RAKING>(UNIFORM, Sum(), int(0)); + Test<128, 1, 1, 4, EXCLUSIVE, AGGREGATE, BLOCK_SCAN_RAKING_MEMOIZE>(UNIFORM, Sum(), int(0)); + + Test<128, 1, 1, 2, INCLUSIVE, PREFIX, BLOCK_SCAN_RAKING>(INTEGER_SEED, Sum(), 
TestFoo::MakeTestFoo(17, 21, 32, 85)); + Test<128, 1, 1, 1, EXCLUSIVE, AGGREGATE, BLOCK_SCAN_WARP_SCANS>(UNIFORM, Sum(), make_longlong4(17, 21, 32, 85)); + + +#else + + // Compile/run thorough tests + for (int i = 0; i <= g_repeat; ++i) + { + // Run tests for different thread block sizes + Test<17>(); + Test<32>(); + Test<62>(); + Test<65>(); +// Test<96>(); // TODO: file bug for UNREACHABLE error for Test<96, 9, BASIC, BLOCK_SCAN_RAKING>(UNIFORM, Sum(), NullType(), make_ulonglong2(17, 21)); + Test<128>(); + } + +#endif + + return 0; +} + + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_histogram.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_histogram.cu new file mode 100644 index 0000000..da4e7db --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_histogram.cu @@ -0,0 +1,1669 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/****************************************************************************** + * Test of DeviceHistogram utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include +#include + +#if defined(QUICK_TEST) || defined(QUICKER_TEST) + #include +#endif + +#include +#include +#include + +#include "test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + + +// Dispatch types +enum Backend +{ + CUB, // CUB method + NPP, // NPP method + CDP, // GPU-based (dynamic parallelism) dispatch to CUB method +}; + + +bool g_verbose_input = false; +bool g_verbose = false; +int g_timing_iterations = 0; +int g_repeat = 0; +CachingDeviceAllocator g_allocator(true); + + + + +//--------------------------------------------------------------------- +// Dispatch to NPP histogram +//--------------------------------------------------------------------- + +#if defined(QUICK_TEST) || defined(QUICKER_TEST) + +/** + * Dispatch to single-channel 8b NPP histo-even + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t DispatchEven( + Int2Type<1> num_channels, + Int2Type<1> num_active_channels, + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + unsigned char *d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). + CounterT *d_histogram[1], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_levels[i] - 1. + int num_levels[1], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT lower_level[1], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. + LevelT upper_level[1], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. 
+ OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest + cudaStream_t stream, + bool debug_synchronous) +{ + typedef unsigned char SampleT; + + cudaError_t error = cudaSuccess; + NppiSize oSizeROI = { + num_row_pixels, + num_rows + }; + + if (d_temp_storage_bytes == NULL) + { + int nDeviceBufferSize; + nppiHistogramEvenGetBufferSize_8u_C1R(oSizeROI, num_levels[0] ,&nDeviceBufferSize); + temp_storage_bytes = nDeviceBufferSize; + } + else + { + for (int i = 0; i < timing_timing_iterations; ++i) + { + // compute the histogram + nppiHistogramEven_8u_C1R( + d_samples, + row_stride_bytes, + oSizeROI, + d_histogram[0], + num_levels[0], + lower_level[0], + upper_level[0], + (Npp8u*) d_temp_storage); + } + } + + return error; +} + + +/** + * Dispatch to 3/4 8b NPP histo-even + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t DispatchEven( + Int2Type<4> num_channels, + Int2Type<3> num_active_channels, + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + unsigned char *d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). + CounterT *d_histogram[3], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_levels[i] - 1. + int num_levels[3], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT lower_level[3], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. + LevelT upper_level[3], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. 
+ OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest + cudaStream_t stream, + bool debug_synchronous) +{ + typedef unsigned char SampleT; + + cudaError_t error = cudaSuccess; + NppiSize oSizeROI = { + num_row_pixels, + num_rows + }; + + if (d_temp_storage_bytes == NULL) + { + int nDeviceBufferSize; + nppiHistogramEvenGetBufferSize_8u_AC4R(oSizeROI, num_levels ,&nDeviceBufferSize); + temp_storage_bytes = nDeviceBufferSize; + } + else + { + for (int i = 0; i < timing_timing_iterations; ++i) + { + // compute the histogram + nppiHistogramEven_8u_AC4R( + d_samples, + row_stride_bytes, + oSizeROI, + d_histogram, + num_levels, + lower_level, + upper_level, + (Npp8u*) d_temp_storage); + } + } + + return error; +} + + +#endif // #if defined(QUICK_TEST) || defined(QUICKER_TEST) + + +//--------------------------------------------------------------------- +// Dispatch to different DeviceHistogram entrypoints +//--------------------------------------------------------------------- + +/** + * Dispatch to CUB single histogram-even entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t DispatchEven( + Int2Type<1> num_channels, + Int2Type<1> num_active_channels, + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). + CounterT *d_histogram[1], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_levels[i] - 1. + int num_levels[1], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT lower_level[1], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. + LevelT upper_level[1], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. 
+ OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest + cudaStream_t stream, + bool debug_synchronous) +{ + typedef typename std::iterator_traits::value_type SampleT; + + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceHistogram::HistogramEven( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_histogram[0], + num_levels[0], + lower_level[0], + upper_level[0], + num_row_pixels, + num_rows, + row_stride_bytes, + stream, + debug_synchronous); + } + return error; +} + +/** + * Dispatch to CUB multi histogram-even entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t DispatchEven( + Int2Type num_channels, + Int2Type num_active_channels, + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). + CounterT *d_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_levels[i] - 1. + int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. + LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. + OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest + cudaStream_t stream, + bool debug_synchronous) +{ + typedef typename std::iterator_traits::value_type SampleT; + + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceHistogram::MultiHistogramEven( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_histogram, + num_levels, + lower_level, + upper_level, + num_row_pixels, + num_rows, + row_stride_bytes, + stream, + debug_synchronous); + } + return error; +} + + +/** + * Dispatch to CUB single histogram-range entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t DispatchRange( + Int2Type<1> num_channels, + Int2Type<1> num_active_channels, + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). 
+ CounterT *d_histogram[1], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_levels[i] - 1. + int num_levels[1], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT *d_levels[1], ///< [in] The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. + OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest + cudaStream_t stream, + bool debug_synchronous) +{ + typedef typename std::iterator_traits::value_type SampleT; + + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceHistogram::HistogramRange( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_histogram[0], + num_levels[0], + d_levels[0], + num_row_pixels, + num_rows, + row_stride_bytes, + stream, + debug_synchronous); + } + return error; +} + + +/** + * Dispatch to CUB multi histogram-range entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t DispatchRange( + Int2Type num_channels, + Int2Type num_active_channels, + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + SampleIteratorT d_samples, ///< [in] The pointer to the multi-channel input sequence of data samples. The samples from different channels are assumed to be interleaved (e.g., an array of 32-bit pixels where each pixel consists of four RGBA 8-bit samples). + CounterT *d_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_levels[i] - 1. + int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT *d_levels[NUM_ACTIVE_CHANNELS], ///< [in] The pointers to the arrays of boundaries (levels), one for each active channel. Bin ranges are defined by consecutive boundary pairings: lower sample value boundaries are inclusive and upper sample value boundaries are exclusive. 
+ OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest + cudaStream_t stream, + bool debug_synchronous) +{ + typedef typename std::iterator_traits::value_type SampleT; + + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceHistogram::MultiHistogramRange( + d_temp_storage, + temp_storage_bytes, + d_samples, + d_histogram, + num_levels, + d_levels, + num_row_pixels, + num_rows, + row_stride_bytes, + stream, + debug_synchronous); + } + return error; +} + + + +//--------------------------------------------------------------------- +// CUDA nested-parallelism test kernel +//--------------------------------------------------------------------- + +/** + * Simple wrapper kernel to invoke DeviceHistogram + * / +template +__global__ void CnpDispatchKernel( + Int2Type algorithm, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t temp_storage_bytes, + SampleT *d_samples, + SampleIteratorT d_sample_itr, + ArrayWrapper d_out_histograms, + int num_samples, + bool debug_synchronous) +{ +#ifndef CUB_CDP + *d_cdp_error = cudaErrorNotSupported; +#else + *d_cdp_error = Dispatch(algorithm, Int2Type(), timing_timing_iterations, d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, d_samples, d_sample_itr, d_out_histograms.array, num_samples, 0, debug_synchronous); + *d_temp_storage_bytes = temp_storage_bytes; +#endif +} + + +/ ** + * Dispatch to CDP kernel + * / +template +cudaError_t Dispatch( + Int2Type algorithm, + Int2Type use_cdp, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + SampleT *d_samples, + SampleIteratorT d_sample_itr, + CounterT *d_histograms[NUM_ACTIVE_CHANNELS], + int num_samples, + cudaStream_t stream, + bool debug_synchronous) +{ + // Setup array wrapper for histogram channel output (because we can't pass static arrays as kernel parameters) + ArrayWrapper d_histo_wrapper; + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + d_histo_wrapper.array[CHANNEL] = d_histograms[CHANNEL]; + + // Invoke kernel to invoke device-side dispatch + CnpDispatchKernel<<<1,1>>>(algorithm, timing_timing_iterations, d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, d_samples, d_sample_itr, d_histo_wrapper, num_samples, debug_synchronous); + + // Copy out temp_storage_bytes + CubDebugExit(cudaMemcpy(&temp_storage_bytes, d_temp_storage_bytes, sizeof(size_t) * 1, cudaMemcpyDeviceToHost)); + + // Copy out error + cudaError_t retval; + CubDebugExit(cudaMemcpy(&retval, d_cdp_error, sizeof(cudaError_t) * 1, cudaMemcpyDeviceToHost)); + return retval; +} +*/ + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + +// Searches for bin given a list of bin-boundary levels +template +struct SearchTransform +{ + LevelT *levels; // Pointer to levels array + int num_levels; // Number of levels in array + + // Functor for converting samples to bin-ids (num_levels is returned if sample is out of range) + template + int operator()(SampleT sample) + { + int bin = int(std::upper_bound(levels, levels 
+ num_levels, (LevelT) sample) - levels - 1); + if (bin < 0) + { + // Sample out of range + return num_levels; + } + return bin; + } +}; + + +// Scales samples to evenly-spaced bins +template +struct ScaleTransform +{ + int num_levels; // Number of levels in array + LevelT max; // Max sample level (exclusive) + LevelT min; // Min sample level (inclusive) + LevelT scale; // Bin scaling factor + + void Init( + int num_levels, // Number of levels in array + LevelT max, // Max sample level (exclusive) + LevelT min, // Min sample level (inclusive) + LevelT scale) // Bin scaling factor + { + this->num_levels = num_levels; + this->max = max; + this->min = min; + this->scale = scale; + } + + // Functor for converting samples to bin-ids (num_levels is returned if sample is out of range) + template + int operator()(SampleT sample) + { + if ((sample < min) || (sample >= max)) + { + // Sample out of range + return num_levels; + } + + return (int) ((((LevelT) sample) - min) / scale); + } +}; + +// Scales samples to evenly-spaced bins +template <> +struct ScaleTransform +{ + int num_levels; // Number of levels in array + float max; // Max sample level (exclusive) + float min; // Min sample level (inclusive) + float scale; // Bin scaling factor + + void Init( + int num_levels, // Number of levels in array + float max, // Max sample level (exclusive) + float min, // Min sample level (inclusive) + float scale) // Bin scaling factor + { + this->num_levels = num_levels; + this->max = max; + this->min = min; + this->scale = 1.0f / scale; + } + + // Functor for converting samples to bin-ids (num_levels is returned if sample is out of range) + template + int operator()(SampleT sample) + { + if ((sample < min) || (sample >= max)) + { + // Sample out of range + return num_levels; + } + + return (int) ((((float) sample) - min) * scale); + } +}; + + +/** + * Generate sample + */ +template +void Sample(T &datum, LevelT max_level, int entropy_reduction) +{ + unsigned int max = (unsigned int) -1; + unsigned int bits; + RandomBits(bits, entropy_reduction); + float fraction = (float(bits) / max); + + datum = (T) (fraction * max_level); +} + + +/** + * Initialize histogram samples + */ +template < + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename LevelT, + typename SampleT, + typename OffsetT> +void InitializeSamples( + LevelT max_level, + int entropy_reduction, + SampleT *h_samples, + OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_bytes) ///< [in] The number of bytes between starts of consecutive rows in the region of interest +{ + // Initialize samples + for (OffsetT row = 0; row < num_rows; ++row) + { + for (OffsetT pixel = 0; pixel < num_row_pixels; ++pixel) + { + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + // Sample offset + OffsetT offset = (row * (row_stride_bytes / sizeof(SampleT))) + (pixel * NUM_CHANNELS) + channel; + + // Init sample value + Sample(h_samples[offset], max_level, entropy_reduction); + if (g_verbose_input) + { + if (channel > 0) printf(", "); + std::cout << CoutCast(h_samples[offset]); + } + } + } + } +} + + +/** + * Initialize histogram solutions + */ +template < + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename CounterT, + typename SampleIteratorT, + typename TransformOp, + typename OffsetT> +void InitializeBins( + SampleIteratorT h_samples, + int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of 
boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + TransformOp transform_op[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. + CounterT *h_histogram[NUM_ACTIVE_CHANNELS], ///< [out] The pointers to the histogram counter output arrays, one for each active channel. For channeli, the allocation length of d_histograms[i] should be num_levels[i] - 1. + OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_bytes) ///< [in] The number of bytes between starts of consecutive rows in the region of interest +{ + typedef typename std::iterator_traits::value_type SampleT; + + // Init bins + for (int CHANNEL = 0; CHANNEL < NUM_ACTIVE_CHANNELS; ++CHANNEL) + { + for (int bin = 0; bin < num_levels[CHANNEL] - 1; ++bin) + { + h_histogram[CHANNEL][bin] = 0; + } + } + + // Initialize samples + if (g_verbose_input) printf("Samples: \n"); + for (OffsetT row = 0; row < num_rows; ++row) + { + for (OffsetT pixel = 0; pixel < num_row_pixels; ++pixel) + { + if (g_verbose_input) printf("["); + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + // Sample offset + OffsetT offset = (row * (row_stride_bytes / sizeof(SampleT))) + (pixel * NUM_CHANNELS) + channel; + + // Update sample bin + int bin = transform_op[channel](h_samples[offset]); + if (g_verbose_input) printf(" (%d)", bin); fflush(stdout); + if ((bin >= 0) && (bin < num_levels[channel] - 1)) + { + // valid bin + h_histogram[channel][bin]++; + } + } + if (g_verbose_input) printf("]"); + } + if (g_verbose_input) printf("\n\n"); + } +} + + + +/** + * Test histogram-even + */ +template < + Backend BACKEND, + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename SampleT, + typename CounterT, + typename LevelT, + typename OffsetT, + typename SampleIteratorT> +void TestEven( + LevelT max_level, + int entropy_reduction, + int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. + LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. + OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_bytes, ///< [in] The number of bytes between starts of consecutive rows in the region of interest + SampleIteratorT h_samples, + SampleIteratorT d_samples) +{ + OffsetT total_samples = num_rows * (row_stride_bytes / sizeof(SampleT)); + + printf("\n----------------------------\n"); + printf("%s cub::DeviceHistogramEven (%s) %d pixels (%d height, %d width, %d-byte row stride), %d %d-byte %s samples (entropy reduction %d), %s counters, %d/%d channels, max sample ", + (BACKEND == CDP) ? "CDP CUB" : (BACKEND == NPP) ? "NPP" : "CUB", + (IsPointer::VALUE) ? 
"pointer" : "iterator", + (int) (num_row_pixels * num_rows), + (int) num_rows, + (int) num_row_pixels, + (int) row_stride_bytes, + (int) total_samples, + (int) sizeof(SampleT), + typeid(SampleT).name(), + entropy_reduction, + typeid(CounterT).name(), + NUM_ACTIVE_CHANNELS, + NUM_CHANNELS); + std::cout << CoutCast(max_level) << "\n"; + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + std::cout << "\n\tChannel " << channel << ": " << num_levels[channel] - 1 << " bins [" << lower_level[channel] << ", " << upper_level[channel] << ")\n"; + fflush(stdout); + + // Allocate and initialize host and device data + + typedef SampleT Foo; // rename type to quelch gcc warnings (bug?) + CounterT* h_histogram[NUM_ACTIVE_CHANNELS]; + ScaleTransform transform_op[NUM_ACTIVE_CHANNELS]; + + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + int bins = num_levels[channel] - 1; + h_histogram[channel] = new CounterT[bins]; + + transform_op[channel].Init( + num_levels[channel], + upper_level[channel], + lower_level[channel], + ((upper_level[channel] - lower_level[channel]) / bins)); + } + + InitializeBins( + h_samples, num_levels, transform_op, h_histogram, num_row_pixels, num_rows, row_stride_bytes); + + // Allocate and initialize device data + + CounterT* d_histogram[NUM_ACTIVE_CHANNELS]; + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_histogram[channel], sizeof(CounterT) * (num_levels[channel] - 1))); + CubDebugExit(cudaMemset(d_histogram[channel], 0, sizeof(CounterT) * (num_levels[channel] - 1))); + } + + // Allocate CDP device arrays + size_t *d_temp_storage_bytes = NULL; + cudaError_t *d_cdp_error = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_temp_storage_bytes, sizeof(size_t) * 1)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_cdp_error, sizeof(cudaError_t) * 1)); + + // Allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + + DispatchEven( + Int2Type(), Int2Type(), Int2Type(), 1, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, + d_samples, d_histogram, num_levels, lower_level, upper_level, + num_row_pixels, num_rows, row_stride_bytes, + 0, true); + + // Allocate temporary storage with "canary" zones + int canary_bytes = 256; + char canary_token = 8; + char* canary_zone = new char[canary_bytes]; + + memset(canary_zone, canary_token, canary_bytes); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes + (canary_bytes * 2))); + CubDebugExit(cudaMemset(d_temp_storage, canary_token, temp_storage_bytes + (canary_bytes * 2))); + + // Run warmup/correctness iteration + DispatchEven( + Int2Type(), Int2Type(), Int2Type(), 1, d_temp_storage_bytes, d_cdp_error, + ((char *) d_temp_storage) + canary_bytes, temp_storage_bytes, + d_samples, d_histogram, num_levels, lower_level, upper_level, + num_row_pixels, num_rows, row_stride_bytes, + 0, true); + + // Check canary zones + int error = CompareDeviceResults(canary_zone, (char *) d_temp_storage, canary_bytes, true, g_verbose); + AssertEquals(0, error); + error = CompareDeviceResults(canary_zone, ((char *) d_temp_storage) + canary_bytes + temp_storage_bytes, canary_bytes, true, g_verbose); + AssertEquals(0, error); + + // Flush any stdout/stderr + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + fflush(stdout); + fflush(stderr); + + // Check for correctness (and display results, if specified) + for (int channel = 0; channel < 
NUM_ACTIVE_CHANNELS; ++channel) + { + int channel_error = CompareDeviceResults(h_histogram[channel], d_histogram[channel], num_levels[channel] - 1, true, g_verbose); + printf("\tChannel %d %s", channel, channel_error ? "FAIL" : "PASS\n"); + error |= channel_error; + } + + // Performance + GpuTimer gpu_timer; + gpu_timer.Start(); + + DispatchEven( + Int2Type(), Int2Type(), Int2Type(), g_timing_iterations, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, + d_samples, d_histogram, num_levels, lower_level, upper_level, + num_row_pixels, num_rows, row_stride_bytes, + 0, false); + + gpu_timer.Stop(); + float elapsed_millis = gpu_timer.ElapsedMillis(); + + // Display performance + if (g_timing_iterations > 0) + { + float avg_millis = elapsed_millis / g_timing_iterations; + float giga_rate = float(total_samples) / avg_millis / 1000.0f / 1000.0f; + float giga_bandwidth = giga_rate * sizeof(SampleT); + printf("\t%.3f avg ms, %.3f billion samples/s, %.3f billion bins/s, %.3f billion pixels/s, %.3f logical GB/s", + avg_millis, + giga_rate, + giga_rate * NUM_ACTIVE_CHANNELS / NUM_CHANNELS, + giga_rate / NUM_CHANNELS, + giga_bandwidth); + } + + printf("\n\n"); + + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + if (h_histogram[channel]) + delete[] h_histogram[channel]; + + if (d_histogram[channel]) + CubDebugExit(g_allocator.DeviceFree(d_histogram[channel])); + } + + if (d_temp_storage_bytes) CubDebugExit(g_allocator.DeviceFree(d_temp_storage_bytes)); + if (d_cdp_error) CubDebugExit(g_allocator.DeviceFree(d_cdp_error)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + // Correctness asserts + AssertEquals(0, error); +} + + +/** + * Test histogram-even (native pointer input) + */ +template < + Backend BACKEND, + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename SampleT, + typename CounterT, + typename LevelT, + typename OffsetT> +void TestEvenNative( + LevelT max_level, + int entropy_reduction, + int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. + LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. + OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_bytes) ///< [in] The number of bytes between starts of consecutive rows in the region of interest +{ + OffsetT total_samples = num_rows * (row_stride_bytes / sizeof(SampleT)); + + // Allocate and initialize host sample data + typedef SampleT Foo; // rename type to quelch gcc warnings (bug?) 
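+ // Note (added): generate the host samples here, mirror them to the device,
+ // and delegate to the generic TestEven driver above for reference binning,
+ // correctness checking, and timing.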
+ SampleT* h_samples = new Foo[total_samples]; + + InitializeSamples( + max_level, entropy_reduction, h_samples, num_row_pixels, num_rows, row_stride_bytes); + + // Allocate and initialize device data + SampleT* d_samples = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_samples, sizeof(SampleT) * total_samples)); + CubDebugExit(cudaMemcpy(d_samples, h_samples, sizeof(SampleT) * total_samples, cudaMemcpyHostToDevice)); + + TestEven( + max_level, entropy_reduction, num_levels, lower_level, upper_level, + num_row_pixels, num_rows, row_stride_bytes, + h_samples, d_samples); + + // Cleanup + if (h_samples) delete[] h_samples; + if (d_samples) CubDebugExit(g_allocator.DeviceFree(d_samples)); +} + + +/** + * Test histogram-even (native pointer input) + */ +template < + Backend BACKEND, + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename SampleT, + typename CounterT, + typename LevelT, + typename OffsetT> +void TestEvenIterator( + LevelT max_level, + int entropy_reduction, + int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT lower_level[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. + LevelT upper_level[NUM_ACTIVE_CHANNELS], ///< [in] The upper sample value bound (exclusive) for the highest histogram bin in each active channel. + OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_bytes) ///< [in] The number of bytes between starts of consecutive rows in the region of interest +{ + SampleT sample = (SampleT) lower_level[0]; + ConstantInputIterator sample_itr(sample); + + TestEven( + max_level, entropy_reduction, num_levels, lower_level, upper_level, + num_row_pixels, num_rows, row_stride_bytes, + sample_itr, sample_itr); + +} + + +/** + * Test histogram-range + */ +template < + Backend BACKEND, + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename SampleT, + typename CounterT, + typename LevelT, + typename OffsetT> +void TestRange( + LevelT max_level, + int entropy_reduction, + int num_levels[NUM_ACTIVE_CHANNELS], ///< [in] The number of boundaries (levels) for delineating histogram samples in each active channel. Implies that the number of bins for channeli is num_levels[i] - 1. + LevelT* levels[NUM_ACTIVE_CHANNELS], ///< [in] The lower sample value bound (inclusive) for the lowest histogram bin in each active channel. + OffsetT num_row_pixels, ///< [in] The number of multi-channel pixels per row in the region of interest + OffsetT num_rows, ///< [in] The number of rows in the region of interest + OffsetT row_stride_bytes) ///< [in] The number of bytes between starts of consecutive rows in the region of interest +{ + OffsetT total_samples = num_rows * (row_stride_bytes / sizeof(SampleT)); + + printf("\n----------------------------\n"); + printf("%s cub::DeviceHistogramRange %d pixels (%d height, %d width, %d-byte row stride), %d %d-byte %s samples (entropy reduction %d), %s counters, %d/%d channels, max sample ", + (BACKEND == CDP) ? "CDP CUB" : (BACKEND == NPP) ? 
"NPP" : "CUB", + (int) (num_row_pixels * num_rows), + (int) num_rows, + (int) num_row_pixels, + (int) row_stride_bytes, + (int) total_samples, + (int) sizeof(SampleT), + typeid(SampleT).name(), + entropy_reduction, + typeid(CounterT).name(), + NUM_ACTIVE_CHANNELS, + NUM_CHANNELS); + std::cout << CoutCast(max_level) << "\n"; + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + printf("Channel %d: %d bins [", channel, num_levels[channel] - 1); + std::cout << levels[channel][0]; + for (int level = 1; level < num_levels[channel]; ++level) + std::cout << ", " << levels[channel][level]; + printf("]\n"); + } + fflush(stdout); + + // Allocate and initialize host and device data + typedef SampleT Foo; // rename type to quelch gcc warnings (bug?) + SampleT* h_samples = new Foo[total_samples]; + CounterT* h_histogram[NUM_ACTIVE_CHANNELS]; + SearchTransform transform_op[NUM_ACTIVE_CHANNELS]; + + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + transform_op[channel].levels = levels[channel]; + transform_op[channel].num_levels = num_levels[channel]; + + int bins = num_levels[channel] - 1; + h_histogram[channel] = new CounterT[bins]; + } + + InitializeSamples( + max_level, entropy_reduction, h_samples, num_row_pixels, num_rows, row_stride_bytes); + + InitializeBins( + h_samples, num_levels, transform_op, h_histogram, num_row_pixels, num_rows, row_stride_bytes); + + // Allocate and initialize device data + SampleT* d_samples = NULL; + LevelT* d_levels[NUM_ACTIVE_CHANNELS]; + CounterT* d_histogram[NUM_ACTIVE_CHANNELS]; + + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_samples, sizeof(SampleT) * total_samples)); + CubDebugExit(cudaMemcpy(d_samples, h_samples, sizeof(SampleT) * total_samples, cudaMemcpyHostToDevice)); + + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_levels[channel], sizeof(LevelT) * num_levels[channel])); + CubDebugExit(cudaMemcpy(d_levels[channel], levels[channel], sizeof(LevelT) * num_levels[channel], cudaMemcpyHostToDevice)); + + int bins = num_levels[channel] - 1; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_histogram[channel], sizeof(CounterT) * bins)); + CubDebugExit(cudaMemset(d_histogram[channel], 0, sizeof(CounterT) * bins)); + } + + // Allocate CDP device arrays + size_t *d_temp_storage_bytes = NULL; + cudaError_t *d_cdp_error = NULL; + + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_temp_storage_bytes, sizeof(size_t) * 1)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_cdp_error, sizeof(cudaError_t) * 1)); + + // Allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + + DispatchRange( + Int2Type(), Int2Type(), Int2Type(), 1, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, + d_samples, d_histogram, num_levels, d_levels, + num_row_pixels, num_rows, row_stride_bytes, + 0, true); + + // Allocate temporary storage with "canary" zones + int canary_bytes = 256; + char canary_token = 9; + char* canary_zone = new char[canary_bytes]; + + memset(canary_zone, canary_token, canary_bytes); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes + (canary_bytes * 2))); + CubDebugExit(cudaMemset(d_temp_storage, canary_token, temp_storage_bytes + (canary_bytes * 2))); + + // Run warmup/correctness iteration + DispatchRange( + Int2Type(), Int2Type(), Int2Type(), 1, d_temp_storage_bytes, d_cdp_error, + ((char *) d_temp_storage) + canary_bytes, temp_storage_bytes, + d_samples, 
d_histogram, num_levels, d_levels, + num_row_pixels, num_rows, row_stride_bytes, + 0, true); + + // Check canary zones + int error = CompareDeviceResults(canary_zone, (char *) d_temp_storage, canary_bytes, true, g_verbose); + AssertEquals(0, error); + error = CompareDeviceResults(canary_zone, ((char *) d_temp_storage) + canary_bytes + temp_storage_bytes, canary_bytes, true, g_verbose); + AssertEquals(0, error); + + // Flush any stdout/stderr + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + fflush(stdout); + fflush(stderr); + + // Check for correctness (and display results, if specified) + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + int channel_error = CompareDeviceResults(h_histogram[channel], d_histogram[channel], num_levels[channel] - 1, true, g_verbose); + printf("\tChannel %d %s", channel, channel_error ? "FAIL" : "PASS\n"); + error |= channel_error; + } + + // Performance + GpuTimer gpu_timer; + gpu_timer.Start(); + + DispatchRange( + Int2Type(), Int2Type(), Int2Type(), g_timing_iterations, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, + d_samples, d_histogram, num_levels, d_levels, + num_row_pixels, num_rows, row_stride_bytes, + 0, false); + + gpu_timer.Stop(); + float elapsed_millis = gpu_timer.ElapsedMillis(); + + // Display performance + if (g_timing_iterations > 0) + { + float avg_millis = elapsed_millis / g_timing_iterations; + float giga_rate = float(total_samples) / avg_millis / 1000.0f / 1000.0f; + float giga_bandwidth = giga_rate * sizeof(SampleT); + printf("\t%.3f avg ms, %.3f billion samples/s, %.3f billion bins/s, %.3f billion pixels/s, %.3f logical GB/s", + avg_millis, + giga_rate, + giga_rate * NUM_ACTIVE_CHANNELS / NUM_CHANNELS, + giga_rate / NUM_CHANNELS, + giga_bandwidth); + } + + printf("\n\n"); + + // Cleanup + if (h_samples) delete[] h_samples; + + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + if (h_histogram[channel]) + delete[] h_histogram[channel]; + + if (d_histogram[channel]) + CubDebugExit(g_allocator.DeviceFree(d_histogram[channel])); + + if (d_levels[channel]) + CubDebugExit(g_allocator.DeviceFree(d_levels[channel])); + } + + if (d_samples) CubDebugExit(g_allocator.DeviceFree(d_samples)); + if (d_temp_storage_bytes) CubDebugExit(g_allocator.DeviceFree(d_temp_storage_bytes)); + if (d_cdp_error) CubDebugExit(g_allocator.DeviceFree(d_cdp_error)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + // Correctness asserts + AssertEquals(0, error); +} + + +/** + * Test histogram-even + */ +template < + Backend BACKEND, + typename SampleT, + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename CounterT, + typename LevelT, + typename OffsetT> +void TestEven( + OffsetT num_row_pixels, + OffsetT num_rows, + OffsetT row_stride_bytes, + int entropy_reduction, + int num_levels[NUM_ACTIVE_CHANNELS], + LevelT max_level, + int max_num_levels) +{ + LevelT lower_level[NUM_ACTIVE_CHANNELS]; + LevelT upper_level[NUM_ACTIVE_CHANNELS]; + + // Find smallest level increment + int max_bins = max_num_levels - 1; + LevelT min_level_increment = max_level / max_bins; + + // Set upper and lower levels for each channel + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + int num_bins = num_levels[channel] - 1; + lower_level[channel] = (max_level - (num_bins * min_level_increment)) / 2; + upper_level[channel] = (max_level + (num_bins * min_level_increment)) / 2; + } + + // Test pointer-based samples + TestEvenNative( + max_level, 
entropy_reduction, num_levels, lower_level, upper_level, num_row_pixels, num_rows, row_stride_bytes); + + // Test iterator-based samples (CUB-only) + TestEvenIterator( + max_level, entropy_reduction, num_levels, lower_level, upper_level, num_row_pixels, num_rows, row_stride_bytes); +} + + + +/** + * Test histogram-range + */ +template < + Backend BACKEND, + typename SampleT, + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename CounterT, + typename LevelT, + typename OffsetT> +void TestRange( + OffsetT num_row_pixels, + OffsetT num_rows, + OffsetT row_stride_bytes, + int entropy_reduction, + int num_levels[NUM_ACTIVE_CHANNELS], + LevelT max_level, + int max_num_levels) +{ + // Find smallest level increment + int max_bins = max_num_levels - 1; + LevelT min_level_increment = max_level / max_bins; + + LevelT* levels[NUM_ACTIVE_CHANNELS]; + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + levels[channel] = new LevelT[num_levels[channel]]; + + int num_bins = num_levels[channel] - 1; + LevelT lower_level = (max_level - (num_bins * min_level_increment)) / 2; + + for (int level = 0; level < num_levels[channel]; ++level) + levels[channel][level] = lower_level + (level * min_level_increment); + } + + TestRange( + max_level, entropy_reduction, num_levels, levels, num_row_pixels, num_rows, row_stride_bytes); + + for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) + delete[] levels[channel]; + +} + + + +/** + * Test different entrypoints + */ +template < + typename SampleT, + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename CounterT, + typename LevelT, + typename OffsetT> +void Test( + OffsetT num_row_pixels, + OffsetT num_rows, + OffsetT row_stride_bytes, + int entropy_reduction, + int num_levels[NUM_ACTIVE_CHANNELS], + LevelT max_level, + int max_num_levels) +{ + TestEven( + num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, max_num_levels); + + TestRange( + num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, max_num_levels); +} + + +/** + * Test different number of levels + */ +template < + typename SampleT, + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename CounterT, + typename LevelT, + typename OffsetT> +void Test( + OffsetT num_row_pixels, + OffsetT num_rows, + OffsetT row_stride_bytes, + int entropy_reduction, + LevelT max_level, + int max_num_levels) +{ + int num_levels[NUM_ACTIVE_CHANNELS]; + +// Unnecessary testing +// // All the same level +// for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel) +// { +// num_levels[channel] = max_num_levels; +// } +// Test( +// num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, max_num_levels); + + // All different levels + num_levels[0] = max_num_levels; + for (int channel = 1; channel < NUM_ACTIVE_CHANNELS; ++channel) + { + num_levels[channel] = (num_levels[channel - 1] / 2) + 1; + } + Test( + num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, max_num_levels); +} + + + +/** + * Test different entropy-levels + */ +template < + typename SampleT, + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename CounterT, + typename LevelT, + typename OffsetT> +void Test( + OffsetT num_row_pixels, + OffsetT num_rows, + OffsetT row_stride_bytes, + LevelT max_level, + int max_num_levels) +{ + Test( + num_row_pixels, num_rows, row_stride_bytes, 0, max_level, max_num_levels); + + Test( + num_row_pixels, num_rows, row_stride_bytes, -1, max_level, max_num_levels); + + Test( 
+ num_row_pixels, num_rows, row_stride_bytes, 5, max_level, max_num_levels); +} + + +/** + * Test different row strides + */ +template < + typename SampleT, + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename CounterT, + typename LevelT, + typename OffsetT> +void Test( + OffsetT num_row_pixels, + OffsetT num_rows, + LevelT max_level, + int max_num_levels) +{ + OffsetT row_stride_bytes = num_row_pixels * NUM_CHANNELS * sizeof(SampleT); + + // No padding + Test( + num_row_pixels, num_rows, row_stride_bytes, max_level, max_num_levels); + + // 13 samples padding + Test( + num_row_pixels, num_rows, row_stride_bytes + (13 * sizeof(SampleT)), max_level, max_num_levels); +} + + +/** + * Test different problem sizes + */ +template < + typename SampleT, + int NUM_CHANNELS, + int NUM_ACTIVE_CHANNELS, + typename CounterT, + typename LevelT, + typename OffsetT> +void Test( + LevelT max_level, + int max_num_levels) +{ + // 0 row/col images + Test( + OffsetT(1920), OffsetT(0), max_level, max_num_levels); + Test( + OffsetT(0), OffsetT(0), max_level, max_num_levels); + + // 1080 image + Test( + OffsetT(1920), OffsetT(1080), max_level, max_num_levels); + + // Sample different aspect ratios sizes + for (OffsetT rows = 1; rows < 1000000; rows *= 1000) + { + for (OffsetT cols = 1; cols < (1000000 / rows); cols *= 1000) + { + Test( + cols, rows, max_level, max_num_levels); + } + } + + // Randomly select linear problem size between 1:10,000,000 + unsigned int max_int = (unsigned int) -1; + for (int i = 0; i < 4; ++i) + { + unsigned int num_items; + RandomBits(num_items); + num_items = (unsigned int) ((double(num_items) * double(10000000)) / double(max_int)); + num_items = CUB_MAX(1, num_items); + + Test( + OffsetT(num_items), 1, max_level, max_num_levels); + } +} + + + +/** + * Test different channel interleavings (valid specialiation) + */ +template < + typename SampleT, + typename CounterT, + typename LevelT, + typename OffsetT> +void TestChannels( + LevelT max_level, + int max_num_levels, + Int2Type is_valid_tag) +{ + Test(max_level, max_num_levels); + Test(max_level, max_num_levels); + Test(max_level, max_num_levels); + Test(max_level, max_num_levels); +} + + +/** + * Test different channel interleavings (invalid specialiation) + */ +template < + typename SampleT, + typename CounterT, + typename LevelT, + typename OffsetT> +void TestChannels( + LevelT max_level, + int max_num_levels, + Int2Type is_valid_tag) +{} + + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + + + + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_row_pixels = -1; + int entropy_reduction = 0; + int num_rows = 1; + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + g_verbose_input = args.CheckCmdLineFlag("v2"); + args.GetCmdLineArgument("n", num_row_pixels); + + int row_stride_pixels = num_row_pixels; + + args.GetCmdLineArgument("rows", num_rows); + args.GetCmdLineArgument("stride", row_stride_pixels); + args.GetCmdLineArgument("i", g_timing_iterations); + args.GetCmdLineArgument("repeat", g_repeat); + args.GetCmdLineArgument("entropy", entropy_reduction); +#if defined(QUICK_TEST) || defined(QUICKER_TEST) + bool compare_npp = args.CheckCmdLineFlag("npp"); +#endif + + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--rows= " + "[--stride= " + "[--i= " + "[--device=] " + "[--repeat=]" + "[--entropy=]" + "[--v] " + 
"[--cdp]" + "[--npp]" + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Get ptx version + int ptx_version; + CubDebugExit(PtxVersion(ptx_version)); + + if (num_row_pixels < 0) + { + num_row_pixels = 1920 * 1080; + row_stride_pixels = num_row_pixels; + } + +#if defined(QUICKER_TEST) + + // Compile/run quick tests + { + // HistogramEven: unsigned char 256 bins + typedef unsigned char SampleT; + typedef int LevelT; + + LevelT max_level = 256; + int num_levels[1] = {257}; + int row_stride_bytes = sizeof(SampleT) * row_stride_pixels * 1; + + TestEven(num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, num_levels[0]); + if (compare_npp) + TestEven(num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, num_levels[0]); + } + + +#elif defined(QUICK_TEST) + + // Compile/run quick tests + { + // HistogramEven: unsigned char 256 bins + typedef unsigned char SampleT; + typedef int LevelT; + + LevelT max_level = 256; + int num_levels[1] = {257}; + int row_stride_bytes = sizeof(SampleT) * row_stride_pixels * 1; + + TestEven(num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, num_levels[0]); + if (compare_npp) + TestEven(num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, num_levels[0]); + } + + { + // HistogramEven: 4/4 multichannel Unsigned char 256 bins + typedef unsigned char SampleT; + typedef int LevelT; + + LevelT max_level = 256; + int num_levels[4] = {257, 257, 257, 257}; + int row_stride_bytes = sizeof(SampleT) * row_stride_pixels * 4; + + TestEven(num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, num_levels[0]); + } + + { + // HistogramEven: 3/4 multichannel Unsigned char 256 bins + typedef unsigned char SampleT; + typedef int LevelT; + + LevelT max_level = 256; + int num_levels[3] = {257, 257, 257}; + int row_stride_bytes = sizeof(SampleT) * row_stride_pixels * 4; + + TestEven(num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, num_levels[0]); + if (compare_npp) + TestEven(num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, num_levels[0]); + } + + { + // HistogramEven: short [0,1024] 256 bins + typedef unsigned short SampleT; + typedef unsigned short LevelT; + + LevelT max_level = 1024; + int num_levels[1] = {257}; + int row_stride_bytes = sizeof(SampleT) * row_stride_pixels * 1; + + TestEven(num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, num_levels[0]); + } + + { + // HistogramEven: float [0,1.0] 256 bins + typedef float SampleT; + typedef float LevelT; + + LevelT max_level = 1.0; + int num_levels[1] = {257}; + int row_stride_bytes = sizeof(SampleT) * row_stride_pixels * 1; + + TestEven(num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, num_levels[0]); + } + + { + // HistogramEven: 3/4 multichannel float [0,1.0] 256 bins + typedef float SampleT; + typedef float LevelT; + + LevelT max_level = 1.0; + int num_levels[3] = {257, 257, 257}; + int row_stride_bytes = sizeof(SampleT) * row_stride_pixels * 4; + + TestEven(num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, num_levels[0]); + } + + { + // HistogramRange: signed char 256 bins + typedef signed char SampleT; + typedef int LevelT; + + LevelT max_level = 256; + int num_levels[1] = {257}; + int row_stride_bytes = sizeof(SampleT) * row_stride_pixels * 1; + 
+ TestRange(num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, num_levels[0]); + } + + { + // HistogramRange: 3/4 channel, unsigned char, varied bins (256, 128, 64) + typedef unsigned char SampleT; + typedef int LevelT; + + LevelT max_level = 256; + int num_levels[3] = {257, 129, 65}; + int row_stride_bytes = sizeof(SampleT) * row_stride_pixels * 4; + + TestRange(num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, num_levels[0]); + } + + if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted + { + // HistogramEven: double [0,1.0] 64 bins + typedef double SampleT; + typedef double LevelT; + + LevelT max_level = 1.0; + int num_levels[1] = {65}; + int row_stride_bytes = sizeof(SampleT) * row_stride_pixels * 1; + + TestEven(num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, num_levels[0]); + } + + { + // HistogramEven: short [0,1024] 512 bins + typedef unsigned short SampleT; + typedef unsigned short LevelT; + + LevelT max_level = 1024; + int num_levels[1] = {513}; + int row_stride_bytes = sizeof(SampleT) * row_stride_pixels * 1; + + TestEven(num_row_pixels, num_rows, row_stride_bytes, entropy_reduction, num_levels, max_level, num_levels[0]); + } + +#else + + // Compile/run thorough tests + for (int i = 0; i <= g_repeat; ++i) + { + TestChannels (256, 256 + 1, Int2Type()); + TestChannels (256, 256 + 1, Int2Type()); + TestChannels (128, 128 + 1, Int2Type()); + TestChannels (8192, 8192 + 1, Int2Type()); + TestChannels (1.0, 256 + 1, Int2Type()); + + // Test down-conversion of size_t offsets to int + TestChannels (256, 256 + 1, Int2Type<(sizeof(size_t) != sizeof(int))>()); + } + +#endif + + return 0; +} + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_radix_sort.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_radix_sort.cu new file mode 100644 index 0000000..b2e387f --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_radix_sort.cu @@ -0,0 +1,1298 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Test of DeviceRadixSort utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#if (__CUDACC_VER_MAJOR__ >= 9) + #include +#endif + +#include +#include +#include + +#include "test_util.h" + +#include +#include +#include + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; +int g_timing_iterations = 0; +int g_repeat = 0; +CachingDeviceAllocator g_allocator(true); + +// Dispatch types +enum Backend +{ + CUB, // CUB method (allows overwriting of input) + CUB_NO_OVERWRITE, // CUB method (disallows overwriting of input) + + CUB_SEGMENTED, // CUB method (allows overwriting of input) + CUB_SEGMENTED_NO_OVERWRITE, // CUB method (disallows overwriting of input) + + THRUST, // Thrust method + CDP, // GPU-based (dynamic parallelism) dispatch to CUB method +}; + + +//--------------------------------------------------------------------- +// Dispatch to different DeviceRadixSort entrypoints +//--------------------------------------------------------------------- + +/** + * Dispatch to CUB sorting entrypoint (specialized for ascending) + */ +template +CUB_RUNTIME_FUNCTION +__forceinline__ +cudaError_t Dispatch( + Int2Type is_descending, + Int2Type dispatch_to, + int *d_selector, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + DoubleBuffer &d_keys, + DoubleBuffer &d_values, + int num_items, + int num_segments, + const int *d_segment_offsets, + int begin_bit, + int end_bit, + cudaStream_t stream, + bool debug_synchronous) +{ + return DeviceRadixSort::SortPairs( + d_temp_storage, temp_storage_bytes, + d_keys, d_values, + num_items, begin_bit, end_bit, stream, debug_synchronous); +} + +/** + * Dispatch to CUB_NO_OVERWRITE sorting entrypoint (specialized for ascending) + */ +template +CUB_RUNTIME_FUNCTION +__forceinline__ +cudaError_t Dispatch( + Int2Type is_descending, + Int2Type dispatch_to, + int *d_selector, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + DoubleBuffer &d_keys, + DoubleBuffer &d_values, + int num_items, + int num_segments, + const int *d_segment_offsets, + int begin_bit, + int end_bit, + cudaStream_t stream, + bool debug_synchronous) +{ + KeyT const *const_keys_itr = d_keys.Current(); + ValueT const *const_values_itr = d_values.Current(); + + cudaError_t retval = DeviceRadixSort::SortPairs( + d_temp_storage, temp_storage_bytes, + const_keys_itr, d_keys.Alternate(), const_values_itr, d_values.Alternate(), + num_items, begin_bit, end_bit, stream, 
debug_synchronous); + + d_keys.selector ^= 1; + d_values.selector ^= 1; + return retval; +} + +/** + * Dispatch to CUB sorting entrypoint (specialized for descending) + */ +template +CUB_RUNTIME_FUNCTION +__forceinline__ +cudaError_t Dispatch( + Int2Type is_descending, + Int2Type dispatch_to, + int *d_selector, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + DoubleBuffer &d_keys, + DoubleBuffer &d_values, + int num_items, + int num_segments, + const int *d_segment_offsets, + int begin_bit, + int end_bit, + cudaStream_t stream, + bool debug_synchronous) +{ + return DeviceRadixSort::SortPairsDescending( + d_temp_storage, temp_storage_bytes, + d_keys, d_values, + num_items, begin_bit, end_bit, stream, debug_synchronous); +} + + +/** + * Dispatch to CUB_NO_OVERWRITE sorting entrypoint (specialized for descending) + */ +template +CUB_RUNTIME_FUNCTION +__forceinline__ +cudaError_t Dispatch( + Int2Type is_descending, + Int2Type dispatch_to, + int *d_selector, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + DoubleBuffer &d_keys, + DoubleBuffer &d_values, + int num_items, + int num_segments, + const int *d_segment_offsets, + int begin_bit, + int end_bit, + cudaStream_t stream, + bool debug_synchronous) +{ + KeyT const *const_keys_itr = d_keys.Current(); + ValueT const *const_values_itr = d_values.Current(); + + cudaError_t retval = DeviceRadixSort::SortPairsDescending( + d_temp_storage, temp_storage_bytes, + const_keys_itr, d_keys.Alternate(), const_values_itr, d_values.Alternate(), + num_items, begin_bit, end_bit, stream, debug_synchronous); + + d_keys.selector ^= 1; + d_values.selector ^= 1; + return retval; +} + +//--------------------------------------------------------------------- +// Dispatch to different DeviceRadixSort entrypoints +//--------------------------------------------------------------------- + +/** + * Dispatch to CUB_SEGMENTED sorting entrypoint (specialized for ascending) + */ +template +CUB_RUNTIME_FUNCTION +__forceinline__ +cudaError_t Dispatch( + Int2Type is_descending, + Int2Type dispatch_to, + int *d_selector, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + DoubleBuffer &d_keys, + DoubleBuffer &d_values, + int num_items, + int num_segments, + const int *d_segment_offsets, + int begin_bit, + int end_bit, + cudaStream_t stream, + bool debug_synchronous) +{ + return DeviceSegmentedRadixSort::SortPairs( + d_temp_storage, temp_storage_bytes, + d_keys, d_values, + num_items, num_segments, d_segment_offsets, d_segment_offsets + 1, + begin_bit, end_bit, stream, debug_synchronous); +} + +/** + * Dispatch to CUB_SEGMENTED_NO_OVERWRITE sorting entrypoint (specialized for ascending) + */ +template +CUB_RUNTIME_FUNCTION +__forceinline__ +cudaError_t Dispatch( + Int2Type is_descending, + Int2Type dispatch_to, + int *d_selector, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + DoubleBuffer &d_keys, + DoubleBuffer &d_values, + int num_items, + int num_segments, + const int *d_segment_offsets, + int begin_bit, + int end_bit, + cudaStream_t stream, + bool debug_synchronous) +{ + KeyT const *const_keys_itr = d_keys.Current(); + ValueT const *const_values_itr = d_values.Current(); + + cudaError_t retval = DeviceSegmentedRadixSort::SortPairs( + d_temp_storage, temp_storage_bytes, + const_keys_itr, d_keys.Alternate(), 
const_values_itr, d_values.Alternate(), + num_items, num_segments, d_segment_offsets, d_segment_offsets + 1, + begin_bit, end_bit, stream, debug_synchronous); + + d_keys.selector ^= 1; + d_values.selector ^= 1; + return retval; +} + + +/** + * Dispatch to CUB_SEGMENTED sorting entrypoint (specialized for descending) + */ +template +CUB_RUNTIME_FUNCTION +__forceinline__ +cudaError_t Dispatch( + Int2Type is_descending, + Int2Type dispatch_to, + int *d_selector, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + DoubleBuffer &d_keys, + DoubleBuffer &d_values, + int num_items, + int num_segments, + const int *d_segment_offsets, + int begin_bit, + int end_bit, + cudaStream_t stream, + bool debug_synchronous) +{ + return DeviceSegmentedRadixSort::SortPairsDescending( + d_temp_storage, temp_storage_bytes, + d_keys, d_values, + num_items, num_segments, d_segment_offsets, d_segment_offsets + 1, + begin_bit, end_bit, stream, debug_synchronous); +} + +/** + * Dispatch to CUB_SEGMENTED_NO_OVERWRITE sorting entrypoint (specialized for descending) + */ +template +CUB_RUNTIME_FUNCTION +__forceinline__ +cudaError_t Dispatch( + Int2Type is_descending, + Int2Type dispatch_to, + int *d_selector, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + DoubleBuffer &d_keys, + DoubleBuffer &d_values, + int num_items, + int num_segments, + const int *d_segment_offsets, + int begin_bit, + int end_bit, + cudaStream_t stream, + bool debug_synchronous) +{ + KeyT const *const_keys_itr = d_keys.Current(); + ValueT const *const_values_itr = d_values.Current(); + + cudaError_t retval = DeviceSegmentedRadixSort::SortPairsDescending( + d_temp_storage, temp_storage_bytes, + const_keys_itr, d_keys.Alternate(), const_values_itr, d_values.Alternate(), + num_items, num_segments, d_segment_offsets, d_segment_offsets + 1, + begin_bit, end_bit, stream, debug_synchronous); + + d_keys.selector ^= 1; + d_values.selector ^= 1; + return retval; +} + + +//--------------------------------------------------------------------- +// Dispatch to different Thrust entrypoints +//--------------------------------------------------------------------- + +/** + * Dispatch keys-only to Thrust sorting entrypoint + */ +template +cudaError_t Dispatch( + Int2Type is_descending, + Int2Type dispatch_to, + int *d_selector, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void *d_temp_storage, + size_t &temp_storage_bytes, + DoubleBuffer &d_keys, + DoubleBuffer &d_values, + int num_items, + int num_segments, + const int *d_segment_offsets, + int begin_bit, + int end_bit, + cudaStream_t stream, + bool debug_synchronous) +{ + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + thrust::device_ptr d_keys_wrapper(d_keys.Current()); + + if (IS_DESCENDING) thrust::reverse(d_keys_wrapper, d_keys_wrapper + num_items); + thrust::sort(d_keys_wrapper, d_keys_wrapper + num_items); + if (IS_DESCENDING) thrust::reverse(d_keys_wrapper, d_keys_wrapper + num_items); + } + + return cudaSuccess; +} + + +/** + * Dispatch key-value pairs to Thrust sorting entrypoint + */ +template +cudaError_t Dispatch( + Int2Type is_descending, + Int2Type dispatch_to, + int *d_selector, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void *d_temp_storage, + size_t &temp_storage_bytes, + DoubleBuffer &d_keys, + DoubleBuffer &d_values, + int num_items, + int num_segments, + const int *d_segment_offsets, + int 
begin_bit, + int end_bit, + cudaStream_t stream, + bool debug_synchronous) +{ + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + thrust::device_ptr d_keys_wrapper(d_keys.Current()); + thrust::device_ptr d_values_wrapper(d_values.Current()); + + if (IS_DESCENDING) { + thrust::reverse(d_keys_wrapper, d_keys_wrapper + num_items); + thrust::reverse(d_values_wrapper, d_values_wrapper + num_items); + } + + thrust::sort_by_key(d_keys_wrapper, d_keys_wrapper + num_items, d_values_wrapper); + + if (IS_DESCENDING) { + thrust::reverse(d_keys_wrapper, d_keys_wrapper + num_items); + thrust::reverse(d_values_wrapper, d_values_wrapper + num_items); + } + } + + return cudaSuccess; +} + + +//--------------------------------------------------------------------- +// CUDA Nested Parallelism Test Kernel +//--------------------------------------------------------------------- + +/** + * Simple wrapper kernel to invoke DeviceRadixSort + */ +template +__global__ void CnpDispatchKernel( + Int2Type is_descending, + int *d_selector, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void *d_temp_storage, + size_t temp_storage_bytes, + DoubleBuffer d_keys, + DoubleBuffer d_values, + int num_items, + int num_segments, + const int *d_segment_offsets, + int begin_bit, + int end_bit, + bool debug_synchronous) +{ +#ifndef CUB_CDP + *d_cdp_error = cudaErrorNotSupported; +#else + *d_cdp_error = Dispatch( + is_descending, Int2Type(), d_selector, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, d_keys, d_values, + num_items, num_segments, d_segment_offsets, + begin_bit, end_bit, 0, debug_synchronous); + *d_temp_storage_bytes = temp_storage_bytes; + *d_selector = d_keys.selector; +#endif +} + + +/** + * Dispatch to CDP kernel + */ +template +cudaError_t Dispatch( + Int2Type is_descending, + Int2Type dispatch_to, + int *d_selector, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void *d_temp_storage, + size_t &temp_storage_bytes, + DoubleBuffer &d_keys, + DoubleBuffer &d_values, + int num_items, + int num_segments, + const int *d_segment_offsets, + int begin_bit, + int end_bit, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to invoke device-side dispatch + CnpDispatchKernel<<<1,1>>>( + is_descending, d_selector, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, d_keys, d_values, + num_items, num_segments, d_segment_offsets, + begin_bit, end_bit, debug_synchronous); + + // Copy out selector + CubDebugExit(cudaMemcpy(&d_keys.selector, d_selector, sizeof(int) * 1, cudaMemcpyDeviceToHost)); + d_values.selector = d_keys.selector; + + // Copy out temp_storage_bytes + CubDebugExit(cudaMemcpy(&temp_storage_bytes, d_temp_storage_bytes, sizeof(size_t) * 1, cudaMemcpyDeviceToHost)); + + // Copy out error + cudaError_t retval; + CubDebugExit(cudaMemcpy(&retval, d_cdp_error, sizeof(cudaError_t) * 1, cudaMemcpyDeviceToHost)); + return retval; +} + + + +//--------------------------------------------------------------------- +// Problem generation +//--------------------------------------------------------------------- + + +/** + * Simple key-value pairing + */ +template < + typename KeyT, + typename ValueT, + bool IS_FLOAT = (Traits::CATEGORY == FLOATING_POINT)> +struct Pair +{ + KeyT key; + ValueT value; + + bool operator<(const Pair &b) const + { + return (key < b.key); + } +}; + + +/** + * Simple key-value pairing (specialized for bool types) + */ +template +struct Pair +{ + bool key; + ValueT value; + + bool 
operator<(const Pair &b) const + { + return (!key && b.key); + } +}; + + +/** + * Simple key-value pairing (specialized for floating point types) + */ +template +struct Pair +{ + KeyT key; + ValueT value; + + bool operator<(const Pair &b) const + { + if (key < b.key) + return true; + + if (key > b.key) + return false; + + // KeyT in unsigned bits + typedef typename Traits::UnsignedBits UnsignedBits; + + // Return true if key is negative zero and b.key is positive zero + UnsignedBits key_bits = *reinterpret_cast(const_cast(&key)); + UnsignedBits b_key_bits = *reinterpret_cast(const_cast(&b.key)); + UnsignedBits HIGH_BIT = Traits::HIGH_BIT; + + return ((key_bits & HIGH_BIT) != 0) && ((b_key_bits & HIGH_BIT) == 0); + } +}; + + +/** + * Initialize key data + */ +template +void InitializeKeyBits( + GenMode gen_mode, + KeyT *h_keys, + int num_items, + int entropy_reduction) +{ + for (int i = 0; i < num_items; ++i) + InitValue(gen_mode, h_keys[i], i); +} + + +/** + * Initialize solution + */ +template +void InitializeSolution( + KeyT *h_keys, + int num_items, + int num_segments, + int *h_segment_offsets, + int begin_bit, + int end_bit, + int *&h_reference_ranks, + KeyT *&h_reference_keys) +{ + typedef Pair PairT; + + PairT *h_pairs = new PairT[num_items]; + + int num_bits = end_bit - begin_bit; + for (int i = 0; i < num_items; ++i) + { + + // Mask off unwanted portions + if (num_bits < sizeof(KeyT) * 8) + { + unsigned long long base = 0; + memcpy(&base, &h_keys[i], sizeof(KeyT)); + base &= ((1ull << num_bits) - 1) << begin_bit; + memcpy(&h_pairs[i].key, &base, sizeof(KeyT)); + } + else + { + h_pairs[i].key = h_keys[i]; + } + + h_pairs[i].value = i; + } + + printf("\nSorting reference solution on CPU (%d segments)...", num_segments); fflush(stdout); + + for (int i = 0; i < num_segments; ++i) + { + if (IS_DESCENDING) std::reverse(h_pairs + h_segment_offsets[i], h_pairs + h_segment_offsets[i + 1]); + std::stable_sort( h_pairs + h_segment_offsets[i], h_pairs + h_segment_offsets[i + 1]); + if (IS_DESCENDING) std::reverse(h_pairs + h_segment_offsets[i], h_pairs + h_segment_offsets[i + 1]); + } + + printf(" Done.\n"); fflush(stdout); + + h_reference_ranks = new int[num_items]; + h_reference_keys = new KeyT[num_items]; + + for (int i = 0; i < num_items; ++i) + { + h_reference_ranks[i] = h_pairs[i].value; + h_reference_keys[i] = h_keys[h_pairs[i].value]; + } + + if (h_pairs) delete[] h_pairs; +} + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + + +/** + * Test DeviceRadixSort + */ +template < + Backend BACKEND, + bool IS_DESCENDING, + typename KeyT, + typename ValueT> +void Test( + KeyT *h_keys, + ValueT *h_values, + int num_items, + int num_segments, + int *h_segment_offsets, + int begin_bit, + int end_bit, + KeyT *h_reference_keys, + ValueT *h_reference_values) +{ + // Key alias type +#if (__CUDACC_VER_MAJOR__ >= 9) + typedef typename If::VALUE, __half, KeyT>::Type KeyAliasT; +#else + typedef KeyT KeyAliasT; +#endif + + const bool KEYS_ONLY = Equals::VALUE; + + printf("%s %s cub::DeviceRadixSort %d items, %d segments, %d-byte keys (%s) %d-byte values (%s), descending %d, begin_bit %d, end_bit %d\n", + (BACKEND == CUB_NO_OVERWRITE) ? "CUB_NO_OVERWRITE" : (BACKEND == CDP) ? "CDP CUB" : (BACKEND == THRUST) ? "Thrust" : "CUB", + (KEYS_ONLY) ? "keys-only" : "key-value", + num_items, num_segments, + (int) sizeof(KeyT), typeid(KeyT).name(), (KEYS_ONLY) ? 
0 : (int) sizeof(ValueT), typeid(ValueT).name(), + IS_DESCENDING, begin_bit, end_bit); + fflush(stdout); + + if (g_verbose) + { + printf("Input keys:\n"); + DisplayResults(h_keys, num_items); + printf("\n\n"); + } + + // Allocate device arrays + DoubleBuffer d_keys; + DoubleBuffer d_values; + int *d_selector; + int *d_segment_offsets; + size_t *d_temp_storage_bytes; + cudaError_t *d_cdp_error; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_keys.d_buffers[0], sizeof(KeyT) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_keys.d_buffers[1], sizeof(KeyT) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_selector, sizeof(int) * 1)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_segment_offsets, sizeof(int) * (num_segments + 1))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_temp_storage_bytes, sizeof(size_t) * 1)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_cdp_error, sizeof(cudaError_t) * 1)); + if (!KEYS_ONLY) + { + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_values.d_buffers[0], sizeof(ValueT) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_values.d_buffers[1], sizeof(ValueT) * num_items)); + } + + // Allocate temporary storage (and make it un-aligned) + size_t temp_storage_bytes = 0; + void *d_temp_storage = NULL; + CubDebugExit(Dispatch( + Int2Type(), Int2Type(), d_selector, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, d_keys, d_values, + num_items, num_segments, d_segment_offsets, + begin_bit, end_bit, 0, true)); + + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes + 1)); + void* mis_aligned_temp = static_cast(d_temp_storage) + 1; + + // Initialize/clear device arrays + d_keys.selector = 0; + CubDebugExit(cudaMemcpy(d_keys.d_buffers[0], h_keys, sizeof(KeyT) * num_items, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemset(d_keys.d_buffers[1], 0, sizeof(KeyT) * num_items)); + if (!KEYS_ONLY) + { + d_values.selector = 0; + CubDebugExit(cudaMemcpy(d_values.d_buffers[0], h_values, sizeof(ValueT) * num_items, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemset(d_values.d_buffers[1], 0, sizeof(ValueT) * num_items)); + } + CubDebugExit(cudaMemcpy(d_segment_offsets, h_segment_offsets, sizeof(int) * (num_segments + 1), cudaMemcpyHostToDevice)); + + // Run warmup/correctness iteration + CubDebugExit(Dispatch( + Int2Type(), Int2Type(), d_selector, d_temp_storage_bytes, d_cdp_error, + mis_aligned_temp, temp_storage_bytes, d_keys, d_values, + num_items, num_segments, d_segment_offsets, + begin_bit, end_bit, 0, true)); + + // Flush any stdout/stderr + fflush(stdout); + fflush(stderr); + + // Check for correctness (and display results, if specified) + printf("Warmup done. Checking results:\n"); fflush(stdout); + int compare = CompareDeviceResults(h_reference_keys, reinterpret_cast(d_keys.Current()), num_items, true, g_verbose); + printf("\t Compare keys (selector %d): %s ", d_keys.selector, compare ? "FAIL" : "PASS"); fflush(stdout); + if (!KEYS_ONLY) + { + int values_compare = CompareDeviceResults(h_reference_values, d_values.Current(), num_items, true, g_verbose); + compare |= values_compare; + printf("\t Compare values (selector %d): %s ", d_values.selector, values_compare ? 
"FAIL" : "PASS"); fflush(stdout); + } + if (BACKEND == CUB_NO_OVERWRITE) + { + // Check that input isn't overwritten + int input_compare = CompareDeviceResults(h_keys, reinterpret_cast(d_keys.d_buffers[0]), num_items, true, g_verbose); + compare |= input_compare; + printf("\t Compare input keys: %s ", input_compare ? "FAIL" : "PASS"); fflush(stdout); + } + + // Performance + if (g_timing_iterations) + printf("\nPerforming timing iterations:\n"); fflush(stdout); + + GpuTimer gpu_timer; + float elapsed_millis = 0.0f; + for (int i = 0; i < g_timing_iterations; ++i) + { + // Initialize/clear device arrays + CubDebugExit(cudaMemcpy(d_keys.d_buffers[d_keys.selector], h_keys, sizeof(KeyT) * num_items, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemset(d_keys.d_buffers[d_keys.selector ^ 1], 0, sizeof(KeyT) * num_items)); + if (!KEYS_ONLY) + { + CubDebugExit(cudaMemcpy(d_values.d_buffers[d_values.selector], h_values, sizeof(ValueT) * num_items, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemset(d_values.d_buffers[d_values.selector ^ 1], 0, sizeof(ValueT) * num_items)); + } + + gpu_timer.Start(); + CubDebugExit(Dispatch( + Int2Type(), Int2Type(), d_selector, d_temp_storage_bytes, d_cdp_error, + mis_aligned_temp, temp_storage_bytes, d_keys, d_values, + num_items, num_segments, d_segment_offsets, + begin_bit, end_bit, 0, false)); + gpu_timer.Stop(); + elapsed_millis += gpu_timer.ElapsedMillis(); + } + + // Display performance + if (g_timing_iterations > 0) + { + float avg_millis = elapsed_millis / g_timing_iterations; + float giga_rate = float(num_items) / avg_millis / 1000.0f / 1000.0f; + float giga_bandwidth = (KEYS_ONLY) ? + giga_rate * sizeof(KeyT) * 2 : + giga_rate * (sizeof(KeyT) + sizeof(ValueT)) * 2; + printf("\n%.3f elapsed ms, %.3f avg ms, %.3f billion items/s, %.3f logical GB/s", elapsed_millis, avg_millis, giga_rate, giga_bandwidth); + } + + printf("\n\n"); + + // Cleanup + if (d_keys.d_buffers[0]) CubDebugExit(g_allocator.DeviceFree(d_keys.d_buffers[0])); + if (d_keys.d_buffers[1]) CubDebugExit(g_allocator.DeviceFree(d_keys.d_buffers[1])); + if (d_values.d_buffers[0]) CubDebugExit(g_allocator.DeviceFree(d_values.d_buffers[0])); + if (d_values.d_buffers[1]) CubDebugExit(g_allocator.DeviceFree(d_values.d_buffers[1])); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + if (d_cdp_error) CubDebugExit(g_allocator.DeviceFree(d_cdp_error)); + if (d_selector) CubDebugExit(g_allocator.DeviceFree(d_selector)); + if (d_segment_offsets) CubDebugExit(g_allocator.DeviceFree(d_segment_offsets)); + if (d_temp_storage_bytes) CubDebugExit(g_allocator.DeviceFree(d_temp_storage_bytes)); + + // Correctness asserts + AssertEquals(0, compare); +} + + +/** + * Test backend + */ +template +void TestBackend( + KeyT *h_keys, + int num_items, + int num_segments, + int *h_segment_offsets, + int begin_bit, + int end_bit, + KeyT *h_reference_keys, + int *h_reference_ranks) +{ + const bool KEYS_ONLY = Equals::VALUE; + + ValueT *h_values = NULL; + ValueT *h_reference_values = NULL; + + if (!KEYS_ONLY) + { + h_values = new ValueT[num_items]; + h_reference_values = new ValueT[num_items]; + + for (int i = 0; i < num_items; ++i) + { + InitValue(INTEGER_SEED, h_values[i], i); + InitValue(INTEGER_SEED, h_reference_values[i], h_reference_ranks[i]); + } + } + +#ifdef SEGMENTED_SORT + // Test multi-segment implementations + Test( h_keys, h_values, num_items, num_segments, h_segment_offsets, begin_bit, end_bit, h_reference_keys, h_reference_values); + Test( h_keys, h_values, num_items, num_segments, 
h_segment_offsets, begin_bit, end_bit, h_reference_keys, h_reference_values); +#else // SEGMENTED_SORT + if (num_segments == 1) + { + // Test single-segment implementations + Test( h_keys, h_values, num_items, num_segments, h_segment_offsets, begin_bit, end_bit, h_reference_keys, h_reference_values); + Test( h_keys, h_values, num_items, num_segments, h_segment_offsets, begin_bit, end_bit, h_reference_keys, h_reference_values); + #ifdef CUB_CDP + Test( h_keys, h_values, num_items, num_segments, h_segment_offsets, begin_bit, end_bit, h_reference_keys, h_reference_values); + #endif + } +#endif // SEGMENTED_SORT + + if (h_values) delete[] h_values; + if (h_reference_values) delete[] h_reference_values; +} + + + + +/** + * Test value type + */ +template +void TestValueTypes( + KeyT *h_keys, + int num_items, + int num_segments, + int *h_segment_offsets, + int begin_bit, + int end_bit) +{ + // Initialize the solution + + int *h_reference_ranks = NULL; + KeyT *h_reference_keys = NULL; + InitializeSolution(h_keys, num_items, num_segments, h_segment_offsets, begin_bit, end_bit, h_reference_ranks, h_reference_keys); + + // Test keys-only + TestBackend (h_keys, num_items, num_segments, h_segment_offsets, begin_bit, end_bit, h_reference_keys, h_reference_ranks); + + // Test with 8b value + TestBackend (h_keys, num_items, num_segments, h_segment_offsets, begin_bit, end_bit, h_reference_keys, h_reference_ranks); + + // Test with 32b value + TestBackend (h_keys, num_items, num_segments, h_segment_offsets, begin_bit, end_bit, h_reference_keys, h_reference_ranks); + + // Test with 64b value + TestBackend(h_keys, num_items, num_segments, h_segment_offsets, begin_bit, end_bit, h_reference_keys, h_reference_ranks); + + // Test with non-trivially-constructable value + TestBackend (h_keys, num_items, num_segments, h_segment_offsets, begin_bit, end_bit, h_reference_keys, h_reference_ranks); + + // Cleanup + if (h_reference_ranks) delete[] h_reference_ranks; + if (h_reference_keys) delete[] h_reference_keys; +} + + + +/** + * Test ascending/descending + */ +template +void TestDirection( + KeyT *h_keys, + int num_items, + int num_segments, + int *h_segment_offsets, + int begin_bit, + int end_bit) +{ + TestValueTypes(h_keys, num_items, num_segments, h_segment_offsets, begin_bit, end_bit); + TestValueTypes(h_keys, num_items, num_segments, h_segment_offsets, begin_bit, end_bit); +} + + +/** + * Test different bit ranges + */ +template +void TestBits( + KeyT *h_keys, + int num_items, + int num_segments, + int *h_segment_offsets) +{ + // Don't test partial-word sorting for boolean, fp, or signed types (the bit-flipping techniques get in the way) + if ((Traits::CATEGORY == UNSIGNED_INTEGER) && (!Equals::VALUE)) + { + // Partial bits + int begin_bit = 1; + int end_bit = (sizeof(KeyT) * 8) - 1; + printf("Testing key bits [%d,%d)\n", begin_bit, end_bit); fflush(stdout); + TestDirection(h_keys, num_items, num_segments, h_segment_offsets, begin_bit, end_bit); + + // Across subword boundaries + int mid_bit = sizeof(KeyT) * 4; + printf("Testing key bits [%d,%d)\n", mid_bit - 1, mid_bit + 1); fflush(stdout); + TestDirection(h_keys, num_items, num_segments, h_segment_offsets, mid_bit - 1, mid_bit + 1); + } + + printf("Testing key bits [%d,%d)\n", 0, int(sizeof(KeyT)) * 8); fflush(stdout); + TestDirection(h_keys, num_items, num_segments, h_segment_offsets, 0, sizeof(KeyT) * 8); +} + + +/** + * Test different segment compositions + */ +template +void TestSegments( + KeyT *h_keys, + int num_items, + int max_segments) +{ + int 
*h_segment_offsets = new int[max_segments + 1]; + +#ifdef SEGMENTED_SORT + for (int num_segments = max_segments; num_segments > 1; num_segments = (num_segments + 32 - 1) / 32) + { + if (num_items / num_segments < 128 * 1000) { + // Right now we assign a single thread block to each segment, so lets keep it to under 128K items per segment + InitializeSegments(num_items, num_segments, h_segment_offsets); + TestBits(h_keys, num_items, num_segments, h_segment_offsets); + } + } +#else + // Test single segment + if (num_items < 128 * 1000) { + // Right now we assign a single thread block to each segment, so lets keep it to under 128K items per segment + InitializeSegments(num_items, 1, h_segment_offsets); + TestBits(h_keys, num_items, 1, h_segment_offsets); + } +#endif + if (h_segment_offsets) delete[] h_segment_offsets; +} + + +/** + * Test different (sub)lengths and number of segments + */ +template +void TestSizes( + KeyT *h_keys, + int max_items, + int max_segments) +{ + for (int num_items = max_items; num_items > 1; num_items = (num_items + 32 - 1) / 32) + { + TestSegments(h_keys, num_items, max_segments); + } + TestSegments(h_keys, 1, max_segments); + TestSegments(h_keys, 0, max_segments); +} + + +/** + * Test key sampling distributions + */ +template +void TestGen( + int max_items, + int max_segments) +{ + int ptx_version; + CubDebugExit(PtxVersion(ptx_version)); + + if (max_items < 0) + max_items = (ptx_version > 100) ? 9000003 : max_items = 5000003; + + if (max_segments < 0) + max_segments = 5003; + + KeyT *h_keys = new KeyT[max_items]; + + for (int entropy_reduction = 0; entropy_reduction <= 6; entropy_reduction += 3) + { + printf("\nTesting random %s keys with entropy reduction factor %d\n", typeid(KeyT).name(), entropy_reduction); fflush(stdout); + InitializeKeyBits(RANDOM, h_keys, max_items, entropy_reduction); + TestSizes(h_keys, max_items, max_segments); + } + + printf("\nTesting uniform %s keys\n", typeid(KeyT).name()); fflush(stdout); + InitializeKeyBits(UNIFORM, h_keys, max_items, 0); + TestSizes(h_keys, max_items, max_segments); + + printf("\nTesting natural number %s keys\n", typeid(KeyT).name()); fflush(stdout); + InitializeKeyBits(INTEGER_SEED, h_keys, max_items, 0); + TestSizes(h_keys, max_items, max_segments); + + if (h_keys) delete[] h_keys; +} + + +//--------------------------------------------------------------------- +// Simple test +//--------------------------------------------------------------------- + +template < + Backend BACKEND, + typename KeyT, + typename ValueT, + bool IS_DESCENDING> +void Test( + int num_items, + int num_segments, + GenMode gen_mode, + int entropy_reduction, + int begin_bit, + int end_bit) +{ + const bool KEYS_ONLY = Equals::VALUE; + + KeyT *h_keys = new KeyT[num_items]; + int *h_reference_ranks = NULL; + KeyT *h_reference_keys = NULL; + ValueT *h_values = NULL; + ValueT *h_reference_values = NULL; + int *h_segment_offsets = new int[num_segments + 1]; + + if (end_bit < 0) + end_bit = sizeof(KeyT) * 8; + + InitializeKeyBits(gen_mode, h_keys, num_items, entropy_reduction); + InitializeSegments(num_items, num_segments, h_segment_offsets); + InitializeSolution( + h_keys, num_items, num_segments, h_segment_offsets, + begin_bit, end_bit, h_reference_ranks, h_reference_keys); + + if (!KEYS_ONLY) + { + h_values = new ValueT[num_items]; + h_reference_values = new ValueT[num_items]; + + for (int i = 0; i < num_items; ++i) + { + InitValue(INTEGER_SEED, h_values[i], i); + InitValue(INTEGER_SEED, h_reference_values[i], h_reference_ranks[i]); + } + } + if 
(h_reference_ranks) delete[] h_reference_ranks; + + printf("\nTesting bits [%d,%d) of %s keys with gen-mode %d\n", begin_bit, end_bit, typeid(KeyT).name(), gen_mode); fflush(stdout); + Test( + h_keys, h_values, + num_items, num_segments, h_segment_offsets, + begin_bit, end_bit, h_reference_keys, h_reference_values); + + if (h_keys) delete[] h_keys; + if (h_reference_keys) delete[] h_reference_keys; + if (h_values) delete[] h_values; + if (h_reference_values) delete[] h_reference_values; + if (h_segment_offsets) delete[] h_segment_offsets; +} + + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + int bits = -1; + int num_items = -1; + int num_segments = -1; + int entropy_reduction = 0; + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + args.GetCmdLineArgument("s", num_segments); + args.GetCmdLineArgument("i", g_timing_iterations); + args.GetCmdLineArgument("repeat", g_repeat); + args.GetCmdLineArgument("bits", bits); + args.GetCmdLineArgument("entropy", entropy_reduction); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--bits=]" + "[--n= " + "[--s= " + "[--i= " + "[--device=] " + "[--repeat=]" + "[--v] " + "[--entropy=]" + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Get ptx version + int ptx_version; + CubDebugExit(PtxVersion(ptx_version)); + +#ifdef QUICKER_TEST + + enum { + IS_DESCENDING = false + }; + + // Compile/run basic CUB test + if (num_items < 0) num_items = 48000000; + if (num_segments < 0) num_segments = 5000; + + Test(num_items, 1, RANDOM, entropy_reduction, 0, bits); + Test(num_items, 1, RANDOM, entropy_reduction, 0, bits); + +#if (__CUDACC_VER_MAJOR__ >= 9) + Test( num_items, 1, RANDOM, entropy_reduction, 0, bits); +#endif + + Test( num_items, num_segments, RANDOM, entropy_reduction, 0, bits); + + Test( num_items, 1, RANDOM, entropy_reduction, 0, bits); + Test( num_items, 1, RANDOM, entropy_reduction, 0, bits); + + Test( num_items, 1, RANDOM, entropy_reduction, 0, bits); + Test( num_items, 1, RANDOM, entropy_reduction, 0, bits); + +#elif defined(QUICK_TEST) + + // Compile/run quick tests + if (num_items < 0) num_items = 48000000; + if (num_segments < 0) num_segments = 5000; + + // Compare CUB and thrust on 32b keys-only + Test ( num_items, 1, RANDOM, entropy_reduction, 0, bits); + Test ( num_items, 1, RANDOM, entropy_reduction, 0, bits); + + // Compare CUB and thrust on 64b keys-only + Test ( num_items, 1, RANDOM, entropy_reduction, 0, bits); + Test ( num_items, 1, RANDOM, entropy_reduction, 0, bits); + + + // Compare CUB and thrust on 32b key-value pairs + Test ( num_items, 1, RANDOM, entropy_reduction, 0, bits); + Test ( num_items, 1, RANDOM, entropy_reduction, 0, bits); + + // Compare CUB and thrust on 64b key-value pairs + Test ( num_items, 1, RANDOM, entropy_reduction, 0, bits); + Test ( num_items, 1, RANDOM, entropy_reduction, 0, bits); + + +#else + + // Compile/run thorough tests + for (int i = 0; i <= g_repeat; ++i) + { + TestGen (num_items, num_segments); + + TestGen (num_items, num_segments); + TestGen (num_items, num_segments); + TestGen (num_items, num_segments); + + TestGen (num_items, num_segments); + TestGen (num_items, num_segments); + + TestGen (num_items, num_segments); + TestGen (num_items, num_segments); + + 
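/*
 * Aside: a minimal sketch (not part of this test) of the public
 * double-buffer entry point that these TestGen variants ultimately drive.
 * cub::DeviceRadixSort::SortPairs may ping-pong between the two buffers of
 * each cub::DoubleBuffer; after the call, Current() designates the sorted
 * data. The item count and key/value types are illustrative assumptions;
 * error checking is omitted.
 *
 *     #include <cub/device/device_radix_sort.cuh>
 *
 *     int  n = 1 << 20;                                    // assumed
 *     int *d_key_buf, *d_key_alt, *d_val_buf, *d_val_alt;
 *     cudaMalloc(&d_key_buf, n * sizeof(int));
 *     cudaMalloc(&d_key_alt, n * sizeof(int));
 *     cudaMalloc(&d_val_buf, n * sizeof(int));
 *     cudaMalloc(&d_val_alt, n * sizeof(int));
 *
 *     cub::DoubleBuffer<int> d_keys(d_key_buf, d_key_alt);
 *     cub::DoubleBuffer<int> d_values(d_val_buf, d_val_alt);
 *
 *     void   *d_temp_storage     = NULL;
 *     size_t  temp_storage_bytes = 0;
 *     cub::DeviceRadixSort::SortPairs(                     // size query
 *         d_temp_storage, temp_storage_bytes, d_keys, d_values, n);
 *     cudaMalloc(&d_temp_storage, temp_storage_bytes);
 *     cub::DeviceRadixSort::SortPairs(                     // ascending sort
 *         d_temp_storage, temp_storage_bytes, d_keys, d_values, n);
 *
 *     int *d_sorted_keys   = d_keys.Current();
 *     int *d_sorted_values = d_values.Current();
 */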
TestGen (num_items, num_segments); + TestGen (num_items, num_segments); + + TestGen (num_items, num_segments); + TestGen (num_items, num_segments); + +#if (__CUDACC_VER_MAJOR__ >= 9) + TestGen (num_items, num_segments); +#endif + TestGen (num_items, num_segments); + + if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted + TestGen (num_items, num_segments); + + } + +#endif + + return 0; +} + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_reduce.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_reduce.cu new file mode 100644 index 0000000..275d8e1 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_reduce.cu @@ -0,0 +1,1359 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/****************************************************************************** + * Test of DeviceReduce utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +int g_ptx_version; +int g_sm_count; +bool g_verbose = false; +bool g_verbose_input = false; +int g_timing_iterations = 0; +int g_repeat = 0; +CachingDeviceAllocator g_allocator(true); + + +// Dispatch types +enum Backend +{ + CUB, // CUB method + CUB_SEGMENTED, // CUB segmented method + CUB_CDP, // GPU-based (dynamic parallelism) dispatch to CUB method + THRUST, // Thrust method +}; + + +// Custom max functor +struct CustomMax +{ + /// Boolean max operator, returns (a > b) ? a : b + template + __host__ __device__ __forceinline__ OutputT operator()(const OutputT &a, const OutputT &b) + { + return CUB_MAX(a, b); + } +}; + + +//--------------------------------------------------------------------- +// Dispatch to different CUB DeviceReduce entrypoints +//--------------------------------------------------------------------- + +/** + * Dispatch to reduce entrypoint (custom-max) + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + ReductionOpT reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + typedef typename std::iterator_traits::value_type InputT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... 
else the output iterator's value type + + // Max-identity + OutputT identity = Traits::Lowest(); // replace with std::numeric_limits::lowest() when C++ support is more prevalent + + // Invoke kernel to device reduction directly + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceReduce::Reduce(d_temp_storage, temp_storage_bytes, + d_in, d_out, num_items, reduction_op, identity, + stream, debug_synchronous); + } + + printf("\t timing_timing_iterations: %d, temp_storage_bytes: %lld\n", + timing_timing_iterations, temp_storage_bytes); + + return error; +} + +/** + * Dispatch to sum entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + cub::Sum reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to device reduction directly + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, stream, debug_synchronous); + } + + printf("\t timing_timing_iterations: %d, temp_storage_bytes: %lld\n", + timing_timing_iterations, temp_storage_bytes); + + return error; +} + +/** + * Dispatch to min entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + cub::Min reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to device reduction directly + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceReduce::Min(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, stream, debug_synchronous); + } + + printf("\t timing_timing_iterations: %d, temp_storage_bytes: %lld\n", + timing_timing_iterations, temp_storage_bytes); + + return error; +} + +/** + * Dispatch to max entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + cub::Max reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to device reduction directly + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceReduce::Max(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, stream, debug_synchronous); + } + + printf("\t timing_timing_iterations: %d, temp_storage_bytes: %lld\n", + timing_timing_iterations, temp_storage_bytes); + + return error; +} + +/** + * Dispatch to argmin entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& 
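Aside: every Dispatch overload above funnels into the same CUB convention of calling the entrypoint twice, first with a NULL temp-storage pointer to obtain the required scratch size, then again to actually reduce. A minimal standalone sketch of that pattern for cub::DeviceReduce::Sum and for the custom-max Reduce path (MaxOp and all buffer names are illustrative, not from the test):

// Illustrative sketch only -- not part of the patch above.
#include <cub/cub.cuh>
#include <cuda_runtime.h>
#include <climits>
#include <cstdio>

// Same shape as the CustomMax functor used by the tests above.
struct MaxOp
{
    __host__ __device__ int operator()(int a, int b) const { return a > b ? a : b; }
};

int main()
{
    const int num_items = 6;
    int h_in[num_items] = {3, -1, 7, 0, 5, 2};

    int *d_in, *d_out;
    cudaMalloc(&d_in,  num_items * sizeof(int));
    cudaMalloc(&d_out, sizeof(int));
    cudaMemcpy(d_in, h_in, sizeof(h_in), cudaMemcpyHostToDevice);

    // Sum entrypoint: size the temp storage with a NULL pointer, then reduce.
    void  *d_temp = NULL;
    size_t temp_bytes = 0;
    cub::DeviceReduce::Sum(d_temp, temp_bytes, d_in, d_out, num_items);
    cudaMalloc(&d_temp, temp_bytes);
    cub::DeviceReduce::Sum(d_temp, temp_bytes, d_in, d_out, num_items);

    int h_sum;
    cudaMemcpy(&h_sum, d_out, sizeof(int), cudaMemcpyDeviceToHost);
    printf("sum = %d\n", h_sum);                        // expected: 16
    cudaFree(d_temp);

    // Generic Reduce entrypoint with a user-supplied operator and identity,
    // matching the custom-max path dispatched above. Re-query the temp storage
    // because different entrypoints may need different amounts.
    void  *d_temp2 = NULL;
    size_t temp_bytes2 = 0;
    cub::DeviceReduce::Reduce(d_temp2, temp_bytes2, d_in, d_out, num_items, MaxOp(), INT_MIN);
    cudaMalloc(&d_temp2, temp_bytes2);
    cub::DeviceReduce::Reduce(d_temp2, temp_bytes2, d_in, d_out, num_items, MaxOp(), INT_MIN);

    int h_max;
    cudaMemcpy(&h_max, d_out, sizeof(int), cudaMemcpyDeviceToHost);
    printf("max = %d\n", h_max);                        // expected: 7

    cudaFree(d_temp2); cudaFree(d_in); cudaFree(d_out);
    return 0;
}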
temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + cub::ArgMin reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to device reduction directly + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceReduce::ArgMin(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, stream, debug_synchronous); + } + + printf("\t timing_timing_iterations: %d, temp_storage_bytes: %lld\n", + timing_timing_iterations, temp_storage_bytes); + + return error; +} + +/** + * Dispatch to argmax entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + cub::ArgMax reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to device reduction directly + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceReduce::ArgMax(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, stream, debug_synchronous); + } + + printf("\t timing_timing_iterations: %d, temp_storage_bytes: %lld\n", + timing_timing_iterations, temp_storage_bytes); + + return error; +} + + +//--------------------------------------------------------------------- +// Dispatch to different CUB DeviceSegmentedReduce entrypoints +//--------------------------------------------------------------------- + +/** + * Dispatch to reduce entrypoint (custom-max) + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + ReductionOpT reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... 
else the output iterator's value type + + // Max-identity + OutputT identity = Traits::Lowest(); // replace with std::numeric_limits::lowest() when C++ support is more prevalent + + // Invoke kernel to device reduction directly + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceSegmentedReduce::Reduce(d_temp_storage, temp_storage_bytes, + d_in, d_out, max_segments, d_segment_offsets, d_segment_offsets + 1, reduction_op, identity, + stream, debug_synchronous); + } + return error; +} + +/** + * Dispatch to sum entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + cub::Sum reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to device reduction directly + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceSegmentedReduce::Sum(d_temp_storage, temp_storage_bytes, + d_in, d_out, max_segments, d_segment_offsets, d_segment_offsets + 1, + stream, debug_synchronous); + } + return error; +} + +/** + * Dispatch to min entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + cub::Min reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to device reduction directly + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceSegmentedReduce::Min(d_temp_storage, temp_storage_bytes, + d_in, d_out, max_segments, d_segment_offsets, d_segment_offsets + 1, + stream, debug_synchronous); + } + return error; +} + +/** + * Dispatch to max entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + cub::Max reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to device reduction directly + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceSegmentedReduce::Max(d_temp_storage, temp_storage_bytes, + d_in, d_out, max_segments, d_segment_offsets, d_segment_offsets + 1, + stream, debug_synchronous); + } + return error; +} + +/** + * Dispatch to argmin entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + cub::ArgMin reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to device reduction directly + 
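Aside: the segmented dispatchers above pass d_segment_offsets and d_segment_offsets + 1 as the begin/end offset iterators over a single offsets array of length num_segments + 1. A minimal sketch of that idiom with cub::DeviceSegmentedReduce::Sum (the offsets and inputs are made up for illustration):

// Illustrative sketch only -- not part of the patch above.
#include <cub/cub.cuh>
#include <cuda_runtime.h>
#include <cstdio>

int main()
{
    // Three segments over seven items: [0,3), [3,5), [5,7)
    const int num_items = 7, num_segments = 3;
    int h_in[num_items]             = {1, 2, 3, 4, 5, 6, 7};
    int h_offsets[num_segments + 1] = {0, 3, 5, 7};

    int *d_in, *d_out, *d_offsets;
    cudaMalloc(&d_in,      num_items * sizeof(int));
    cudaMalloc(&d_out,     num_segments * sizeof(int));
    cudaMalloc(&d_offsets, (num_segments + 1) * sizeof(int));
    cudaMemcpy(d_in, h_in, sizeof(h_in), cudaMemcpyHostToDevice);
    cudaMemcpy(d_offsets, h_offsets, sizeof(h_offsets), cudaMemcpyHostToDevice);

    // Begin offsets are d_offsets, end offsets are d_offsets + 1 --
    // exactly the "d_segment_offsets, d_segment_offsets + 1" idiom used above.
    void  *d_temp_storage = NULL;
    size_t temp_storage_bytes = 0;
    cub::DeviceSegmentedReduce::Sum(d_temp_storage, temp_storage_bytes,
                                    d_in, d_out, num_segments, d_offsets, d_offsets + 1);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceSegmentedReduce::Sum(d_temp_storage, temp_storage_bytes,
                                    d_in, d_out, num_segments, d_offsets, d_offsets + 1);

    int h_out[num_segments];
    cudaMemcpy(h_out, d_out, sizeof(h_out), cudaMemcpyDeviceToHost);
    printf("%d %d %d\n", h_out[0], h_out[1], h_out[2]);   // expected: 6 9 13

    cudaFree(d_temp_storage); cudaFree(d_in); cudaFree(d_out); cudaFree(d_offsets);
    return 0;
}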
cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceSegmentedReduce::ArgMin(d_temp_storage, temp_storage_bytes, + d_in, d_out, max_segments, d_segment_offsets, d_segment_offsets + 1, + stream, debug_synchronous); + } + return error; +} + +/** + * Dispatch to argmax entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + cub::ArgMax reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to device reduction directly + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceSegmentedReduce::ArgMax(d_temp_storage, temp_storage_bytes, + d_in, d_out, max_segments, d_segment_offsets, d_segment_offsets + 1, + stream, debug_synchronous); + } + return error; +} + + +//--------------------------------------------------------------------- +// Dispatch to different Thrust entrypoints +//--------------------------------------------------------------------- + +/** + * Dispatch to reduction entrypoint (min or max specialization) + */ +template +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + ReductionOpT reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + OutputT init; + CubDebugExit(cudaMemcpy(&init, d_in + 0, sizeof(OutputT), cudaMemcpyDeviceToHost)); + + thrust::device_ptr d_in_wrapper(d_in); + OutputT retval; + for (int i = 0; i < timing_timing_iterations; ++i) + { + retval = thrust::reduce(d_in_wrapper, d_in_wrapper + num_items, init, reduction_op); + } + + if (!Equals >::VALUE) + CubDebugExit(cudaMemcpy(d_out, &retval, sizeof(OutputT), cudaMemcpyHostToDevice)); + } + + return cudaSuccess; +} + +/** + * Dispatch to reduction entrypoint (sum specialization) + */ +template +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + Sum reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... 
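Aside: the THRUST backend above wraps raw device pointers in thrust::device_ptr and reduces with thrust::reduce as a comparison baseline. A minimal sketch of the same calls, here using a thrust::device_vector for brevity (values are illustrative):

// Illustrative sketch only -- not part of the patch above.
#include <thrust/device_vector.h>
#include <thrust/reduce.h>
#include <thrust/functional.h>
#include <cstdio>

int main()
{
    thrust::device_vector<float> d_in(5);
    d_in[0] = 2.f; d_in[1] = -4.f; d_in[2] = 9.f; d_in[3] = 1.f; d_in[4] = 0.5f;

    // Max reduction with an explicit init value and binary operator,
    // mirroring the min/max specialization dispatched to Thrust above.
    float max_val = thrust::reduce(d_in.begin(), d_in.end(),
                                   -1e30f, thrust::maximum<float>());

    // Sum reduction with the default operator (thrust::plus).
    float sum_val = thrust::reduce(d_in.begin(), d_in.end());

    printf("max = %f, sum = %f\n", max_val, sum_val);   // expected: 9.0, 8.5
    return 0;
}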
else the output iterator's value type + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + thrust::device_ptr d_in_wrapper(d_in); + OutputT retval; + for (int i = 0; i < timing_timing_iterations; ++i) + { + retval = thrust::reduce(d_in_wrapper, d_in_wrapper + num_items); + } + + if (!Equals >::VALUE) + CubDebugExit(cudaMemcpy(d_out, &retval, sizeof(OutputT), cudaMemcpyHostToDevice)); + } + + return cudaSuccess; +} + + +//--------------------------------------------------------------------- +// CUDA nested-parallelism test kernel +//--------------------------------------------------------------------- + +/** + * Simple wrapper kernel to invoke DeviceReduce + */ +template < + typename InputIteratorT, + typename OutputIteratorT, + typename OffsetIteratorT, + typename ReductionOpT> +__global__ void CnpDispatchKernel( + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + ReductionOpT reduction_op, + bool debug_synchronous) +{ +#ifndef CUB_CDP + *d_cdp_error = cudaErrorNotSupported; +#else + *d_cdp_error = Dispatch(Int2Type(), timing_timing_iterations, d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, + d_in, d_out, num_items, max_segments, d_segment_offsets, reduction_op, 0, debug_synchronous); + *d_temp_storage_bytes = temp_storage_bytes; +#endif +} + + +/** + * Dispatch to CUB_CDP kernel + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + int num_items, + int max_segments, + OffsetIteratorT d_segment_offsets, + ReductionOpT reduction_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to invoke device-side dispatch + CnpDispatchKernel<<<1,1>>>(timing_timing_iterations, d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, + d_in, d_out, num_items, max_segments, d_segment_offsets, reduction_op, debug_synchronous); + + // Copy out temp_storage_bytes + CubDebugExit(cudaMemcpy(&temp_storage_bytes, d_temp_storage_bytes, sizeof(size_t) * 1, cudaMemcpyDeviceToHost)); + + // Copy out error + cudaError_t retval; + CubDebugExit(cudaMemcpy(&retval, d_cdp_error, sizeof(cudaError_t) * 1, cudaMemcpyDeviceToHost)); + return retval; +} + + + +//--------------------------------------------------------------------- +// Problem generation +//--------------------------------------------------------------------- + +/// Initialize problem +template +void Initialize( + GenMode gen_mode, + InputT *h_in, + int num_items) +{ + for (int i = 0; i < num_items; ++i) + { + InitValue(gen_mode, h_in[i], i); + } + + if (g_verbose_input) + { + printf("Input:\n"); + DisplayResults(h_in, num_items); + printf("\n\n"); + } +} + + +/// Solve problem (max/custom-max functor) +template +struct Solution +{ + typedef _OutputT OutputT; + + template + static void Solve(HostInputIteratorT h_in, OutputT *h_reference, OffsetT num_segments, OffsetIteratorT h_segment_offsets, + ReductionOpT reduction_op) + { + for (int i = 0; i < num_segments; ++i) + { + OutputT aggregate = Traits::Lowest(); // replace with std::numeric_limits::lowest() when C++ support is more prevalent + for (int j = 
h_segment_offsets[i]; j < h_segment_offsets[i + 1]; ++j) + aggregate = reduction_op(aggregate, OutputT(h_in[j])); + h_reference[i] = aggregate; + } + } +}; + +/// Solve problem (min functor) +template +struct Solution +{ + typedef _OutputT OutputT; + + template + static void Solve(HostInputIteratorT h_in, OutputT *h_reference, OffsetT num_segments, OffsetIteratorT h_segment_offsets, + cub::Min reduction_op) + { + for (int i = 0; i < num_segments; ++i) + { + OutputT aggregate = Traits::Max(); // replace with std::numeric_limits::max() when C++ support is more prevalent + for (int j = h_segment_offsets[i]; j < h_segment_offsets[i + 1]; ++j) + aggregate = reduction_op(aggregate, OutputT(h_in[j])); + h_reference[i] = aggregate; + } + } +}; + + +/// Solve problem (sum functor) +template +struct Solution +{ + typedef _OutputT OutputT; + + template + static void Solve(HostInputIteratorT h_in, OutputT *h_reference, OffsetT num_segments, OffsetIteratorT h_segment_offsets, + cub::Sum reduction_op) + { + for (int i = 0; i < num_segments; ++i) + { + OutputT aggregate; + InitValue(INTEGER_SEED, aggregate, 0); + for (int j = h_segment_offsets[i]; j < h_segment_offsets[i + 1]; ++j) + aggregate = reduction_op(aggregate, OutputT(h_in[j])); + h_reference[i] = aggregate; + } + } +}; + +/// Solve problem (argmin functor) +template +struct Solution +{ + typedef KeyValuePair OutputT; + + template + static void Solve(HostInputIteratorT h_in, OutputT *h_reference, OffsetT num_segments, OffsetIteratorT h_segment_offsets, + cub::ArgMin reduction_op) + { + for (int i = 0; i < num_segments; ++i) + { + OutputT aggregate(1, Traits::Max()); // replace with std::numeric_limits::max() when C++ support is more prevalent + for (int j = h_segment_offsets[i]; j < h_segment_offsets[i + 1]; ++j) + { + OutputT item(j - h_segment_offsets[i], OutputValueT(h_in[j])); + aggregate = reduction_op(aggregate, item); + } + h_reference[i] = aggregate; + } + } +}; + + +/// Solve problem (argmax functor) +template +struct Solution +{ + typedef KeyValuePair OutputT; + + template + static void Solve(HostInputIteratorT h_in, OutputT *h_reference, OffsetT num_segments, OffsetIteratorT h_segment_offsets, + cub::ArgMax reduction_op) + { + for (int i = 0; i < num_segments; ++i) + { + OutputT aggregate(1, Traits::Lowest()); // replace with std::numeric_limits::lowest() when C++ support is more prevalent + for (int j = h_segment_offsets[i]; j < h_segment_offsets[i + 1]; ++j) + { + OutputT item(j - h_segment_offsets[i], OutputValueT(h_in[j])); + aggregate = reduction_op(aggregate, item); + } + h_reference[i] = aggregate; + } + } +}; + + +//--------------------------------------------------------------------- +// Problem generation +//--------------------------------------------------------------------- + +/// Test DeviceReduce for a given problem input +template < + typename BackendT, + typename DeviceInputIteratorT, + typename DeviceOutputIteratorT, + typename HostReferenceIteratorT, + typename OffsetT, + typename OffsetIteratorT, + typename ReductionOpT> +void Test( + BackendT backend, + DeviceInputIteratorT d_in, + DeviceOutputIteratorT d_out, + OffsetT num_items, + OffsetT num_segments, + OffsetIteratorT d_segment_offsets, + ReductionOpT reduction_op, + HostReferenceIteratorT h_reference) +{ + // Input data types + typedef typename std::iterator_traits::value_type InputT; + + // Allocate CUB_CDP device arrays for temp storage size and error + size_t *d_temp_storage_bytes = NULL; + cudaError_t *d_cdp_error = NULL; + 
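Aside: the Solution specializations above compute per-segment reference results on the host, starting each segment from the operator's identity and folding the inputs in order. A minimal host-only sketch of that reference for a segmented max (the function name and inputs are illustrative):

// Illustrative sketch only -- not part of the patch above.
#include <vector>
#include <limits>
#include <cstdio>

// Host-side reference segmented max, shaped like Solution<...>::Solve above:
// for each segment [offsets[i], offsets[i+1]) fold the inputs with the operator,
// starting from the operator's identity.
std::vector<int> ReferenceSegmentedMax(const std::vector<int> &in,
                                       const std::vector<int> &offsets)
{
    std::vector<int> reference(offsets.size() - 1);
    for (size_t i = 0; i + 1 < offsets.size(); ++i)
    {
        int aggregate = std::numeric_limits<int>::lowest();   // max identity
        for (int j = offsets[i]; j < offsets[i + 1]; ++j)
            aggregate = (aggregate > in[j]) ? aggregate : in[j];
        reference[i] = aggregate;
    }
    return reference;
}

int main()
{
    std::vector<int> in      = {4, 8, 1, 9, 2, 7};
    std::vector<int> offsets = {0, 3, 6};                      // two segments
    std::vector<int> ref = ReferenceSegmentedMax(in, offsets);
    printf("%d %d\n", ref[0], ref[1]);                         // expected: 8 9
    return 0;
}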
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_temp_storage_bytes, sizeof(size_t) * 1)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_cdp_error, sizeof(cudaError_t) * 1)); + + // Inquire temp device storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(Dispatch(backend, 1, + d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, + d_in, d_out, num_items, num_segments, d_segment_offsets, + reduction_op, 0, true)); + + // Allocate temp device storage + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Run warmup/correctness iteration + CubDebugExit(Dispatch(backend, 1, + d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, + d_in, d_out, num_items, num_segments, d_segment_offsets, + reduction_op, 0, true)); + + // Check for correctness (and display results, if specified) + int compare = CompareDeviceResults(h_reference, d_out, num_segments, g_verbose, g_verbose); + printf("\t%s", compare ? "FAIL" : "PASS"); + + // Flush any stdout/stderr + fflush(stdout); + fflush(stderr); + + // Performance + if (g_timing_iterations > 0) + { + GpuTimer gpu_timer; + gpu_timer.Start(); + + CubDebugExit(Dispatch(backend, g_timing_iterations, + d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, + d_in, d_out, num_items, num_segments, d_segment_offsets, + reduction_op, 0, false)); + + gpu_timer.Stop(); + float elapsed_millis = gpu_timer.ElapsedMillis(); + + // Display performance + float avg_millis = elapsed_millis / g_timing_iterations; + float giga_rate = float(num_items) / avg_millis / 1000.0f / 1000.0f; + float giga_bandwidth = giga_rate * sizeof(InputT); + printf(", %.3f avg ms, %.3f billion items/s, %.3f logical GB/s", avg_millis, giga_rate, giga_bandwidth); + } + + if (d_temp_storage_bytes) CubDebugExit(g_allocator.DeviceFree(d_temp_storage_bytes)); + if (d_cdp_error) CubDebugExit(g_allocator.DeviceFree(d_cdp_error)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + // Correctness asserts + AssertEquals(0, compare); +} + + +/// Test DeviceReduce +template < + Backend BACKEND, + typename OutputValueT, + typename HostInputIteratorT, + typename DeviceInputIteratorT, + typename OffsetT, + typename OffsetIteratorT, + typename ReductionOpT> +void SolveAndTest( + HostInputIteratorT h_in, + DeviceInputIteratorT d_in, + OffsetT num_items, + OffsetT num_segments, + OffsetIteratorT h_segment_offsets, + OffsetIteratorT d_segment_offsets, + ReductionOpT reduction_op) +{ + typedef typename std::iterator_traits::value_type InputValueT; + typedef Solution SolutionT; + typedef typename SolutionT::OutputT OutputT; + + printf("\n\n%s cub::DeviceReduce<%s> %d items (%s), %d segments\n", + (BACKEND == CUB_CDP) ? "CUB_CDP" : (BACKEND == THRUST) ? "Thrust" : (BACKEND == CUB_SEGMENTED) ? 
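Aside: the harness above runs one untimed warmup/correctness pass, then times g_timing_iterations back-to-back launches and reports the average together with item and bandwidth rates. A minimal sketch of the same measurement using plain CUDA events in place of the test_util GpuTimer (sizes and payload are illustrative):

// Illustrative sketch only -- not part of the patch above.
#include <cub/cub.cuh>
#include <cuda_runtime.h>
#include <cstdio>

int main()
{
    const int num_items = 1 << 24;
    const int timing_iterations = 20;

    int *d_in, *d_out;
    cudaMalloc(&d_in,  num_items * sizeof(int));
    cudaMalloc(&d_out, sizeof(int));
    cudaMemset(d_in, 1, num_items * sizeof(int));   // arbitrary payload

    void  *d_temp_storage = NULL;
    size_t temp_storage_bytes = 0;
    cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);

    // Warmup / correctness pass (untimed), as in the harness above.
    cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);

    cudaEvent_t start, stop;
    cudaEventCreate(&start);
    cudaEventCreate(&stop);
    cudaEventRecord(start);
    for (int i = 0; i < timing_iterations; ++i)
        cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
    cudaEventRecord(stop);
    cudaEventSynchronize(stop);

    float elapsed_ms = 0.f;
    cudaEventElapsedTime(&elapsed_ms, start, stop);
    float avg_ms = elapsed_ms / timing_iterations;
    float gitems = float(num_items) / avg_ms / 1000.0f / 1000.0f;   // billions of items/s
    printf("%.3f avg ms, %.3f billion items/s, %.3f logical GB/s\n",
           avg_ms, gitems, gitems * sizeof(int));

    cudaFree(d_temp_storage); cudaFree(d_in); cudaFree(d_out);
    return 0;
}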
"CUB_SEGMENTED" : "CUB", + typeid(ReductionOpT).name(), num_items, typeid(HostInputIteratorT).name(), num_segments); + fflush(stdout); + + // Allocate and solve solution + OutputT *h_reference = new OutputT[num_segments]; + SolutionT::Solve(h_in, h_reference, num_segments, h_segment_offsets, reduction_op); + + // Run with discard iterator + DiscardOutputIterator discard_itr; + Test(Int2Type(), d_in, discard_itr, num_items, num_segments, d_segment_offsets, reduction_op, h_reference); + + // Run with output data (cleared for sanity-check) + OutputT *d_out = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(OutputT) * num_segments)); + CubDebugExit(cudaMemset(d_out, 0, sizeof(OutputT) * num_segments)); + Test(Int2Type(), d_in, d_out, num_items, num_segments, d_segment_offsets, reduction_op, h_reference); + + // Cleanup + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (h_reference) delete[] h_reference; +} + + +/// Test specific problem type +template < + Backend BACKEND, + typename InputT, + typename OutputT, + typename OffsetT, + typename ReductionOpT> +void TestProblem( + OffsetT num_items, + OffsetT num_segments, + GenMode gen_mode, + ReductionOpT reduction_op) +{ + printf("\n\nInitializing %d %s->%s (gen mode %d)... ", num_items, typeid(InputT).name(), typeid(OutputT).name(), gen_mode); fflush(stdout); + fflush(stdout); + + // Initialize value data + InputT* h_in = new InputT[num_items]; + Initialize(gen_mode, h_in, num_items); + + // Initialize segment data + OffsetT *h_segment_offsets = new OffsetT[num_segments + 1]; + InitializeSegments(num_items, num_segments, h_segment_offsets, g_verbose_input); + + // Initialize device data + OffsetT *d_segment_offsets = NULL; + InputT *d_in = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(InputT) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_segment_offsets, sizeof(OffsetT) * (num_segments + 1))); + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(InputT) * num_items, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(d_segment_offsets, h_segment_offsets, sizeof(OffsetT) * (num_segments + 1), cudaMemcpyHostToDevice)); + + SolveAndTest(h_in, d_in, num_items, num_segments, h_segment_offsets, d_segment_offsets, reduction_op); + + if (h_segment_offsets) delete[] h_segment_offsets; + if (d_segment_offsets) CubDebugExit(g_allocator.DeviceFree(d_segment_offsets)); + if (h_in) delete[] h_in; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); +} + + +/// Test different operators +template < + Backend BACKEND, + typename OutputT, + typename HostInputIteratorT, + typename DeviceInputIteratorT, + typename OffsetT, + typename OffsetIteratorT> +void TestByOp( + HostInputIteratorT h_in, + DeviceInputIteratorT d_in, + OffsetT num_items, + OffsetT num_segments, + OffsetIteratorT h_segment_offsets, + OffsetIteratorT d_segment_offsets) +{ + SolveAndTest(h_in, d_in, num_items, num_segments, h_segment_offsets, d_segment_offsets, CustomMax()); + SolveAndTest(h_in, d_in, num_items, num_segments, h_segment_offsets, d_segment_offsets, Sum()); + SolveAndTest(h_in, d_in, num_items, num_segments, h_segment_offsets, d_segment_offsets, Min()); + SolveAndTest(h_in, d_in, num_items, num_segments, h_segment_offsets, d_segment_offsets, ArgMin()); + SolveAndTest(h_in, d_in, num_items, num_segments, h_segment_offsets, d_segment_offsets, Max()); + SolveAndTest(h_in, d_in, num_items, num_segments, h_segment_offsets, d_segment_offsets, ArgMax()); +} + + +/// Test different backends +template < + typename 
InputT, + typename OutputT, + typename OffsetT> +void TestByBackend( + OffsetT num_items, + OffsetT max_segments, + GenMode gen_mode) +{ + // Initialize host data + printf("\n\nInitializing %d %s -> %s (gen mode %d)... ", + num_items, typeid(InputT).name(), typeid(OutputT).name(), gen_mode); fflush(stdout); + + InputT *h_in = new InputT[num_items]; + OffsetT *h_segment_offsets = new OffsetT[max_segments + 1]; + Initialize(gen_mode, h_in, num_items); + + // Initialize device data + InputT *d_in = NULL; + OffsetT *d_segment_offsets = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(InputT) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_segment_offsets, sizeof(OffsetT) * (max_segments + 1))); + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(InputT) * num_items, cudaMemcpyHostToDevice)); + + // + // Test single-segment implementations + // + + InitializeSegments(num_items, 1, h_segment_offsets, g_verbose_input); + + // Page-aligned-input tests + TestByOp(h_in, d_in, num_items, 1, h_segment_offsets, (OffsetT*) NULL); // Host-dispatch +#ifdef CUB_CDP + TestByOp(h_in, d_in, num_items, 1, h_segment_offsets, (OffsetT*) NULL); // Device-dispatch +#endif + + // Non-page-aligned-input tests + if (num_items > 1) + { + InitializeSegments(num_items - 1, 1, h_segment_offsets, g_verbose_input); + TestByOp(h_in + 1, d_in + 1, num_items - 1, 1, h_segment_offsets, (OffsetT*) NULL); + } + + // + // Test segmented implementation + // + + // Right now we assign a single thread block to each segment, so lets keep it to under 128K items per segment + int max_items_per_segment = 128000; + + for (int num_segments = (num_items + max_items_per_segment - 1) / max_items_per_segment; + num_segments < max_segments; + num_segments = (num_segments * 32) + 1) + { + // Test with segment pointer + InitializeSegments(num_items, num_segments, h_segment_offsets, g_verbose_input); + CubDebugExit(cudaMemcpy(d_segment_offsets, h_segment_offsets, sizeof(OffsetT) * (num_segments + 1), cudaMemcpyHostToDevice)); + TestByOp( + h_in, d_in, num_items, num_segments, h_segment_offsets, d_segment_offsets); + + // Test with segment iterator + typedef CastOp IdentityOpT; + IdentityOpT identity_op; + TransformInputIterator h_segment_offsets_itr( + h_segment_offsets, + identity_op); + TransformInputIterator d_segment_offsets_itr( + d_segment_offsets, + identity_op); + + TestByOp( + h_in, d_in, num_items, num_segments, h_segment_offsets_itr, d_segment_offsets_itr); + } + + if (h_in) delete[] h_in; + if (h_segment_offsets) delete[] h_segment_offsets; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_segment_offsets) CubDebugExit(g_allocator.DeviceFree(d_segment_offsets)); +} + + +/// Test different input-generation modes +template < + typename InputT, + typename OutputT, + typename OffsetT> +void TestByGenMode( + OffsetT num_items, + OffsetT max_segments) +{ + // + // Test pointer support using different input-generation modes + // + + TestByBackend(num_items, max_segments, UNIFORM); + TestByBackend(num_items, max_segments, INTEGER_SEED); + TestByBackend(num_items, max_segments, RANDOM); + + // + // Test iterator support using a constant-iterator and SUM + // + + InputT val; + InitValue(UNIFORM, val, 0); + ConstantInputIterator h_in(val); + + OffsetT *h_segment_offsets = new OffsetT[1 + 1]; + InitializeSegments(num_items, 1, h_segment_offsets, g_verbose_input); + + SolveAndTest(h_in, h_in, num_items, 1, h_segment_offsets, (OffsetT*) NULL, Sum()); +#ifdef CUB_CDP + SolveAndTest(h_in, h_in, 
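Aside: the iterator-support test above feeds a cub::ConstantInputIterator into the Sum path, so the reduction should simply count the items. A minimal standalone sketch of that case:

// Illustrative sketch only -- not part of the patch above.
#include <cub/cub.cuh>
#include <cuda_runtime.h>
#include <cstdio>

int main()
{
    const int num_items = 1000;

    // A constant "1" stream: summing it counts the items, which is what the
    // constant-iterator SUM test above verifies.
    cub::ConstantInputIterator<int> d_in(1);

    int *d_out;
    cudaMalloc(&d_out, sizeof(int));

    void  *d_temp_storage = NULL;
    size_t temp_storage_bytes = 0;
    cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceReduce::Sum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items);

    int h_out;
    cudaMemcpy(&h_out, d_out, sizeof(int), cudaMemcpyDeviceToHost);
    printf("%d\n", h_out);   // expected: 1000

    cudaFree(d_temp_storage); cudaFree(d_out);
    return 0;
}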
num_items, 1, h_segment_offsets, (OffsetT*) NULL, Sum()); +#endif + + if (h_segment_offsets) delete[] h_segment_offsets; +} + + +/// Test different problem sizes +template < + typename InputT, + typename OutputT, + typename OffsetT> +struct TestBySize +{ + OffsetT max_items; + OffsetT max_segments; + + TestBySize(OffsetT max_items, OffsetT max_segments) : + max_items(max_items), + max_segments(max_segments) + {} + + template + cudaError_t Invoke() + { + // + // Black-box testing on all backends + // + + // Test 0, 1, many + TestByGenMode(0, max_segments); + TestByGenMode(1, max_segments); + TestByGenMode(max_items, max_segments); + + // Test random problem sizes from a log-distribution [8, max_items-ish) + int num_iterations = 8; + double max_exp = log(double(max_items)) / log(double(2.0)); + for (int i = 0; i < num_iterations; ++i) + { + OffsetT num_items = (OffsetT) pow(2.0, RandomValue(max_exp - 3.0) + 3.0); + TestByGenMode(num_items, max_segments); + } + + // + // White-box testing of single-segment problems around specific sizes + // + + // Tile-boundaries: multiple blocks, one tile per block + OffsetT tile_size = ActivePolicyT::ReducePolicy::BLOCK_THREADS * ActivePolicyT::ReducePolicy::ITEMS_PER_THREAD; + TestProblem(tile_size * 4, 1, RANDOM, Sum()); + TestProblem(tile_size * 4 + 1, 1, RANDOM, Sum()); + TestProblem(tile_size * 4 - 1, 1, RANDOM, Sum()); + + // Tile-boundaries: multiple blocks, multiple tiles per block + OffsetT sm_occupancy = 32; + OffsetT occupancy = tile_size * sm_occupancy * g_sm_count; + TestProblem(occupancy, 1, RANDOM, Sum()); + TestProblem(occupancy + 1, 1, RANDOM, Sum()); + TestProblem(occupancy - 1, 1, RANDOM, Sum()); + + return cudaSuccess; + } +}; + + +/// Test problem type +template < + typename InputT, + typename OutputT, + typename OffsetT> +void TestType( + OffsetT max_items, + OffsetT max_segments) +{ + typedef typename DeviceReducePolicy::MaxPolicy MaxPolicyT; + + TestBySize dispatch(max_items, max_segments); + + MaxPolicyT::Invoke(g_ptx_version, dispatch); +} + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + + +/** + * Main + */ +int main(int argc, char** argv) +{ + typedef int OffsetT; + + OffsetT max_items = 27000000; + OffsetT max_segments = 34000; + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + g_verbose_input = args.CheckCmdLineFlag("v2"); + args.GetCmdLineArgument("n", max_items); + args.GetCmdLineArgument("s", max_segments); + args.GetCmdLineArgument("i", g_timing_iterations); + args.GetCmdLineArgument("repeat", g_repeat); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--s= " + "[--i= " + "[--device=] " + "[--repeat=]" + "[--v] " + "[--cdp]" + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Get ptx version + CubDebugExit(PtxVersion(g_ptx_version)); + + // Get SM count + g_sm_count = args.deviceProp.multiProcessorCount; + + std::numeric_limits::max(); + +#ifdef QUICKER_TEST + + // Compile/run basic test + + + + TestProblem( max_items, 1, RANDOM, Sum()); + + TestProblem( max_items, 1, RANDOM, Sum()); + + TestProblem( max_items, 1, RANDOM, ArgMax()); + + TestProblem( max_items, 1, RANDOM, Sum()); + + TestProblem(max_items, max_segments, RANDOM, Sum()); + + +#elif defined(QUICK_TEST) + + // Compile/run quick comparison tests + + TestProblem( max_items * 4, 1, UNIFORM, Sum()); + 
TestProblem( max_items * 4, 1, UNIFORM, Sum()); + + printf("\n----------------------------\n"); + TestProblem( max_items * 2, 1, UNIFORM, Sum()); + TestProblem( max_items * 2, 1, UNIFORM, Sum()); + + printf("\n----------------------------\n"); + TestProblem( max_items, 1, UNIFORM, Sum()); + TestProblem( max_items, 1, UNIFORM, Sum()); + + printf("\n----------------------------\n"); + TestProblem( max_items / 2, 1, UNIFORM, Sum()); + TestProblem( max_items / 2, 1, UNIFORM, Sum()); + + printf("\n----------------------------\n"); + TestProblem( max_items / 4, 1, UNIFORM, Max()); + TestProblem( max_items / 4, 1, UNIFORM, Max()); + +#else + + // Compile/run thorough tests + for (int i = 0; i <= g_repeat; ++i) + { + // Test different input types + TestType(max_items, max_segments); + + TestType(max_items, max_segments); + + TestType(max_items, max_segments); + +// TestType(max_items, max_segments); +// TestType(max_items, max_segments); +// TestType(max_items, max_segments); +// TestType(max_items, max_segments); +// +// TestType(max_items, max_segments); +// TestType(max_items, max_segments); +// TestType(max_items, max_segments); +// TestType(max_items, max_segments); +// +// TestType(max_items, max_segments); +// TestType(max_items, max_segments); + + } + +#endif + + + printf("\n"); + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_reduce_by_key.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_reduce_by_key.cu new file mode 100644 index 0000000..7d35eef --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_reduce_by_key.cu @@ -0,0 +1,853 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/****************************************************************************** + * Test of DeviceReduce::ReduceByKey utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; +int g_timing_iterations = 0; +int g_repeat = 0; +CachingDeviceAllocator g_allocator(true); + +// Dispatch types +enum Backend +{ + CUB, // CUB method + THRUST, // Thrust method + CDP, // GPU-based (dynamic parallelism) dispatch to CUB method +}; + + +//--------------------------------------------------------------------- +// Dispatch to different CUB entrypoints +//--------------------------------------------------------------------- + +/** + * Dispatch to reduce-by-key entrypoint + */ +template < + typename KeyInputIteratorT, + typename KeyOutputIteratorT, + typename ValueInputIteratorT, + typename ValueOutputIteratorT, + typename NumRunsIteratorT, + typename EqualityOpT, + typename ReductionOpT, + typename OffsetT> +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void *d_temp_storage, + size_t &temp_storage_bytes, + KeyInputIteratorT d_keys_in, + KeyOutputIteratorT d_keys_out, + ValueInputIteratorT d_values_in, + ValueOutputIteratorT d_values_out, + NumRunsIteratorT d_num_runs, + EqualityOpT equality_op, + ReductionOpT reduction_op, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceReduce::ReduceByKey( + d_temp_storage, + temp_storage_bytes, + d_keys_in, + d_keys_out, + d_values_in, + d_values_out, + d_num_runs, + reduction_op, + num_items, + stream, + debug_synchronous); + } + return error; +} + + +//--------------------------------------------------------------------- +// Dispatch to different Thrust entrypoints +//--------------------------------------------------------------------- + +/** + * Dispatch to reduce-by-key entrypoint + */ +template < + typename KeyInputIteratorT, + typename KeyOutputIteratorT, + typename ValueInputIteratorT, + typename ValueOutputIteratorT, + typename NumRunsIteratorT, + typename EqualityOpT, + typename ReductionOpT, + typename OffsetT> +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void *d_temp_storage, + size_t &temp_storage_bytes, + KeyInputIteratorT d_keys_in, + KeyOutputIteratorT d_keys_out, + ValueInputIteratorT d_values_in, + ValueOutputIteratorT d_values_out, + NumRunsIteratorT d_num_runs, + EqualityOpT equality_op, + ReductionOpT reduction_op, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + // The input keys type + typedef typename std::iterator_traits::value_type KeyInputT; + + // The output keys type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? 
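Aside: the CUB dispatcher above calls cub::DeviceReduce::ReduceByKey, which compacts consecutive equal keys and reduces the corresponding values, writing the number of runs to d_num_runs. A minimal sketch of a direct call with the usual two-phase temp-storage pattern (keys and values here are illustrative):

// Illustrative sketch only -- not part of the patch above.
#include <cub/cub.cuh>
#include <cuda_runtime.h>
#include <cstdio>

int main()
{
    const int num_items = 8;
    int h_keys[num_items]   = {0, 0, 1, 1, 1, 2, 3, 3};
    int h_values[num_items] = {1, 2, 3, 4, 5, 6, 7, 8};

    int *d_keys_in, *d_values_in, *d_unique_out, *d_aggregates_out, *d_num_runs;
    cudaMalloc(&d_keys_in,        num_items * sizeof(int));
    cudaMalloc(&d_values_in,      num_items * sizeof(int));
    cudaMalloc(&d_unique_out,     num_items * sizeof(int));
    cudaMalloc(&d_aggregates_out, num_items * sizeof(int));
    cudaMalloc(&d_num_runs,       sizeof(int));
    cudaMemcpy(d_keys_in,   h_keys,   sizeof(h_keys),   cudaMemcpyHostToDevice);
    cudaMemcpy(d_values_in, h_values, sizeof(h_values), cudaMemcpyHostToDevice);

    void  *d_temp_storage = NULL;
    size_t temp_storage_bytes = 0;
    cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes,
                                   d_keys_in, d_unique_out, d_values_in, d_aggregates_out,
                                   d_num_runs, cub::Sum(), num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes,
                                   d_keys_in, d_unique_out, d_values_in, d_aggregates_out,
                                   d_num_runs, cub::Sum(), num_items);

    int h_num_runs;
    cudaMemcpy(&h_num_runs, d_num_runs, sizeof(int), cudaMemcpyDeviceToHost);
    printf("runs = %d\n", h_num_runs);   // expected: 4 (uniques {0,1,2,3}, aggregates {3,12,6,15})

    cudaFree(d_temp_storage); cudaFree(d_keys_in); cudaFree(d_values_in);
    cudaFree(d_unique_out); cudaFree(d_aggregates_out); cudaFree(d_num_runs);
    return 0;
}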
+ typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type KeyOutputT; // ... else the output iterator's value type + + // The input values type + typedef typename std::iterator_traits::value_type ValueInputT; + + // The output values type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type ValueOuputT; // ... else the output iterator's value type + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + thrust::device_ptr d_keys_in_wrapper(d_keys_in); + thrust::device_ptr d_keys_out_wrapper(d_keys_out); + + thrust::device_ptr d_values_in_wrapper(d_values_in); + thrust::device_ptr d_values_out_wrapper(d_values_out); + + thrust::pair, thrust::device_ptr > d_out_ends; + + for (int i = 0; i < timing_timing_iterations; ++i) + { + d_out_ends = thrust::reduce_by_key( + d_keys_in_wrapper, + d_keys_in_wrapper + num_items, + d_values_in_wrapper, + d_keys_out_wrapper, + d_values_out_wrapper); + } + + OffsetT num_segments = OffsetT(d_out_ends.first - d_keys_out_wrapper); + CubDebugExit(cudaMemcpy(d_num_runs, &num_segments, sizeof(OffsetT), cudaMemcpyHostToDevice)); + + } + + return cudaSuccess; +} + + + +//--------------------------------------------------------------------- +// CUDA Nested Parallelism Test Kernel +//--------------------------------------------------------------------- + +/** + * Simple wrapper kernel to invoke DeviceSelect + */ +template < + typename KeyInputIteratorT, + typename KeyOutputIteratorT, + typename ValueInputIteratorT, + typename ValueOutputIteratorT, + typename NumRunsIteratorT, + typename EqualityOpT, + typename ReductionOpT, + typename OffsetT> +__global__ void CnpDispatchKernel( + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void *d_temp_storage, + size_t temp_storage_bytes, + KeyInputIteratorT d_keys_in, + KeyOutputIteratorT d_keys_out, + ValueInputIteratorT d_values_in, + ValueOutputIteratorT d_values_out, + NumRunsIteratorT d_num_runs, + EqualityOpT equality_op, + ReductionOpT reduction_op, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + +#ifndef CUB_CDP + *d_cdp_error = cudaErrorNotSupported; +#else + *d_cdp_error = Dispatch(Int2Type(), timing_timing_iterations, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, d_values_in, d_values_out, d_num_runs, equality_op, reduction_op, num_items, 0, debug_synchronous); + + *d_temp_storage_bytes = temp_storage_bytes; +#endif +} + + +/** + * Dispatch to CDP kernel + */ +template < + typename KeyInputIteratorT, + typename KeyOutputIteratorT, + typename ValueInputIteratorT, + typename ValueOutputIteratorT, + typename NumRunsIteratorT, + typename EqualityOpT, + typename ReductionOpT, + typename OffsetT> +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void *d_temp_storage, + size_t &temp_storage_bytes, + KeyInputIteratorT d_keys_in, + KeyOutputIteratorT d_keys_out, + ValueInputIteratorT d_values_in, + ValueOutputIteratorT d_values_out, + NumRunsIteratorT d_num_runs, + EqualityOpT equality_op, + ReductionOpT reduction_op, + OffsetT num_items, + cudaStream_t stream, + bool 
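Aside: the Thrust baseline above uses thrust::reduce_by_key and derives the run count from the returned end iterators before copying it into d_num_runs. A minimal sketch of that calculation on the same toy input as the CUB example:

// Illustrative sketch only -- not part of the patch above.
#include <thrust/device_vector.h>
#include <thrust/reduce.h>
#include <cstdio>

int main()
{
    // Same run structure as the CUB example: keys 0 0 1 1 1 2 3 3, values 1..8.
    int h_keys[]   = {0, 0, 1, 1, 1, 2, 3, 3};
    int h_values[] = {1, 2, 3, 4, 5, 6, 7, 8};
    thrust::device_vector<int> d_keys(h_keys, h_keys + 8);
    thrust::device_vector<int> d_values(h_values, h_values + 8);
    thrust::device_vector<int> d_unique(8), d_aggregates(8);

    // reduce_by_key returns iterators one past the last unique key / aggregate
    // written; their distance from the output start is the number of runs,
    // which is what the Thrust dispatcher above copies into d_num_runs.
    auto ends = thrust::reduce_by_key(d_keys.begin(), d_keys.end(),
                                      d_values.begin(),
                                      d_unique.begin(), d_aggregates.begin());

    int num_runs = ends.first - d_unique.begin();
    printf("runs = %d\n", num_runs);                              // expected: 4
    for (int i = 0; i < num_runs; ++i)
        printf("key %d -> %d\n", (int)d_unique[i], (int)d_aggregates[i]);
    return 0;
}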
debug_synchronous) +{ + // Invoke kernel to invoke device-side dispatch + CnpDispatchKernel<<<1,1>>>(timing_timing_iterations, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, d_values_in, d_values_out, d_num_runs, equality_op, reduction_op, num_items, 0, debug_synchronous); + + // Copy out temp_storage_bytes + CubDebugExit(cudaMemcpy(&temp_storage_bytes, d_temp_storage_bytes, sizeof(size_t) * 1, cudaMemcpyDeviceToHost)); + + // Copy out error + cudaError_t retval; + CubDebugExit(cudaMemcpy(&retval, d_cdp_error, sizeof(cudaError_t) * 1, cudaMemcpyDeviceToHost)); + return retval; +} + + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + + +/** + * Initialize problem + */ +template +void Initialize( + int entropy_reduction, + T *h_in, + int num_items, + int max_segment) +{ + unsigned int max_int = (unsigned int) -1; + + int key = 0; + int i = 0; + while (i < num_items) + { + // Select number of repeating occurrences + + int repeat; + + if (max_segment < 0) + { + repeat = num_items; + } + else if (max_segment < 2) + { + repeat = 1; + } + else + { + RandomBits(repeat, entropy_reduction); + repeat = (int) ((double(repeat) * double(max_segment)) / double(max_int)); + repeat = CUB_MAX(1, repeat); + } + + int j = i; + while (j < CUB_MIN(i + repeat, num_items)) + { + InitValue(INTEGER_SEED, h_in[j], key); + j++; + } + + i = j; + key++; + } + + if (g_verbose) + { + printf("Input:\n"); + DisplayResults(h_in, num_items); + printf("\n\n"); + } +} + + +/** + * Solve problem. Returns total number of segments identified + */ +template < + typename KeyInputIteratorT, + typename ValueInputIteratorT, + typename KeyT, + typename ValueT, + typename EqualityOpT, + typename ReductionOpT> +int Solve( + KeyInputIteratorT h_keys_in, + KeyT *h_keys_reference, + ValueInputIteratorT h_values_in, + ValueT *h_values_reference, + EqualityOpT equality_op, + ReductionOpT reduction_op, + int num_items) +{ + // First item + KeyT previous = h_keys_in[0]; + ValueT aggregate = h_values_in[0]; + int num_segments = 0; + + // Subsequent items + for (int i = 1; i < num_items; ++i) + { + if (!equality_op(previous, h_keys_in[i])) + { + h_keys_reference[num_segments] = previous; + h_values_reference[num_segments] = aggregate; + num_segments++; + aggregate = h_values_in[i]; + } + else + { + aggregate = reduction_op(aggregate, h_values_in[i]); + } + previous = h_keys_in[i]; + } + + h_keys_reference[num_segments] = previous; + h_values_reference[num_segments] = aggregate; + num_segments++; + + return num_segments; +} + + + +/** + * Test DeviceSelect for a given problem input + */ +template < + Backend BACKEND, + typename DeviceKeyInputIteratorT, + typename DeviceValueInputIteratorT, + typename KeyT, + typename ValueT, + typename EqualityOpT, + typename ReductionOpT> +void Test( + DeviceKeyInputIteratorT d_keys_in, + DeviceValueInputIteratorT d_values_in, + KeyT* h_keys_reference, + ValueT* h_values_reference, + EqualityOpT equality_op, + ReductionOpT reduction_op, + int num_segments, + int num_items) +{ + // Allocate device output arrays and number of segments + KeyT* d_keys_out = NULL; + ValueT* d_values_out = NULL; + int* d_num_runs = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_keys_out, sizeof(KeyT) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_values_out, sizeof(ValueT) * num_items)); + 
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_num_runs, sizeof(int))); + + // Allocate CDP device arrays + size_t *d_temp_storage_bytes = NULL; + cudaError_t *d_cdp_error = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_temp_storage_bytes, sizeof(size_t) * 1)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_cdp_error, sizeof(cudaError_t) * 1)); + + // Allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(Dispatch(Int2Type(), 1, d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, d_values_in, d_values_out, d_num_runs, equality_op, reduction_op, num_items, 0, true)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Clear device output arrays + CubDebugExit(cudaMemset(d_keys_out, 0, sizeof(KeyT) * num_items)); + CubDebugExit(cudaMemset(d_values_out, 0, sizeof(ValueT) * num_items)); + CubDebugExit(cudaMemset(d_num_runs, 0, sizeof(int))); + + // Run warmup/correctness iteration + CubDebugExit(Dispatch(Int2Type(), 1, d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, d_values_in, d_values_out, d_num_runs, equality_op, reduction_op, num_items, 0, true)); + + // Check for correctness (and display results, if specified) + int compare1 = CompareDeviceResults(h_keys_reference, d_keys_out, num_segments, true, g_verbose); + printf("\t Keys %s ", compare1 ? "FAIL" : "PASS"); + + int compare2 = CompareDeviceResults(h_values_reference, d_values_out, num_segments, true, g_verbose); + printf("\t Values %s ", compare2 ? "FAIL" : "PASS"); + + int compare3 = CompareDeviceResults(&num_segments, d_num_runs, 1, true, g_verbose); + printf("\t Count %s ", compare3 ? "FAIL" : "PASS"); + + // Flush any stdout/stderr + fflush(stdout); + fflush(stderr); + + // Performance + GpuTimer gpu_timer; + gpu_timer.Start(); + CubDebugExit(Dispatch(Int2Type(), g_timing_iterations, d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, d_keys_in, d_keys_out, d_values_in, d_values_out, d_num_runs, equality_op, reduction_op, num_items, 0, false)); + gpu_timer.Stop(); + float elapsed_millis = gpu_timer.ElapsedMillis(); + + // Display performance + if (g_timing_iterations > 0) + { + float avg_millis = elapsed_millis / g_timing_iterations; + float giga_rate = float(num_items) / avg_millis / 1000.0f / 1000.0f; + int bytes_moved = ((num_items + num_segments) * sizeof(KeyT)) + ((num_items + num_segments) * sizeof(ValueT)); + float giga_bandwidth = float(bytes_moved) / avg_millis / 1000.0f / 1000.0f; + printf(", %.3f avg ms, %.3f billion items/s, %.3f logical GB/s", avg_millis, giga_rate, giga_bandwidth); + } + printf("\n\n"); + + // Flush any stdout/stderr + fflush(stdout); + fflush(stderr); + + // Cleanup + if (d_keys_out) CubDebugExit(g_allocator.DeviceFree(d_keys_out)); + if (d_values_out) CubDebugExit(g_allocator.DeviceFree(d_values_out)); + if (d_num_runs) CubDebugExit(g_allocator.DeviceFree(d_num_runs)); + if (d_temp_storage_bytes) CubDebugExit(g_allocator.DeviceFree(d_temp_storage_bytes)); + if (d_cdp_error) CubDebugExit(g_allocator.DeviceFree(d_cdp_error)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + // Correctness asserts + AssertEquals(0, compare1 | compare2 | compare3); +} + + +/** + * Test DeviceSelect on pointer type + */ +template < + Backend BACKEND, + typename KeyT, + typename ValueT, + typename ReductionOpT> +void TestPointer( + int num_items, + int entropy_reduction, + int 
max_segment, + ReductionOpT reduction_op) +{ + // Allocate host arrays + KeyT* h_keys_in = new KeyT[num_items]; + KeyT* h_keys_reference = new KeyT[num_items]; + + ValueT* h_values_in = new ValueT[num_items]; + ValueT* h_values_reference = new ValueT[num_items]; + + for (int i = 0; i < num_items; ++i) + InitValue(INTEGER_SEED, h_values_in[i], 1); + + // Initialize problem and solution + Equality equality_op; + Initialize(entropy_reduction, h_keys_in, num_items, max_segment); + int num_segments = Solve(h_keys_in, h_keys_reference, h_values_in, h_values_reference, equality_op, reduction_op, num_items); + + printf("\nPointer %s cub::DeviceReduce::ReduceByKey %s reduction of %d items, %d segments (avg run length %.3f), {%s,%s} key value pairs, max_segment %d, entropy_reduction %d\n", + (BACKEND == CDP) ? "CDP CUB" : (BACKEND == THRUST) ? "Thrust" : "CUB", + (Equals::VALUE) ? "Sum" : "Max", + num_items, num_segments, float(num_items) / num_segments, + typeid(KeyT).name(), typeid(ValueT).name(), + max_segment, entropy_reduction); + fflush(stdout); + + // Allocate problem device arrays + KeyT *d_keys_in = NULL; + ValueT *d_values_in = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_keys_in, sizeof(KeyT) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_values_in, sizeof(ValueT) * num_items)); + + // Initialize device input + CubDebugExit(cudaMemcpy(d_keys_in, h_keys_in, sizeof(KeyT) * num_items, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(d_values_in, h_values_in, sizeof(ValueT) * num_items, cudaMemcpyHostToDevice)); + + // Run Test + Test(d_keys_in, d_values_in, h_keys_reference, h_values_reference, equality_op, reduction_op, num_segments, num_items); + + // Cleanup + if (h_keys_in) delete[] h_keys_in; + if (h_values_in) delete[] h_values_in; + if (h_keys_reference) delete[] h_keys_reference; + if (h_values_reference) delete[] h_values_reference; + if (d_keys_in) CubDebugExit(g_allocator.DeviceFree(d_keys_in)); + if (d_values_in) CubDebugExit(g_allocator.DeviceFree(d_values_in)); +} + + +/** + * Test on iterator type + */ +template < + Backend BACKEND, + typename KeyT, + typename ValueT, + typename ReductionOpT> +void TestIterator( + int num_items, + int entropy_reduction, + int max_segment, + ReductionOpT reduction_op) +{ + // Allocate host arrays + KeyT* h_keys_in = new KeyT[num_items]; + KeyT* h_keys_reference = new KeyT[num_items]; + + ValueT one_val; + InitValue(INTEGER_SEED, one_val, 1); + ConstantInputIterator h_values_in(one_val); + ValueT* h_values_reference = new ValueT[num_items]; + + // Initialize problem and solution + Equality equality_op; + Initialize(entropy_reduction, h_keys_in, num_items, max_segment); + int num_segments = Solve(h_keys_in, h_keys_reference, h_values_in, h_values_reference, equality_op, reduction_op, num_items); + + printf("\nIterator %s cub::DeviceReduce::ReduceByKey %s reduction of %d items, %d segments (avg run length %.3f), {%s,%s} key value pairs, max_segment %d, entropy_reduction %d\n", + (BACKEND == CDP) ? "CDP CUB" : (BACKEND == THRUST) ? "Thrust" : "CUB", + (Equals::VALUE) ? 
"Sum" : "Max", + num_items, num_segments, float(num_items) / num_segments, + typeid(KeyT).name(), typeid(ValueT).name(), + max_segment, entropy_reduction); + fflush(stdout); + + // Allocate problem device arrays + KeyT *d_keys_in = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_keys_in, sizeof(KeyT) * num_items)); + + // Initialize device input + CubDebugExit(cudaMemcpy(d_keys_in, h_keys_in, sizeof(KeyT) * num_items, cudaMemcpyHostToDevice)); + + // Run Test + Test(d_keys_in, h_values_in, h_keys_reference, h_values_reference, equality_op, reduction_op, num_segments, num_items); + + // Cleanup + if (h_keys_in) delete[] h_keys_in; + if (h_keys_reference) delete[] h_keys_reference; + if (h_values_reference) delete[] h_values_reference; + if (d_keys_in) CubDebugExit(g_allocator.DeviceFree(d_keys_in)); +} + + +/** + * Test different gen modes + */ +template < + Backend BACKEND, + typename KeyT, + typename ValueT, + typename ReductionOpT> +void Test( + int num_items, + ReductionOpT reduction_op, + int max_segment) +{ + // 0 key-bit entropy reduction rounds + TestPointer(num_items, 0, max_segment, reduction_op); + + if (max_segment > 1) + { + // 2 key-bit entropy reduction rounds + TestPointer(num_items, 2, max_segment, reduction_op); + + // 7 key-bit entropy reduction rounds + TestPointer(num_items, 7, max_segment, reduction_op); + } +} + + +/** + * Test different avg segment lengths modes + */ +template < + Backend BACKEND, + typename KeyT, + typename ValueT, + typename ReductionOpT> +void Test( + int num_items, + ReductionOpT reduction_op) +{ + Test(num_items, reduction_op, -1); + Test(num_items, reduction_op, 1); + + // Evaluate different max-segment lengths + for (int max_segment = 3; max_segment < CUB_MIN(num_items, (unsigned short) -1); max_segment *= 11) + { + Test(num_items, reduction_op, max_segment); + } +} + + + +/** + * Test different dispatch + */ +template < + typename KeyT, + typename ValueT, + typename ReductionOpT> +void TestDispatch( + int num_items, + ReductionOpT reduction_op) +{ + Test(num_items, reduction_op); +#ifdef CUB_CDP + Test(num_items, reduction_op); +#endif +} + + +/** + * Test different input sizes + */ +template < + typename KeyT, + typename ValueT, + typename ReductionOpT> +void TestSize( + int num_items, + ReductionOpT reduction_op) +{ + if (num_items < 0) + { + TestDispatch(1, reduction_op); + TestDispatch(100, reduction_op); + TestDispatch(10000, reduction_op); + TestDispatch(1000000, reduction_op); + } + else + { + TestDispatch(num_items, reduction_op); + } + +} + + +template < + typename KeyT, + typename ValueT> +void TestOp( + int num_items) +{ + TestSize(num_items, cub::Sum()); + TestSize(num_items, cub::Max()); +} + + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_items = -1; + int entropy_reduction = 0; + int maxseg = 1000; + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + args.GetCmdLineArgument("i", g_timing_iterations); + args.GetCmdLineArgument("repeat", g_repeat); + args.GetCmdLineArgument("maxseg", maxseg); + args.GetCmdLineArgument("entropy", entropy_reduction); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--i= " + "[--device=] " + "[--maxseg=]" + "[--entropy=]" + "[--repeat=]" + "[--v] " + "[--cdp]" + "\n", 
argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + printf("\n"); + + // Get ptx version + int ptx_version; + CubDebugExit(PtxVersion(ptx_version)); + +#ifdef QUICKER_TEST + + // Compile/run basic CUB test + if (num_items < 0) num_items = 32000000; + + TestPointer(num_items, entropy_reduction, maxseg, cub::Sum()); + TestPointer(num_items, entropy_reduction, maxseg, cub::Sum()); + TestIterator(num_items, entropy_reduction, maxseg, cub::Sum()); + +#elif defined(QUICK_TEST) + + // Compile/run quick tests + if (num_items < 0) num_items = 32000000; + + printf("---- RLE int ---- \n"); + TestIterator(num_items, entropy_reduction, maxseg, cub::Sum()); + + printf("---- RLE long long ---- \n"); + TestIterator(num_items, entropy_reduction, maxseg, cub::Sum()); + + printf("---- int ---- \n"); + TestPointer(num_items, entropy_reduction, maxseg, cub::Sum()); + TestPointer(num_items, entropy_reduction, maxseg, cub::Sum()); + + printf("---- float ---- \n"); + TestPointer(num_items, entropy_reduction, maxseg, cub::Sum()); + TestPointer(num_items, entropy_reduction, maxseg, cub::Sum()); + + if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted + { + printf("---- double ---- \n"); + TestPointer(num_items, entropy_reduction, maxseg, cub::Sum()); + TestPointer(num_items, entropy_reduction, maxseg, cub::Sum()); + } + +#else + + // Compile/run thorough tests + for (int i = 0; i <= g_repeat; ++i) + { + + // Test different input types + TestOp(num_items); + TestOp(num_items); + TestOp(num_items); + TestOp(num_items); + TestOp(num_items); + TestOp(num_items); + if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted + TestOp(num_items); + + TestOp(num_items); + TestOp(num_items); + TestOp(num_items); + TestOp(num_items); + TestOp(num_items); + TestOp(num_items); + TestOp(num_items); + + TestOp(num_items); + TestOp(num_items); + TestOp(num_items); + TestOp(num_items); + + } + +#endif + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_run_length_encode.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_run_length_encode.cu new file mode 100644 index 0000000..7309db9 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_run_length_encode.cu @@ -0,0 +1,890 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
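//===========================================================================
// Illustrative sketch (not part of the vendored CUB test file above): the
// core cub::DeviceReduce::ReduceByKey call that test_device_reduce_by_key.cu
// exercises through its Dispatch()/Test() harness, shown here with plain
// cudaMalloc instead of the harness's CachingDeviceAllocator. The function
// name, pointer names, and the int key/value types are assumptions chosen
// for brevity.
//===========================================================================
#include <cub/cub.cuh>

// Sums consecutive equal-keyed values: writes the unique keys, one aggregate
// per run, and the number of runs found.
inline cudaError_t SumByKey(const int *d_keys_in, const int *d_values_in,
                            int *d_unique_out, int *d_aggregates_out,
                            int *d_num_runs_out, int num_items)
{
    void  *d_temp_storage     = NULL;
    size_t temp_storage_bytes = 0;

    // Pass 1: d_temp_storage == NULL, so only temp_storage_bytes is computed.
    cub::DeviceReduce::ReduceByKey(d_temp_storage, temp_storage_bytes,
        d_keys_in, d_unique_out, d_values_in, d_aggregates_out,
        d_num_runs_out, cub::Sum(), num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);

    // Pass 2: perform the segmented reduction.
    cudaError_t error = cub::DeviceReduce::ReduceByKey(d_temp_storage,
        temp_storage_bytes, d_keys_in, d_unique_out, d_values_in,
        d_aggregates_out, d_num_runs_out, cub::Sum(), num_items);

    cudaFree(d_temp_storage);
    return error;
}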
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Test of DeviceReduce::RunLengthEncode utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; +int g_timing_iterations = 0; +int g_repeat = 0; +CachingDeviceAllocator g_allocator(true); + +// Dispatch types +enum Backend +{ + CUB, // CUB method + THRUST, // Thrust method + CDP, // GPU-based (dynamic parallelism) dispatch to CUB method +}; + +// Operation types +enum RleMethod +{ + RLE, // Run length encode + NON_TRIVIAL, + CSR, +}; + + +//--------------------------------------------------------------------- +// Dispatch to different CUB entrypoints +//--------------------------------------------------------------------- + + +/** + * Dispatch to run-length encode entrypoint + */ +template < + typename InputIteratorT, + typename UniqueOutputIteratorT, + typename OffsetsOutputIteratorT, + typename LengthsOutputIteratorT, + typename NumRunsIterator, + typename OffsetT> +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type method, + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t &temp_storage_bytes, + InputIteratorT d_in, + UniqueOutputIteratorT d_unique_out, + OffsetsOutputIteratorT d_offsets_out, + LengthsOutputIteratorT d_lengths_out, + NumRunsIterator d_num_runs, + cub::Equality equality_op, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceRunLengthEncode::Encode( + d_temp_storage, + temp_storage_bytes, + d_in, + d_unique_out, + d_lengths_out, + d_num_runs, + num_items, + stream, + debug_synchronous); + } + return error; +} + + +/** + * Dispatch to non-trivial runs entrypoint + */ +template < + typename InputIteratorT, + typename UniqueOutputIteratorT, + typename OffsetsOutputIteratorT, + typename LengthsOutputIteratorT, + typename NumRunsIterator, + typename OffsetT> +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type method, + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t &temp_storage_bytes, + InputIteratorT d_in, + UniqueOutputIteratorT d_unique_out, + OffsetsOutputIteratorT d_offsets_out, + LengthsOutputIteratorT d_lengths_out, + NumRunsIterator d_num_runs, + 
cub::Equality equality_op, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceRunLengthEncode::NonTrivialRuns( + d_temp_storage, + temp_storage_bytes, + d_in, + d_offsets_out, + d_lengths_out, + d_num_runs, + num_items, + stream, + debug_synchronous); + } + return error; +} + + + +//--------------------------------------------------------------------- +// Dispatch to different Thrust entrypoints +//--------------------------------------------------------------------- + +/** + * Dispatch to run-length encode entrypoint + */ +template < + typename InputIteratorT, + typename UniqueOutputIteratorT, + typename OffsetsOutputIteratorT, + typename LengthsOutputIteratorT, + typename NumRunsIterator, + typename OffsetT> +cudaError_t Dispatch( + Int2Type method, + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void *d_temp_storage, + size_t &temp_storage_bytes, + InputIteratorT d_in, + UniqueOutputIteratorT d_unique_out, + OffsetsOutputIteratorT d_offsets_out, + LengthsOutputIteratorT d_lengths_out, + NumRunsIterator d_num_runs, + cub::Equality equality_op, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type UniqueT; // ... else the output iterator's value type + + // The lengths output value type + typedef typename If<(Equals::value_type, void>::VALUE), // LengthT = (if output iterator's value type is void) ? + OffsetT, // ... then the OffsetT type, + typename std::iterator_traits::value_type>::Type LengthT; // ... 
else the output iterator's value type + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + thrust::device_ptr d_in_wrapper(d_in); + thrust::device_ptr d_unique_out_wrapper(d_unique_out); + thrust::device_ptr d_lengths_out_wrapper(d_lengths_out); + + thrust::pair, thrust::device_ptr > d_out_ends; + + LengthT one_val; + InitValue(INTEGER_SEED, one_val, 1); + thrust::constant_iterator constant_one(one_val); + + for (int i = 0; i < timing_timing_iterations; ++i) + { + d_out_ends = thrust::reduce_by_key( + d_in_wrapper, + d_in_wrapper + num_items, + constant_one, + d_unique_out_wrapper, + d_lengths_out_wrapper); + } + + OffsetT num_runs = OffsetT(d_out_ends.first - d_unique_out_wrapper); + CubDebugExit(cudaMemcpy(d_num_runs, &num_runs, sizeof(OffsetT), cudaMemcpyHostToDevice)); + } + + return cudaSuccess; +} + + + +//--------------------------------------------------------------------- +// CUDA Nested Parallelism Test Kernel +//--------------------------------------------------------------------- + +/** + * Simple wrapper kernel to invoke DeviceRunLengthEncode + */ +template < + int RLE_METHOD, + typename InputIteratorT, + typename UniqueOutputIteratorT, + typename OffsetsOutputIteratorT, + typename LengthsOutputIteratorT, + typename NumRunsIterator, + typename EqualityOp, + typename OffsetT> +__global__ void CnpDispatchKernel( + Int2Type method, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t temp_storage_bytes, + InputIteratorT d_in, + UniqueOutputIteratorT d_unique_out, + OffsetsOutputIteratorT d_offsets_out, + LengthsOutputIteratorT d_lengths_out, + NumRunsIterator d_num_runs, + cub::Equality equality_op, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + +#ifndef CUB_CDP + *d_cdp_error = cudaErrorNotSupported; +#else + *d_cdp_error = Dispatch(method, Int2Type(), timing_timing_iterations, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, d_in, d_unique_out, d_offsets_out, d_lengths_out, d_num_runs, equality_op, num_items, 0, debug_synchronous); + + *d_temp_storage_bytes = temp_storage_bytes; +#endif +} + + +/** + * Dispatch to CDP kernel + */ +template < + int RLE_METHOD, + typename InputIteratorT, + typename UniqueOutputIteratorT, + typename OffsetsOutputIteratorT, + typename LengthsOutputIteratorT, + typename NumRunsIterator, + typename EqualityOp, + typename OffsetT> +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type method, + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t &temp_storage_bytes, + InputIteratorT d_in, + UniqueOutputIteratorT d_unique_out, + OffsetsOutputIteratorT d_offsets_out, + LengthsOutputIteratorT d_lengths_out, + NumRunsIterator d_num_runs, + EqualityOp equality_op, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to invoke device-side dispatch + CnpDispatchKernel<<<1,1>>>(method, timing_timing_iterations, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, d_in, d_unique_out, d_offsets_out, d_lengths_out, d_num_runs, equality_op, num_items, 0, debug_synchronous); + + // Copy out temp_storage_bytes + CubDebugExit(cudaMemcpy(&temp_storage_bytes, d_temp_storage_bytes, sizeof(size_t) * 1, cudaMemcpyDeviceToHost)); + + // Copy out error + cudaError_t retval; + CubDebugExit(cudaMemcpy(&retval, d_cdp_error, sizeof(cudaError_t) * 1, 
cudaMemcpyDeviceToHost)); + return retval; +} + + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + + +/** + * Initialize problem + */ +template +void Initialize( + int entropy_reduction, + T *h_in, + int num_items, + int max_segment) +{ + unsigned int max_int = (unsigned int) -1; + + int key = 0; + int i = 0; + while (i < num_items) + { + // Select number of repeating occurrences for the current run + int repeat; + if (max_segment < 0) + { + repeat = num_items; + } + else if (max_segment < 2) + { + repeat = 1; + } + else + { + RandomBits(repeat, entropy_reduction); + repeat = (int) ((double(repeat) * double(max_segment)) / double(max_int)); + repeat = CUB_MAX(1, repeat); + } + + int j = i; + while (j < CUB_MIN(i + repeat, num_items)) + { + InitValue(INTEGER_SEED, h_in[j], key); + j++; + } + + i = j; + key++; + } + + if (g_verbose) + { + printf("Input:\n"); + DisplayResults(h_in, num_items); + printf("\n\n"); + } +} + + +/** + * Solve problem. Returns total number of segments identified + */ +template < + RleMethod RLE_METHOD, + typename InputIteratorT, + typename T, + typename OffsetT, + typename LengthT, + typename EqualityOp> +int Solve( + InputIteratorT h_in, + T *h_unique_reference, + OffsetT *h_offsets_reference, + LengthT *h_lengths_reference, + EqualityOp equality_op, + int num_items) +{ + if (num_items == 0) + return 0; + + // First item + T previous = h_in[0]; + LengthT length = 1; + int num_runs = 0; + int run_begin = 0; + + // Subsequent items + for (int i = 1; i < num_items; ++i) + { + if (!equality_op(previous, h_in[i])) + { + if ((RLE_METHOD != NON_TRIVIAL) || (length > 1)) + { + h_unique_reference[num_runs] = previous; + h_offsets_reference[num_runs] = run_begin; + h_lengths_reference[num_runs] = length; + num_runs++; + } + length = 1; + run_begin = i; + } + else + { + length++; + } + previous = h_in[i]; + } + + if ((RLE_METHOD != NON_TRIVIAL) || (length > 1)) + { + h_unique_reference[num_runs] = previous; + h_offsets_reference[num_runs] = run_begin; + h_lengths_reference[num_runs] = length; + num_runs++; + } + + return num_runs; +} + + + +/** + * Test DeviceRunLengthEncode for a given problem input + */ +template < + RleMethod RLE_METHOD, + Backend BACKEND, + typename DeviceInputIteratorT, + typename T, + typename OffsetT, + typename LengthT, + typename EqualityOp> +void Test( + DeviceInputIteratorT d_in, + T *h_unique_reference, + OffsetT *h_offsets_reference, + LengthT *h_lengths_reference, + EqualityOp equality_op, + int num_runs, + int num_items) +{ + // Allocate device output arrays and number of segments + T* d_unique_out = NULL; + LengthT* d_offsets_out = NULL; + OffsetT* d_lengths_out = NULL; + int* d_num_runs = NULL; + + if (RLE_METHOD == RLE) + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_unique_out, sizeof(T) * num_items)); + if (RLE_METHOD == NON_TRIVIAL) + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_offsets_out, sizeof(OffsetT) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_lengths_out, sizeof(LengthT) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_num_runs, sizeof(int))); + + // Allocate CDP device arrays + size_t* d_temp_storage_bytes = NULL; + cudaError_t* d_cdp_error = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_temp_storage_bytes, sizeof(size_t) * 1)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_cdp_error, sizeof(cudaError_t) * 1)); + + // Allocate temporary 
storage + void* d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(Dispatch(Int2Type(), Int2Type(), 1, d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, d_in, d_unique_out, d_offsets_out, d_lengths_out, d_num_runs, equality_op, num_items, 0, true)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Clear device output arrays + if (RLE_METHOD == RLE) + CubDebugExit(cudaMemset(d_unique_out, 0, sizeof(T) * num_items)); + if (RLE_METHOD == NON_TRIVIAL) + CubDebugExit(cudaMemset(d_offsets_out, 0, sizeof(OffsetT) * num_items)); + CubDebugExit(cudaMemset(d_lengths_out, 0, sizeof(LengthT) * num_items)); + CubDebugExit(cudaMemset(d_num_runs, 0, sizeof(int))); + + // Run warmup/correctness iteration + CubDebugExit(Dispatch(Int2Type(), Int2Type(), 1, d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, d_in, d_unique_out, d_offsets_out, d_lengths_out, d_num_runs, equality_op, num_items, 0, true)); + + // Check for correctness (and display results, if specified) + int compare0 = 0; + int compare1 = 0; + int compare2 = 0; + int compare3 = 0; + + if (RLE_METHOD == RLE) + { + compare0 = CompareDeviceResults(h_unique_reference, d_unique_out, num_runs, true, g_verbose); + printf("\t Keys %s\n", compare0 ? "FAIL" : "PASS"); + } + + if (RLE_METHOD != RLE) + { + compare1 = CompareDeviceResults(h_offsets_reference, d_offsets_out, num_runs, true, g_verbose); + printf("\t Offsets %s\n", compare1 ? "FAIL" : "PASS"); + } + + if (RLE_METHOD != CSR) + { + compare2 = CompareDeviceResults(h_lengths_reference, d_lengths_out, num_runs, true, g_verbose); + printf("\t Lengths %s\n", compare2 ? "FAIL" : "PASS"); + } + + compare3 = CompareDeviceResults(&num_runs, d_num_runs, 1, true, g_verbose); + printf("\t Count %s\n", compare3 ? 
"FAIL" : "PASS"); + + // Flush any stdout/stderr + fflush(stdout); + fflush(stderr); + + // Performance + GpuTimer gpu_timer; + gpu_timer.Start(); + CubDebugExit(Dispatch(Int2Type(), Int2Type(), g_timing_iterations, d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, d_in, d_unique_out, d_offsets_out, d_lengths_out, d_num_runs, equality_op, num_items, 0, false)); + gpu_timer.Stop(); + float elapsed_millis = gpu_timer.ElapsedMillis(); + + // Display performance + if (g_timing_iterations > 0) + { + float avg_millis = elapsed_millis / g_timing_iterations; + float giga_rate = float(num_items) / avg_millis / 1000.0f / 1000.0f; + int bytes_moved = (num_items * sizeof(T)) + (num_runs * (sizeof(OffsetT) + sizeof(LengthT))); + float giga_bandwidth = float(bytes_moved) / avg_millis / 1000.0f / 1000.0f; + printf(", %.3f avg ms, %.3f billion items/s, %.3f logical GB/s", avg_millis, giga_rate, giga_bandwidth); + } + printf("\n\n"); + + // Flush any stdout/stderr + fflush(stdout); + fflush(stderr); + + // Cleanup + if (d_unique_out) CubDebugExit(g_allocator.DeviceFree(d_unique_out)); + if (d_offsets_out) CubDebugExit(g_allocator.DeviceFree(d_offsets_out)); + if (d_lengths_out) CubDebugExit(g_allocator.DeviceFree(d_lengths_out)); + if (d_num_runs) CubDebugExit(g_allocator.DeviceFree(d_num_runs)); + if (d_temp_storage_bytes) CubDebugExit(g_allocator.DeviceFree(d_temp_storage_bytes)); + if (d_cdp_error) CubDebugExit(g_allocator.DeviceFree(d_cdp_error)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + // Correctness asserts + AssertEquals(0, compare0 | compare1 | compare2 | compare3); +} + + +/** + * Test DeviceRunLengthEncode on pointer type + */ +template < + RleMethod RLE_METHOD, + Backend BACKEND, + typename T, + typename OffsetT, + typename LengthT> +void TestPointer( + int num_items, + int entropy_reduction, + int max_segment) +{ + // Allocate host arrays + T* h_in = new T[num_items]; + T* h_unique_reference = new T[num_items]; + OffsetT* h_offsets_reference = new OffsetT[num_items]; + LengthT* h_lengths_reference = new LengthT[num_items]; + + for (int i = 0; i < num_items; ++i) + InitValue(INTEGER_SEED, h_offsets_reference[i], 1); + + // Initialize problem and solution + Equality equality_op; + Initialize(entropy_reduction, h_in, num_items, max_segment); + + int num_runs = Solve(h_in, h_unique_reference, h_offsets_reference, h_lengths_reference, equality_op, num_items); + + printf("\nPointer %s cub::%s on %d items, %d segments (avg run length %.3f), {%s key, %s offset, %s length}, max_segment %d, entropy_reduction %d\n", + (RLE_METHOD == RLE) ? "DeviceReduce::RunLengthEncode" : (RLE_METHOD == NON_TRIVIAL) ? "DeviceRunLengthEncode::NonTrivialRuns" : "Other", + (BACKEND == CDP) ? "CDP CUB" : (BACKEND == THRUST) ? 
"Thrust" : "CUB", + num_items, num_runs, float(num_items) / num_runs, + typeid(T).name(), typeid(OffsetT).name(), typeid(LengthT).name(), + max_segment, entropy_reduction); + fflush(stdout); + + // Allocate problem device arrays + T* d_in = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(T) * num_items)); + + // Initialize device input + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(T) * num_items, cudaMemcpyHostToDevice)); + + // Run Test + Test(d_in, h_unique_reference, h_offsets_reference, h_lengths_reference, equality_op, num_runs, num_items); + + // Cleanup + if (h_in) delete[] h_in; + if (h_unique_reference) delete[] h_unique_reference; + if (h_offsets_reference) delete[] h_offsets_reference; + if (h_lengths_reference) delete[] h_lengths_reference; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); +} + + +/** + * Test on iterator type + */ +template < + RleMethod RLE_METHOD, + Backend BACKEND, + typename T, + typename OffsetT, + typename LengthT> +void TestIterator( + int num_items, + Int2Type is_primitive) +{ + // Allocate host arrays + T* h_unique_reference = new T[num_items]; + OffsetT* h_offsets_reference = new OffsetT[num_items]; + LengthT* h_lengths_reference = new LengthT[num_items]; + + T one_val; + InitValue(INTEGER_SEED, one_val, 1); + ConstantInputIterator h_in(one_val); + + // Initialize problem and solution + Equality equality_op; + int num_runs = Solve(h_in, h_unique_reference, h_offsets_reference, h_lengths_reference, equality_op, num_items); + + printf("\nIterator %s cub::%s on %d items, %d segments (avg run length %.3f), {%s key, %s offset, %s length}\n", + (RLE_METHOD == RLE) ? "DeviceReduce::RunLengthEncode" : (RLE_METHOD == NON_TRIVIAL) ? "DeviceRunLengthEncode::NonTrivialRuns" : "Other", + (BACKEND == CDP) ? "CDP CUB" : (BACKEND == THRUST) ? 
"Thrust" : "CUB", + num_items, num_runs, float(num_items) / num_runs, + typeid(T).name(), typeid(OffsetT).name(), typeid(LengthT).name()); + fflush(stdout); + + // Run Test + Test(h_in, h_unique_reference, h_offsets_reference, h_lengths_reference, equality_op, num_runs, num_items); + + // Cleanup + if (h_unique_reference) delete[] h_unique_reference; + if (h_offsets_reference) delete[] h_offsets_reference; + if (h_lengths_reference) delete[] h_lengths_reference; +} + + +template < + RleMethod RLE_METHOD, + Backend BACKEND, + typename T, + typename OffsetT, + typename LengthT> +void TestIterator( + int num_items, + Int2Type is_primitive) +{} + + +/** + * Test different gen modes + */ +template < + RleMethod RLE_METHOD, + Backend BACKEND, + typename T, + typename OffsetT, + typename LengthT> +void Test( + int num_items) +{ + // Test iterator (one run) + TestIterator(num_items, Int2Type::PRIMITIVE>()); + + // num_items runs + TestPointer(num_items, 0, 1); + + // Evaluate different run lengths + for (int max_segment = 3; max_segment < CUB_MIN(num_items, (unsigned short) -1); max_segment *= 3) + { + // Uniform selection run length + TestPointer(num_items, 0, max_segment); + + // Reduced-entropy run length + TestPointer(num_items, 4, max_segment); + } +} + + +/** + * Test different dispatch + */ +template < + typename T, + typename OffsetT, + typename LengthT> +void TestDispatch( + int num_items) +{ + Test(num_items); + Test(num_items); + +#ifdef CUB_CDP + Test(num_items); + Test(num_items); +#endif +} + + +/** + * Test different input sizes + */ +template < + typename T, + typename OffsetT, + typename LengthT> +void TestSize( + int num_items) +{ + if (num_items < 0) + { + TestDispatch(0); + TestDispatch(1); + TestDispatch(100); + TestDispatch(10000); + TestDispatch(1000000); + + // Randomly select problem size between 1:10,000,000 + unsigned int max_int = (unsigned int) -1; + for (int i = 0; i < 10; ++i) + { + unsigned int num_items; + RandomBits(num_items); + num_items = (unsigned int) ((double(num_items) * double(10000000)) / double(max_int)); + num_items = CUB_MAX(1, num_items); + TestDispatch(num_items); + } + } + else + { + TestDispatch(num_items); + } + +} + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_items = -1; + int entropy_reduction = 0; + int max_segment = 1000; + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + args.GetCmdLineArgument("i", g_timing_iterations); + args.GetCmdLineArgument("repeat", g_repeat); + args.GetCmdLineArgument("maxseg", max_segment); + args.GetCmdLineArgument("entropy", entropy_reduction); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--i= " + "[--device=] " + "[--maxseg=]" + "[--entropy=]" + "[--repeat=]" + "[--v] " + "[--cdp]" + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + printf("\n"); + + // Get ptx version + int ptx_version; + CubDebugExit(PtxVersion(ptx_version)); + +#ifdef QUICKER_TEST + + // Compile/run basic CUB test + if (num_items < 0) num_items = 32000000; + + TestPointer( num_items, entropy_reduction, max_segment); + TestPointer( num_items, entropy_reduction, max_segment); + TestIterator( num_items, Int2Type::PRIMITIVE>()); + + +#elif defined(QUICK_TEST) + + // Compile/run quick 
tests + if (num_items < 0) num_items = 32000000; + + TestPointer( num_items, entropy_reduction, max_segment); + TestPointer( num_items, entropy_reduction, max_segment); + +#else + + // Compile/run thorough tests + for (int i = 0; i <= g_repeat; ++i) + { + // Test different input types + TestSize(num_items); + TestSize(num_items); + TestSize(num_items); + TestSize(num_items); + TestSize(num_items); + TestSize(num_items); + TestSize(num_items); + + TestSize(num_items); + TestSize(num_items); + TestSize(num_items); + TestSize(num_items); + TestSize(num_items); + TestSize(num_items); + TestSize(num_items); + } + +#endif + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_scan.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_scan.cu new file mode 100644 index 0000000..1f97d05 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_scan.cu @@ -0,0 +1,1015 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
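//===========================================================================
// Illustrative sketch of the entry point exercised by
// test_device_run_length_encode.cu above: cub::DeviceRunLengthEncode::Encode
// compacts runs of consecutive equal items into (unique value, run length)
// pairs and reports the run count. Pointer names and the int item type are
// assumptions; the test itself drives this call through its Dispatch()
// wrappers.
//===========================================================================
#include <cub/cub.cuh>

inline cudaError_t EncodeRuns(const int *d_in, int *d_unique_out,
                              int *d_counts_out, int *d_num_runs_out,
                              int num_items)
{
    void  *d_temp_storage     = NULL;
    size_t temp_storage_bytes = 0;

    // Size query, then the actual encode, mirroring the two-phase
    // temp-storage pattern used throughout these tests.
    cub::DeviceRunLengthEncode::Encode(d_temp_storage, temp_storage_bytes,
        d_in, d_unique_out, d_counts_out, d_num_runs_out, num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);

    cudaError_t error = cub::DeviceRunLengthEncode::Encode(d_temp_storage,
        temp_storage_bytes, d_in, d_unique_out, d_counts_out, d_num_runs_out,
        num_items);

    cudaFree(d_temp_storage);
    return error;
}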
+ * + ******************************************************************************/ + +/****************************************************************************** + * Test of DeviceScan utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include "test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; +int g_timing_iterations = 0; +int g_repeat = 0; +double g_device_giga_bandwidth; +CachingDeviceAllocator g_allocator(true); + +// Dispatch types +enum Backend +{ + CUB, // CUB method + THRUST, // Thrust method + CDP, // GPU-based (dynamic parallelism) dispatch to CUB method +}; + + +/** + * \brief WrapperFunctor (for precluding test-specialized dispatch to *Sum variants) + */ +template +struct WrapperFunctor +{ + OpT op; + + WrapperFunctor(OpT op) : op(op) {} + + template + __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const + { + return op(a, b); + } +}; + + +//--------------------------------------------------------------------- +// Dispatch to different CUB DeviceScan entrypoints +//--------------------------------------------------------------------- + +/** + * Dispatch to exclusive scan entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + IsPrimitiveT is_primitive, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + ScanOpT scan_op, + InitialValueT initial_value, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceScan::ExclusiveScan(d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, initial_value, num_items, stream, debug_synchronous); + } + return error; +} + + +/** + * Dispatch to exclusive sum entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + Int2Type is_primitive, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + Sum scan_op, + InitialValueT initial_value, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, stream, debug_synchronous); + } + return error; +} + + +/** + * Dispatch to inclusive scan entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + IsPrimitiveT is_primitive, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + ScanOpT scan_op, + NullType initial_value, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + cudaError_t error = cudaSuccess; + for (int i = 0; i < 
timing_timing_iterations; ++i) + { + error = DeviceScan::InclusiveScan(d_temp_storage, temp_storage_bytes, d_in, d_out, scan_op, num_items, stream, debug_synchronous); + } + return error; +} + + +/** + * Dispatch to inclusive sum entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + Int2Type is_primitive, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + Sum scan_op, + NullType initial_value, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceScan::InclusiveSum(d_temp_storage, temp_storage_bytes, d_in, d_out, num_items, stream, debug_synchronous); + } + return error; +} + +//--------------------------------------------------------------------- +// Dispatch to different Thrust entrypoints +//--------------------------------------------------------------------- + +/** + * Dispatch to exclusive scan entrypoint + */ +template +cudaError_t Dispatch( + Int2Type dispatch_to, + IsPrimitiveT is_primitive, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + ScanOpT scan_op, + InitialValueT initial_value, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + thrust::device_ptr d_in_wrapper(d_in); + thrust::device_ptr d_out_wrapper(d_out); + for (int i = 0; i < timing_timing_iterations; ++i) + { + thrust::exclusive_scan(d_in_wrapper, d_in_wrapper + num_items, d_out_wrapper, initial_value, scan_op); + } + } + + return cudaSuccess; +} + + +/** + * Dispatch to exclusive sum entrypoint + */ +template +cudaError_t Dispatch( + Int2Type dispatch_to, + Int2Type is_primitive, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + Sum scan_op, + InitialValueT initial_value, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... 
else the output iterator's value type + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + thrust::device_ptr d_in_wrapper(d_in); + thrust::device_ptr d_out_wrapper(d_out); + for (int i = 0; i < timing_timing_iterations; ++i) + { + thrust::exclusive_scan(d_in_wrapper, d_in_wrapper + num_items, d_out_wrapper); + } + } + + return cudaSuccess; +} + + +/** + * Dispatch to inclusive scan entrypoint + */ +template +cudaError_t Dispatch( + Int2Type dispatch_to, + IsPrimitiveT is_primitive, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + ScanOpT scan_op, + NullType initial_value, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + thrust::device_ptr d_in_wrapper(d_in); + thrust::device_ptr d_out_wrapper(d_out); + for (int i = 0; i < timing_timing_iterations; ++i) + { + thrust::inclusive_scan(d_in_wrapper, d_in_wrapper + num_items, d_out_wrapper, scan_op); + } + } + + return cudaSuccess; +} + + +/** + * Dispatch to inclusive sum entrypoint + */ +template +cudaError_t Dispatch( + Int2Type dispatch_to, + Int2Type is_primitive, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + Sum scan_op, + NullType initial_value, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... 
else the output iterator's value type + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + thrust::device_ptr d_in_wrapper(d_in); + thrust::device_ptr d_out_wrapper(d_out); + for (int i = 0; i < timing_timing_iterations; ++i) + { + thrust::inclusive_scan(d_in_wrapper, d_in_wrapper + num_items, d_out_wrapper); + } + } + + return cudaSuccess; +} + + + +//--------------------------------------------------------------------- +// CUDA Nested Parallelism Test Kernel +//--------------------------------------------------------------------- + +/** + * Simple wrapper kernel to invoke DeviceScan + */ +template +__global__ void CnpDispatchKernel( + IsPrimitiveT is_primitive, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + ScanOpT scan_op, + InitialValueT initial_value, + OffsetT num_items, + bool debug_synchronous) +{ +#ifndef CUB_CDP + *d_cdp_error = cudaErrorNotSupported; +#else + *d_cdp_error = Dispatch( + Int2Type(), + is_primitive, + timing_timing_iterations, + d_temp_storage_bytes, + d_cdp_error, + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + scan_op, + initial_value, + num_items, + 0, + debug_synchronous); + + *d_temp_storage_bytes = temp_storage_bytes; +#endif +} + + +/** + * Dispatch to CDP kernel + */ +template +cudaError_t Dispatch( + Int2Type dispatch_to, + IsPrimitiveT is_primitive, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + ScanOpT scan_op, + InitialValueT initial_value, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to invoke device-side dispatch + CnpDispatchKernel<<<1,1>>>( + is_primitive, + timing_timing_iterations, + d_temp_storage_bytes, + d_cdp_error, + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + scan_op, + initial_value, + num_items, + debug_synchronous); + + // Copy out temp_storage_bytes + CubDebugExit(cudaMemcpy(&temp_storage_bytes, d_temp_storage_bytes, sizeof(size_t) * 1, cudaMemcpyDeviceToHost)); + + // Copy out error + cudaError_t retval; + CubDebugExit(cudaMemcpy(&retval, d_cdp_error, sizeof(cudaError_t) * 1, cudaMemcpyDeviceToHost)); + return retval; +} + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + + +/** + * Initialize problem + */ +template +void Initialize( + GenMode gen_mode, + T *h_in, + int num_items) +{ + for (int i = 0; i < num_items; ++i) + { + InitValue(gen_mode, h_in[i], i); + } + + if (g_verbose) + { + printf("Input:\n"); + DisplayResults(h_in, num_items); + printf("\n\n"); + } +} + +/** + * Solve exclusive-scan problem + */ +template < + typename InputIteratorT, + typename OutputT, + typename ScanOpT> +void Solve( + InputIteratorT h_in, + OutputT *h_reference, + int num_items, + ScanOpT scan_op, + OutputT initial_value) +{ + if (num_items > 0) + { + OutputT val = h_in[0]; + h_reference[0] = initial_value; + OutputT inclusive = scan_op(initial_value, val); + + for (int i = 1; i < num_items; ++i) + { + val = h_in[i]; + h_reference[i] = inclusive; + inclusive = scan_op(inclusive, val); + } + } +} + + +/** + * Solve inclusive-scan problem + */ +template < + typename InputIteratorT, + typename OutputT, + typename ScanOpT> +void Solve( + 
InputIteratorT h_in, + OutputT *h_reference, + int num_items, + ScanOpT scan_op, + NullType) +{ + if (num_items > 0) + { + OutputT inclusive = h_in[0]; + h_reference[0] = inclusive; + + for (int i = 1; i < num_items; ++i) + { + OutputT val = h_in[i]; + inclusive = scan_op(inclusive, val); + h_reference[i] = inclusive; + } + } +} + + +/** + * Test DeviceScan for a given problem input + */ +template < + Backend BACKEND, + typename DeviceInputIteratorT, + typename OutputT, + typename ScanOpT, + typename InitialValueT> +void Test( + DeviceInputIteratorT d_in, + OutputT *h_reference, + int num_items, + ScanOpT scan_op, + InitialValueT initial_value) +{ + typedef typename std::iterator_traits::value_type InputT; + + // Allocate device output array + OutputT *d_out = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(OutputT) * num_items)); + + // Allocate CDP device arrays + size_t *d_temp_storage_bytes = NULL; + cudaError_t *d_cdp_error = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_temp_storage_bytes, sizeof(size_t) * 1)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_cdp_error, sizeof(cudaError_t) * 1)); + + // Allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(Dispatch( + Int2Type(), + Int2Type::PRIMITIVE>(), + 1, + d_temp_storage_bytes, + d_cdp_error, + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + scan_op, + initial_value, + num_items, + 0, + true)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Clear device output array + CubDebugExit(cudaMemset(d_out, 0, sizeof(OutputT) * num_items)); + + // Run warmup/correctness iteration + CubDebugExit(Dispatch( + Int2Type(), + Int2Type::PRIMITIVE>(), + 1, + d_temp_storage_bytes, + d_cdp_error, + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + scan_op, + initial_value, + num_items, + 0, + true)); + + // Check for correctness (and display results, if specified) + int compare = CompareDeviceResults(h_reference, d_out, num_items, true, g_verbose); + printf("\t%s", compare ? 
"FAIL" : "PASS"); + + // Flush any stdout/stderr + fflush(stdout); + fflush(stderr); + + // Performance + GpuTimer gpu_timer; + gpu_timer.Start(); + CubDebugExit(Dispatch(Int2Type(), + Int2Type::PRIMITIVE>(), + g_timing_iterations, + d_temp_storage_bytes, + d_cdp_error, + d_temp_storage, + temp_storage_bytes, + d_in, + d_out, + scan_op, + initial_value, + num_items, + 0, + false)); + gpu_timer.Stop(); + float elapsed_millis = gpu_timer.ElapsedMillis(); + + // Display performance + if (g_timing_iterations > 0) + { + float avg_millis = elapsed_millis / g_timing_iterations; + float giga_rate = float(num_items) / avg_millis / 1000.0f / 1000.0f; + float giga_bandwidth = giga_rate * (sizeof(InputT) + sizeof(OutputT)); + printf(", %.3f avg ms, %.3f billion items/s, %.3f logical GB/s, %.1f%% peak", avg_millis, giga_rate, giga_bandwidth, giga_bandwidth / g_device_giga_bandwidth * 100.0); + } + + printf("\n\n"); + + // Cleanup + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_temp_storage_bytes) CubDebugExit(g_allocator.DeviceFree(d_temp_storage_bytes)); + if (d_cdp_error) CubDebugExit(g_allocator.DeviceFree(d_cdp_error)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + // Correctness asserts + AssertEquals(0, compare); +} + + +/** + * Test DeviceScan on pointer type + */ +template < + Backend BACKEND, + typename InputT, + typename OutputT, + typename ScanOpT, + typename InitialValueT> +void TestPointer( + int num_items, + GenMode gen_mode, + ScanOpT scan_op, + InitialValueT initial_value) +{ + printf("\nPointer %s %s cub::DeviceScan::%s %d items, %s->%s (%d->%d bytes) , gen-mode %s\n", + (BACKEND == CDP) ? "CDP CUB" : (BACKEND == THRUST) ? "Thrust" : "CUB", + (Equals::VALUE) ? "Inclusive" : "Exclusive", + (Equals::VALUE) ? "Sum" : "Scan", + num_items, + typeid(InputT).name(), typeid(OutputT).name(), (int) sizeof(InputT), (int) sizeof(OutputT), + (gen_mode == RANDOM) ? "RANDOM" : (gen_mode == INTEGER_SEED) ? "SEQUENTIAL" : "HOMOGENOUS"); + fflush(stdout); + + // Allocate host arrays + InputT* h_in = new InputT[num_items]; + OutputT* h_reference = new OutputT[num_items]; + + // Initialize problem and solution + Initialize(gen_mode, h_in, num_items); + Solve(h_in, h_reference, num_items, scan_op, initial_value); + + // Allocate problem device arrays + InputT *d_in = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(InputT) * num_items)); + + // Initialize device input + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(InputT) * num_items, cudaMemcpyHostToDevice)); + + // Run Test + Test(d_in, h_reference, num_items, scan_op, initial_value); + + // Cleanup + if (h_in) delete[] h_in; + if (h_reference) delete[] h_reference; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); +} + + +/** + * Test DeviceScan on iterator type + */ +template < + Backend BACKEND, + typename InputT, + typename OutputT, + typename ScanOpT, + typename InitialValueT> +void TestIterator( + int num_items, + ScanOpT scan_op, + InitialValueT initial_value) +{ + printf("\nIterator %s %s cub::DeviceScan::%s %d items, %s->%s (%d->%d bytes)\n", + (BACKEND == CDP) ? "CDP CUB" : (BACKEND == THRUST) ? "Thrust" : "CUB", + (Equals::VALUE) ? "Inclusive" : "Exclusive", + (Equals::VALUE) ? 
"Sum" : "Scan", + num_items, + typeid(InputT).name(), typeid(OutputT).name(), (int) sizeof(InputT), (int) sizeof(OutputT)); + fflush(stdout); + + // Use a constant iterator as the input + InputT val = InputT(); + ConstantInputIterator h_in(val); + + // Allocate host arrays + OutputT* h_reference = new OutputT[num_items]; + + // Initialize problem and solution + Solve(h_in, h_reference, num_items, scan_op, initial_value); + + // Run Test + Test(h_in, h_reference, num_items, scan_op, initial_value); + + // Cleanup + if (h_reference) delete[] h_reference; +} + + +/** + * Test different gen modes + */ +template < + Backend BACKEND, + typename InputT, + typename OutputT, + typename ScanOpT, + typename InitialValueT> +void Test( + int num_items, + ScanOpT scan_op, + InitialValueT initial_value) +{ + TestPointer( num_items, UNIFORM, scan_op, initial_value); + TestPointer( num_items, RANDOM, scan_op, initial_value); + TestIterator( num_items, scan_op, initial_value); +} + + +/** + * Test different dispatch + */ +template < + typename InputT, + typename OutputT, + typename ScanOpT, + typename InitialValueT> +void Test( + int num_items, + ScanOpT scan_op, + InitialValueT initial_value) +{ + Test(num_items, scan_op, initial_value); +#ifdef CUB_CDP + Test(num_items, scan_op, initial_value); +#endif +} + + +/** + * Test different operators + */ +template +void TestOp( + int num_items, + OutputT identity, + OutputT initial_value) +{ + // Exclusive (use identity as initial value because it will dispatch to *Sum variants that don't take initial values) + Test(num_items, cub::Sum(), identity); + Test(num_items, cub::Max(), identity); + + // Exclusive (non-specialized, so we can test initial-value) + Test(num_items, WrapperFunctor(cub::Sum()), initial_value); + Test(num_items, WrapperFunctor(cub::Max()), initial_value); + + // Inclusive (no initial value) + Test(num_items, cub::Sum(), NullType()); + Test(num_items, cub::Max(), NullType()); +} + + +/** + * Test different input sizes + */ +template < + typename InputT, + typename OutputT> +void TestSize( + int num_items, + OutputT identity, + OutputT initial_value) +{ + if (num_items < 0) + { + TestOp(0, identity, initial_value); + TestOp(1, identity, initial_value); + TestOp(100, identity, initial_value); + TestOp(10000, identity, initial_value); + TestOp(1000000, identity, initial_value); + + // Randomly select problem size between 1:10,000,000 + unsigned int max_int = (unsigned int) -1; + for (int i = 0; i < 10; ++i) + { + unsigned int num_items; + RandomBits(num_items); + num_items = (unsigned int) ((double(num_items) * double(10000000)) / double(max_int)); + num_items = CUB_MAX(1, num_items); + TestOp(num_items, identity, initial_value); + } + } + else + { + TestOp(num_items, identity, initial_value); + } +} + + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_items = -1; + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + args.GetCmdLineArgument("i", g_timing_iterations); + args.GetCmdLineArgument("repeat", g_repeat); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--i= " + "[--device=] " + "[--repeat=]" + "[--v] " + "[--cdp]" + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + 
g_device_giga_bandwidth = args.device_giga_bandwidth; + printf("\n"); + +#ifdef QUICKER_TEST + + // Compile/run basic CUB test + if (num_items < 0) num_items = 32000000; + + TestPointer( num_items , UNIFORM, Sum(), (int) (0)); + TestPointer( num_items , UNIFORM, Sum(), (int) (0)); + +#elif defined(QUICK_TEST) + + // Get device ordinal + int device_ordinal; + CubDebugExit(cudaGetDevice(&device_ordinal)); + + // Get device SM version + int sm_version; + CubDebugExit(SmVersion(sm_version, device_ordinal)); + + // Compile/run quick tests + if (num_items < 0) num_items = 32000000; + + TestPointer( num_items * ((sm_version <= 130) ? 1 : 4), UNIFORM, Sum(), char(0)); + TestPointer( num_items * ((sm_version <= 130) ? 1 : 4), UNIFORM, Sum(), char(0)); + + printf("----------------------------\n"); + TestPointer( num_items * ((sm_version <= 130) ? 1 : 2), UNIFORM, Sum(), short(0)); + TestPointer( num_items * ((sm_version <= 130) ? 1 : 2), UNIFORM, Sum(), short(0)); + + printf("----------------------------\n"); + TestPointer( num_items , UNIFORM, Sum(), (int) (0)); + TestPointer( num_items , UNIFORM, Sum(), (int) (0)); + + printf("----------------------------\n"); + TestPointer( num_items / 2, UNIFORM, Sum(), (long long) (0)); + TestPointer(num_items / 2, UNIFORM, Sum(), (long long) (0)); + + printf("----------------------------\n"); + TestPointer( num_items / 4, UNIFORM, Sum(), TestBar()); + TestPointer( num_items / 4, UNIFORM, Sum(), TestBar()); + +#else + + // Compile/run thorough tests + for (int i = 0; i <= g_repeat; ++i) + { + // Test different input+output data types + TestSize(num_items, (int) 0, (int) 99); + + // Test same intput+output data types + TestSize(num_items, (unsigned char) 0, (unsigned char) 99); + TestSize(num_items, (char) 0, (char) 99); + TestSize(num_items, (unsigned short) 0, (unsigned short)99); + TestSize(num_items, (unsigned int) 0, (unsigned int) 99); + TestSize(num_items, (unsigned long long) 0, (unsigned long long) 99); + + TestSize(num_items, make_uchar2(0, 0), make_uchar2(17, 21)); + TestSize(num_items, make_char2(0, 0), make_char2(17, 21)); + TestSize(num_items, make_ushort2(0, 0), make_ushort2(17, 21)); + TestSize(num_items, make_uint2(0, 0), make_uint2(17, 21)); + TestSize(num_items, make_ulonglong2(0, 0), make_ulonglong2(17, 21)); + TestSize(num_items, make_uchar4(0, 0, 0, 0), make_uchar4(17, 21, 32, 85)); + TestSize(num_items, make_char4(0, 0, 0, 0), make_char4(17, 21, 32, 85)); + + TestSize(num_items, make_ushort4(0, 0, 0, 0), make_ushort4(17, 21, 32, 85)); + TestSize(num_items, make_uint4(0, 0, 0, 0), make_uint4(17, 21, 32, 85)); + TestSize(num_items, make_ulonglong4(0, 0, 0, 0), make_ulonglong4(17, 21, 32, 85)); + + TestSize(num_items, + TestFoo::MakeTestFoo(0, 0, 0, 0), + TestFoo::MakeTestFoo(1ll << 63, 1 << 31, short(1 << 15), char(1 << 7))); + + TestSize(num_items, + TestBar(0, 0), + TestBar(1ll << 63, 1 << 31)); + } + +#endif + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_select_if.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_select_if.cu new file mode 100644 index 0000000..9bdca34 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_select_if.cu @@ -0,0 +1,1039 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. 
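//===========================================================================
// Illustrative sketch of the call pattern behind test_device_scan.cu above:
// a device-wide exclusive prefix sum via cub::DeviceScan::ExclusiveSum,
// using the same query-then-allocate temp-storage idiom as the test's
// Test() routine. The allocator argument, function name, and pointer names
// are assumptions for the example.
//===========================================================================
#include <cub/cub.cuh>

inline cudaError_t ExclusivePrefixSum(cub::CachingDeviceAllocator &allocator,
                                      const int *d_in, int *d_out,
                                      int num_items)
{
    void  *d_temp_storage     = NULL;
    size_t temp_storage_bytes = 0;

    // Pass 1: compute how much scratch space the scan needs.
    cub::DeviceScan::ExclusiveSum(d_temp_storage, temp_storage_bytes,
                                  d_in, d_out, num_items);
    allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes);

    // Pass 2: run the scan; d_out[i] = sum of d_in[0..i-1], with d_out[0] = 0.
    cudaError_t error = cub::DeviceScan::ExclusiveSum(d_temp_storage,
        temp_storage_bytes, d_in, d_out, num_items);

    allocator.DeviceFree(d_temp_storage);
    return error;
}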
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Test of DeviceSelect::If and DevicePartition::If utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; +int g_timing_iterations = 0; +int g_repeat = 0; +float g_device_giga_bandwidth; +CachingDeviceAllocator g_allocator(true); + +// Dispatch types +enum Backend +{ + CUB, // CUB method + THRUST, // Thrust method + CDP, // GPU-based (dynamic parallelism) dispatch to CUB method +}; + + +// Selection functor type +template +struct LessThan +{ + T compare; + + __host__ __device__ __forceinline__ + LessThan(T compare) : compare(compare) {} + + __host__ __device__ __forceinline__ + bool operator()(const T &a) const { + return (a < compare); + } +}; + +//--------------------------------------------------------------------- +// Dispatch to different CUB DeviceSelect entrypoints +//--------------------------------------------------------------------- + + +/** + * Dispatch to select if entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + Int2Type is_flagged, + Int2Type is_partition, + int timing_timing_iterations, + size_t* d_temp_storage_bytes, + cudaError_t* d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + FlagIteratorT d_flags, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + OffsetT num_items, + SelectOpT select_op, + cudaStream_t stream, + bool debug_synchronous) 
+{ + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceSelect::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op, stream, debug_synchronous); + } + return error; +} + + +/** + * Dispatch to partition if entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + Int2Type is_flagged, + Int2Type is_partition, + int timing_timing_iterations, + size_t* d_temp_storage_bytes, + cudaError_t* d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + FlagIteratorT d_flags, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + OffsetT num_items, + SelectOpT select_op, + cudaStream_t stream, + bool debug_synchronous) +{ + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DevicePartition::If(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, select_op, stream, debug_synchronous); + } + return error; +} + + +/** + * Dispatch to select flagged entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + Int2Type is_flagged, + Int2Type partition, + int timing_timing_iterations, + size_t* d_temp_storage_bytes, + cudaError_t* d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + FlagIteratorT d_flags, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + OffsetT num_items, + SelectOpT select_op, + cudaStream_t stream, + bool debug_synchronous) +{ + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceSelect::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items, stream, debug_synchronous); + } + return error; +} + + +/** + * Dispatch to partition flagged entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + Int2Type is_flagged, + Int2Type partition, + int timing_timing_iterations, + size_t* d_temp_storage_bytes, + cudaError_t* d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + FlagIteratorT d_flags, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + OffsetT num_items, + SelectOpT select_op, + cudaStream_t stream, + bool debug_synchronous) +{ + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DevicePartition::Flagged(d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items, stream, debug_synchronous); + } + return error; +} + + +//--------------------------------------------------------------------- +// Dispatch to different Thrust entrypoints +//--------------------------------------------------------------------- + +/** + * Dispatch to select if entrypoint + */ +template +__host__ __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + Int2Type is_flagged, + Int2Type is_partition, + int timing_timing_iterations, + size_t* d_temp_storage_bytes, + cudaError_t* d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + FlagIteratorT d_flags, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + OffsetT num_items, + SelectOpT select_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // The input value type + typedef typename std::iterator_traits::value_type InputT; + 
+ // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + thrust::device_ptr d_out_wrapper_end; + thrust::device_ptr d_in_wrapper(d_in); + thrust::device_ptr d_out_wrapper(d_out); + + for (int i = 0; i < timing_timing_iterations; ++i) + { + d_out_wrapper_end = thrust::copy_if(d_in_wrapper, d_in_wrapper + num_items, d_out_wrapper, select_op); + } + + OffsetT num_selected = OffsetT(d_out_wrapper_end - d_out_wrapper); + CubDebugExit(cudaMemcpy(d_num_selected_out, &num_selected, sizeof(OffsetT), cudaMemcpyHostToDevice)); + } + + return cudaSuccess; +} + + +/** + * Dispatch to partition if entrypoint + */ +template +__host__ __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + Int2Type is_flagged, + Int2Type is_partition, + int timing_timing_iterations, + size_t* d_temp_storage_bytes, + cudaError_t* d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + FlagIteratorT d_flags, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + OffsetT num_items, + SelectOpT select_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type + + typedef thrust::reverse_iterator > ReverseOutputIteratorT; + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + thrust::pair, ReverseOutputIteratorT> d_out_wrapper_end; + + thrust::device_ptr d_in_wrapper(d_in); + thrust::device_ptr d_out_wrapper(d_out); + + ReverseOutputIteratorT d_out_unselected(d_out_wrapper + num_items); + + for (int i = 0; i < timing_timing_iterations; ++i) + { + d_out_wrapper_end = thrust::partition_copy( + d_in_wrapper, + d_in_wrapper + num_items, + d_out_wrapper, + d_out_unselected, + select_op); + } + + OffsetT num_selected = OffsetT(d_out_wrapper_end.first - d_out_wrapper); + CubDebugExit(cudaMemcpy(d_num_selected_out, &num_selected, sizeof(OffsetT), cudaMemcpyHostToDevice)); + } + + return cudaSuccess; +} + + +/** + * Dispatch to select flagged entrypoint + */ +template +__host__ __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + Int2Type is_flagged, + Int2Type is_partition, + int timing_timing_iterations, + size_t* d_temp_storage_bytes, + cudaError_t* d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + FlagIteratorT d_flags, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + OffsetT num_items, + SelectOpT select_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // The flag type + typedef typename std::iterator_traits::value_type FlagT; + + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? 
+ typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... else the output iterator's value type + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + thrust::device_ptr d_out_wrapper_end; + thrust::device_ptr d_in_wrapper(d_in); + thrust::device_ptr d_out_wrapper(d_out); + thrust::device_ptr d_flags_wrapper(d_flags); + + for (int i = 0; i < timing_timing_iterations; ++i) + { + d_out_wrapper_end = thrust::copy_if(d_in_wrapper, d_in_wrapper + num_items, d_flags_wrapper, d_out_wrapper, CastOp()); + } + + OffsetT num_selected = OffsetT(d_out_wrapper_end - d_out_wrapper); + CubDebugExit(cudaMemcpy(d_num_selected_out, &num_selected, sizeof(OffsetT), cudaMemcpyHostToDevice)); + } + + return cudaSuccess; +} + + +/** + * Dispatch to partition flagged entrypoint + */ +template +__host__ __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + Int2Type is_flagged, + Int2Type is_partition, + int timing_timing_iterations, + size_t* d_temp_storage_bytes, + cudaError_t* d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + FlagIteratorT d_flags, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + OffsetT num_items, + SelectOpT select_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // The flag type + typedef typename std::iterator_traits::value_type FlagT; + + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... 
else the output iterator's value type + + typedef thrust::reverse_iterator > ReverseOutputIteratorT; + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + thrust::pair, ReverseOutputIteratorT> d_out_wrapper_end; + + thrust::device_ptr d_in_wrapper(d_in); + thrust::device_ptr d_out_wrapper(d_out); + thrust::device_ptr d_flags_wrapper(d_flags); + ReverseOutputIteratorT d_out_unselected(d_out_wrapper + num_items); + + for (int i = 0; i < timing_timing_iterations; ++i) + { + d_out_wrapper_end = thrust::partition_copy( + d_in_wrapper, + d_in_wrapper + num_items, + d_flags_wrapper, + d_out_wrapper, + d_out_unselected, + CastOp()); + } + + OffsetT num_selected = OffsetT(d_out_wrapper_end.first - d_out_wrapper); + CubDebugExit(cudaMemcpy(d_num_selected_out, &num_selected, sizeof(OffsetT), cudaMemcpyHostToDevice)); + } + + return cudaSuccess; +} + + +//--------------------------------------------------------------------- +// CUDA Nested Parallelism Test Kernel +//--------------------------------------------------------------------- + +/** + * Simple wrapper kernel to invoke DeviceSelect + */ +template +__global__ void CnpDispatchKernel( + IsFlaggedTag is_flagged, + IsPartitionTag is_partition, + int timing_timing_iterations, + size_t* d_temp_storage_bytes, + cudaError_t* d_cdp_error, + + void* d_temp_storage, + size_t temp_storage_bytes, + InputIteratorT d_in, + FlagIteratorT d_flags, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + OffsetT num_items, + SelectOpT select_op, + bool debug_synchronous) +{ + +#ifndef CUB_CDP + *d_cdp_error = cudaErrorNotSupported; +#else + *d_cdp_error = Dispatch(Int2Type(), is_flagged, is_partition, timing_timing_iterations, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items, select_op, 0, debug_synchronous); + *d_temp_storage_bytes = temp_storage_bytes; +#endif +} + + +/** + * Dispatch to CDP kernel + */ +template +cudaError_t Dispatch( + Int2Type dispatch_to, + IsFlaggedTag is_flagged, + IsPartitionTag is_partition, + int timing_timing_iterations, + size_t* d_temp_storage_bytes, + cudaError_t* d_cdp_error, + + void* d_temp_storage, + size_t& temp_storage_bytes, + InputIteratorT d_in, + FlagIteratorT d_flags, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + OffsetT num_items, + SelectOpT select_op, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to invoke device-side dispatch + CnpDispatchKernel<<<1,1>>>(is_flagged, is_partition, timing_timing_iterations, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items, select_op, debug_synchronous); + + // Copy out temp_storage_bytes + CubDebugExit(cudaMemcpy(&temp_storage_bytes, d_temp_storage_bytes, sizeof(size_t) * 1, cudaMemcpyDeviceToHost)); + + // Copy out error + cudaError_t retval; + CubDebugExit(cudaMemcpy(&retval, d_cdp_error, sizeof(cudaError_t) * 1, cudaMemcpyDeviceToHost)); + return retval; +} + + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + + +/** + * Initialize problem + */ +template +void Initialize( + T* h_in, + int num_items) +{ + for (int i = 0; i < num_items; ++i) + { + // Initialize each item to a randomly selected value from [0..126] + unsigned int value; + RandomBits(value, 0, 0, 7); + if (value == 127) + value = 126; + InitValue(INTEGER_SEED, h_in[i], 
value); + } + + if (g_verbose) + { + printf("Input:\n"); + DisplayResults(h_in, num_items); + printf("\n\n"); + } +} + + +/** + * Solve selection problem (and set corresponding flags) + */ +template < + typename InputIteratorT, + typename FlagIteratorT, + typename SelectOpT, + typename T> +int Solve( + InputIteratorT h_in, + SelectOpT select_op, + T* h_reference, + FlagIteratorT h_flags, + int num_items) +{ + int num_selected = 0; + for (int i = 0; i < num_items; ++i) + { + if ((h_flags[i] = select_op(h_in[i]))) + { + h_reference[num_selected] = h_in[i]; + num_selected++; + } + else + { + h_reference[num_items - (i - num_selected) - 1] = h_in[i]; + } + } + + return num_selected; +} + + + +/** + * Test DeviceSelect for a given problem input + */ +template < + Backend BACKEND, + bool IS_FLAGGED, + bool IS_PARTITION, + typename DeviceInputIteratorT, + typename FlagT, + typename SelectOpT, + typename T> +void Test( + DeviceInputIteratorT d_in, + FlagT* h_flags, + SelectOpT select_op, + T* h_reference, + int num_selected, + int num_items) +{ + // Allocate device flags, output, and num-selected + FlagT* d_flags = NULL; + T* d_out = NULL; + int* d_num_selected_out = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_flags, sizeof(FlagT) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(T) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_num_selected_out, sizeof(int))); + + // Allocate CDP device arrays + size_t* d_temp_storage_bytes = NULL; + cudaError_t* d_cdp_error = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_temp_storage_bytes, sizeof(size_t) * 1)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_cdp_error, sizeof(cudaError_t) * 1)); + + // Allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(Dispatch(Int2Type(), Int2Type(), Int2Type(), 1, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items, select_op, 0, true)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Copy flags and clear device output array + CubDebugExit(cudaMemcpy(d_flags, h_flags, sizeof(FlagT) * num_items, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemset(d_out, 0, sizeof(T) * num_items)); + CubDebugExit(cudaMemset(d_num_selected_out, 0, sizeof(int))); + + // Run warmup/correctness iteration + CubDebugExit(Dispatch(Int2Type(), Int2Type(), Int2Type(), 1, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items, select_op, 0, true)); + + // Check for correctness (and display results, if specified) + int compare1 = (IS_PARTITION) ? + CompareDeviceResults(h_reference, d_out, num_items, true, g_verbose) : + CompareDeviceResults(h_reference, d_out, num_selected, true, g_verbose); + printf("\t Data %s\n", compare1 ? "FAIL" : "PASS"); + + int compare2 = CompareDeviceResults(&num_selected, d_num_selected_out, 1, true, g_verbose); + printf("\t Count %s\n", compare2 ? 
"FAIL" : "PASS"); + + // Flush any stdout/stderr + fflush(stdout); + fflush(stderr); + + // Performance + GpuTimer gpu_timer; + gpu_timer.Start(); + CubDebugExit(Dispatch(Int2Type(), Int2Type(), Int2Type(), g_timing_iterations, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, d_in, d_flags, d_out, d_num_selected_out, num_items, select_op, 0, false)); + gpu_timer.Stop(); + float elapsed_millis = gpu_timer.ElapsedMillis(); + + // Display performance + if (g_timing_iterations > 0) + { + float avg_millis = elapsed_millis / g_timing_iterations; + float giga_rate = float(num_items) / avg_millis / 1000.0f / 1000.0f; + int num_output_items = (IS_PARTITION) ? num_items : num_selected; + int num_flag_items = (IS_FLAGGED) ? num_items : 0; + size_t num_bytes = sizeof(T) * (num_items + num_output_items) + sizeof(FlagT) * num_flag_items; + float giga_bandwidth = float(num_bytes) / avg_millis / 1000.0f / 1000.0f; + + printf(", %.3f avg ms, %.3f billion items/s, %.3f logical GB/s, %.1f%% peak", avg_millis, giga_rate, giga_bandwidth, giga_bandwidth / g_device_giga_bandwidth * 100.0); + } + printf("\n\n"); + + // Flush any stdout/stderr + fflush(stdout); + fflush(stderr); + + // Cleanup + if (d_flags) CubDebugExit(g_allocator.DeviceFree(d_flags)); + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_num_selected_out) CubDebugExit(g_allocator.DeviceFree(d_num_selected_out)); + if (d_temp_storage_bytes) CubDebugExit(g_allocator.DeviceFree(d_temp_storage_bytes)); + if (d_cdp_error) CubDebugExit(g_allocator.DeviceFree(d_cdp_error)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + // Correctness asserts + AssertEquals(0, compare1 | compare2); +} + + +/** + * Test on pointer type + */ +template < + Backend BACKEND, + bool IS_FLAGGED, + bool IS_PARTITION, + typename T> +void TestPointer( + int num_items, + float select_ratio) +{ + typedef char FlagT; + + // Allocate host arrays + T* h_in = new T[num_items]; + FlagT* h_flags = new FlagT[num_items]; + T* h_reference = new T[num_items]; + + // Initialize input + Initialize(h_in, num_items); + + // Select a comparison value that is select_ratio through the space of [0,127] + T compare; + if (select_ratio <= 0.0) + InitValue(INTEGER_SEED, compare, 0); // select none + else if (select_ratio >= 1.0) + InitValue(INTEGER_SEED, compare, 127); // select all + else + InitValue(INTEGER_SEED, compare, int(double(double(127) * select_ratio))); + + LessThan select_op(compare); + int num_selected = Solve(h_in, select_op, h_reference, h_flags, num_items); + + if (g_verbose) std::cout << "\nComparison item: " << compare << "\n"; + printf("\nPointer %s cub::%s::%s %d items, %d selected (select ratio %.3f), %s %d-byte elements\n", + (IS_PARTITION) ? "DevicePartition" : "DeviceSelect", + (IS_FLAGGED) ? "Flagged" : "If", + (BACKEND == CDP) ? "CDP CUB" : (BACKEND == THRUST) ? 
"Thrust" : "CUB", + num_items, num_selected, float(num_selected) / num_items, typeid(T).name(), (int) sizeof(T)); + fflush(stdout); + + // Allocate problem device arrays + T *d_in = NULL; + + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(T) * num_items)); + + // Initialize device input + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(T) * num_items, cudaMemcpyHostToDevice)); + + // Run Test + Test(d_in, h_flags, select_op, h_reference, num_selected, num_items); + + // Cleanup + if (h_in) delete[] h_in; + if (h_reference) delete[] h_reference; + if (h_flags) delete[] h_flags; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); +} + + +/** + * Test on iterator type + */ +template < + Backend BACKEND, + bool IS_FLAGGED, + bool IS_PARTITION, + typename T> +void TestIterator( + int num_items, + float select_ratio) +{ + typedef char FlagT; + + // Allocate host arrays + T* h_reference = new T[num_items]; + FlagT* h_flags = new FlagT[num_items]; + + // Use counting iterator as the input + CountingInputIterator h_in(0); + + // Select a comparison value that is select_ratio through the space of [0,127] + T compare; + if (select_ratio <= 0.0) + InitValue(INTEGER_SEED, compare, 0); // select none + else if (select_ratio >= 1.0) + InitValue(INTEGER_SEED, compare, 127); // select all + else + InitValue(INTEGER_SEED, compare, int(double(double(127) * select_ratio))); + + LessThan select_op(compare); + int num_selected = Solve(h_in, select_op, h_reference, h_flags, num_items); + + if (g_verbose) std::cout << "\nComparison item: " << compare << "\n"; + printf("\nIterator %s cub::%s::%s %d items, %d selected (select ratio %.3f), %s %d-byte elements\n", + (IS_PARTITION) ? "DevicePartition" : "DeviceSelect", + (IS_FLAGGED) ? "Flagged" : "If", + (BACKEND == CDP) ? "CDP CUB" : (BACKEND == THRUST) ? "Thrust" : "CUB", + num_items, num_selected, float(num_selected) / num_items, typeid(T).name(), (int) sizeof(T)); + fflush(stdout); + + // Run Test + Test(h_in, h_flags, select_op, h_reference, num_selected, num_items); + + // Cleanup + if (h_reference) delete[] h_reference; + if (h_flags) delete[] h_flags; +} + + +/** + * Test different selection ratios + */ +template < + Backend BACKEND, + bool IS_FLAGGED, + bool IS_PARTITION, + typename T> +void Test( + int num_items) +{ + for (float select_ratio = 0.0f; select_ratio <= 1.0f; select_ratio += 0.2f) + { + TestPointer(num_items, select_ratio); + } +} + + +/** + * Test (select vs. partition) and (flagged vs. 
functor) + */ +template < + Backend BACKEND, + typename T> +void TestMethod( + int num_items) +{ + // Functor + Test(num_items); + Test(num_items); + + // Flagged + Test(num_items); + Test(num_items); +} + + +/** + * Test different dispatch + */ +template < + typename T> +void TestOp( + int num_items) +{ + TestMethod(num_items); +#ifdef CUB_CDP + TestMethod(num_items); +#endif +} + + +/** + * Test different input sizes + */ +template +void Test( + int num_items) +{ + if (num_items < 0) + { + TestOp(0); + TestOp(1); + TestOp(100); + TestOp(10000); + TestOp(1000000); + } + else + { + TestOp(num_items); + } +} + +/** + * Test select/partition on pointer types + */ +template +void ComparePointer( + int num_items, + float select_ratio) +{ + printf("-- Select-if ----------------------------\n"); + TestPointer(num_items, select_ratio); + TestPointer(num_items, select_ratio); + + printf("-- Partition-if ----------------------------\n"); + TestPointer(num_items, select_ratio); + TestPointer(num_items, select_ratio); + + printf("-- Select-flagged ----------------------------\n"); + TestPointer(num_items, select_ratio); + TestPointer(num_items, select_ratio); + + printf("-- Partition-flagged ----------------------------\n"); + TestPointer(num_items, select_ratio); + TestPointer(num_items, select_ratio); + +} + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_items = -1; + float select_ratio = 0.5; + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + args.GetCmdLineArgument("i", g_timing_iterations); + args.GetCmdLineArgument("repeat", g_repeat); + args.GetCmdLineArgument("ratio", select_ratio); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--i= " + "[--device=] " + "[--ratio=] " + "[--repeat=] " + "[--v] " + "[--cdp] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + g_device_giga_bandwidth = args.device_giga_bandwidth; + printf("\n"); + +#ifdef QUICKER_TEST + + // Compile/run basic CUB test + if (num_items < 0) num_items = 32000000; + + printf("-- Select-if ----------------------------\n"); + TestPointer(num_items, select_ratio); + + printf("-- Partition-if ----------------------------\n"); + TestPointer(num_items, select_ratio); + + printf("-- Select-flagged ----------------------------\n"); + TestPointer(num_items, select_ratio); + + printf("-- Partition-flagged ----------------------------\n"); + TestPointer(num_items, select_ratio); + + +#elif defined(QUICK_TEST) + + // Get device ordinal + int device_ordinal; + CubDebugExit(cudaGetDevice(&device_ordinal)); + + // Get device SM version + int sm_version; + CubDebugExit(SmVersion(sm_version, device_ordinal)); + + // Compile/run quick tests + if (num_items < 0) num_items = 32000000; + + printf("-- Iterator ----------------------------\n"); + TestIterator(num_items, select_ratio); + + ComparePointer( num_items * ((sm_version <= 130) ? 1 : 4), select_ratio); + ComparePointer( num_items * ((sm_version <= 130) ? 
1 : 2), select_ratio); + ComparePointer( num_items, select_ratio); + ComparePointer( num_items / 2, select_ratio); + ComparePointer( num_items / 4, select_ratio); + +#else + + // Compile/run thorough tests + for (int i = 0; i <= g_repeat; ++i) + { + // Test different input types + Test(num_items); + Test(num_items); + Test(num_items); + Test(num_items); + + Test(num_items); + Test(num_items); + Test(num_items); + Test(num_items); + + Test(num_items); + Test(num_items); + Test(num_items); + Test(num_items); + + Test(num_items); + Test(num_items); + } + +#endif + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_select_unique.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_select_unique.cu new file mode 100644 index 0000000..fff2958 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_device_select_unique.cu @@ -0,0 +1,651 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
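The next file, test_device_select_unique.cu, benchmarks cub::DeviceSelect::Unique, which copies the first item of every run of consecutive duplicates (the device-wide analogue of std::unique_copy). A minimal sketch of the call under the same two-pass idiom; names and input values are illustrative, not taken from the test.

#include <cub/cub.cuh>
#include <cuda_runtime.h>
#include <cstdio>
#include <vector>

int main()
{
    const int num_items = 12;
    std::vector<int> h_in(num_items);
    for (int i = 0; i < num_items; ++i) h_in[i] = i / 4;   // runs of length 4: 0 0 0 0 1 1 1 1 2 2 2 2

    int *d_in = NULL, *d_out = NULL, *d_num_selected_out = NULL;
    cudaMalloc((void**)&d_in, num_items * sizeof(int));
    cudaMalloc((void**)&d_out, num_items * sizeof(int));
    cudaMalloc((void**)&d_num_selected_out, sizeof(int));
    cudaMemcpy(d_in, h_in.data(), num_items * sizeof(int), cudaMemcpyHostToDevice);

    // Two-pass CUB idiom: size query with NULL temp storage, then the real run.
    void *d_temp_storage = NULL;
    size_t temp_storage_bytes = 0;
    cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes,
                              d_in, d_out, d_num_selected_out, num_items);
    cudaMalloc(&d_temp_storage, temp_storage_bytes);
    cub::DeviceSelect::Unique(d_temp_storage, temp_storage_bytes,
                              d_in, d_out, d_num_selected_out, num_items);

    int h_num_selected = 0;
    cudaMemcpy(&h_num_selected, d_num_selected_out, sizeof(int), cudaMemcpyDeviceToHost);
    printf("%d unique runs kept\n", h_num_selected);   // expect 3 for the input above

    cudaFree(d_temp_storage); cudaFree(d_num_selected_out); cudaFree(d_out); cudaFree(d_in);
    return 0;
}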
+ * + ******************************************************************************/ + +/****************************************************************************** + * Test of DeviceSelect::Unique utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include + +#include +#include + +#include +#include +#include + +#include +#include + +#include "test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; +int g_timing_iterations = 0; +int g_repeat = 0; +float g_device_giga_bandwidth; +CachingDeviceAllocator g_allocator(true); + +// Dispatch types +enum Backend +{ + CUB, // CUB method + THRUST, // Thrust method + CDP, // GPU-based (dynamic parallelism) dispatch to CUB method +}; + + +//--------------------------------------------------------------------- +// Dispatch to different CUB DeviceSelect entrypoints +//--------------------------------------------------------------------- + + +/** + * Dispatch to unique entrypoint + */ +template +CUB_RUNTIME_FUNCTION __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t &temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + cudaError_t error = cudaSuccess; + for (int i = 0; i < timing_timing_iterations; ++i) + { + error = DeviceSelect::Unique(d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, stream, debug_synchronous); + } + return error; +} + + +//--------------------------------------------------------------------- +// Dispatch to different Thrust entrypoints +//--------------------------------------------------------------------- + + +/** + * Dispatch to unique entrypoint + */ +template +__host__ __forceinline__ +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void *d_temp_storage, + size_t &temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + // The input value type + typedef typename std::iterator_traits::value_type InputT; + + // The output value type + typedef typename If<(Equals::value_type, void>::VALUE), // OutputT = (if output iterator's value type is void) ? + typename std::iterator_traits::value_type, // ... then the input iterator's value type, + typename std::iterator_traits::value_type>::Type OutputT; // ... 
else the output iterator's value type + + if (d_temp_storage == 0) + { + temp_storage_bytes = 1; + } + else + { + thrust::device_ptr d_out_wrapper_end; + thrust::device_ptr d_in_wrapper(d_in); + thrust::device_ptr d_out_wrapper(d_out); + for (int i = 0; i < timing_timing_iterations; ++i) + { + d_out_wrapper_end = thrust::unique_copy(d_in_wrapper, d_in_wrapper + num_items, d_out_wrapper); + } + + OffsetT num_selected = OffsetT(d_out_wrapper_end - d_out_wrapper); + CubDebugExit(cudaMemcpy(d_num_selected_out, &num_selected, sizeof(OffsetT), cudaMemcpyHostToDevice)); + + } + + return cudaSuccess; +} + + + +//--------------------------------------------------------------------- +// CUDA Nested Parallelism Test Kernel +//--------------------------------------------------------------------- + +/** + * Simple wrapper kernel to invoke DeviceSelect + */ +template +__global__ void CnpDispatchKernel( + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + OffsetT num_items, + bool debug_synchronous) +{ + +#ifndef CUB_CDP + *d_cdp_error = cudaErrorNotSupported; +#else + *d_cdp_error = Dispatch(Int2Type(), timing_timing_iterations, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, 0, debug_synchronous); + *d_temp_storage_bytes = temp_storage_bytes; +#endif +} + + +/** + * Dispatch to CDP kernel + */ +template +cudaError_t Dispatch( + Int2Type dispatch_to, + int timing_timing_iterations, + size_t *d_temp_storage_bytes, + cudaError_t *d_cdp_error, + + void* d_temp_storage, + size_t &temp_storage_bytes, + InputIteratorT d_in, + OutputIteratorT d_out, + NumSelectedIteratorT d_num_selected_out, + OffsetT num_items, + cudaStream_t stream, + bool debug_synchronous) +{ + // Invoke kernel to invoke device-side dispatch + CnpDispatchKernel<<<1,1>>>(timing_timing_iterations, d_temp_storage_bytes, d_cdp_error, + d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, debug_synchronous); + + // Copy out temp_storage_bytes + CubDebugExit(cudaMemcpy(&temp_storage_bytes, d_temp_storage_bytes, sizeof(size_t) * 1, cudaMemcpyDeviceToHost)); + + // Copy out error + cudaError_t retval; + CubDebugExit(cudaMemcpy(&retval, d_cdp_error, sizeof(cudaError_t) * 1, cudaMemcpyDeviceToHost)); + return retval; +} + + + +//--------------------------------------------------------------------- +// Test generation +//--------------------------------------------------------------------- + + +/** + * Initialize problem + */ +template +void Initialize( + int entropy_reduction, + T *h_in, + int num_items, + int max_segment) +{ + unsigned int max_int = (unsigned int) -1; + + int key = 0; + int i = 0; + while (i < num_items) + { + // Select number of repeating occurrences for the current run + int repeat; + if (max_segment < 0) + { + repeat = num_items; + } + else if (max_segment < 2) + { + repeat = 1; + } + else + { + RandomBits(repeat, entropy_reduction); + repeat = (int) ((double(repeat) * double(max_segment)) / double(max_int)); + repeat = CUB_MAX(1, repeat); + } + + int j = i; + while (j < CUB_MIN(i + repeat, num_items)) + { + InitValue(INTEGER_SEED, h_in[j], key); + j++; + } + + i = j; + key++; + } + + if (g_verbose) + { + printf("Input:\n"); + DisplayResults(h_in, num_items); + printf("\n\n"); + } +} + + +/** + * Solve unique problem + */ +template < + typename 
InputIteratorT, + typename T> +int Solve( + InputIteratorT h_in, + T *h_reference, + int num_items) +{ + int num_selected = 0; + if (num_items > 0) + { + h_reference[num_selected] = h_in[0]; + num_selected++; + } + + for (int i = 1; i < num_items; ++i) + { + if (h_in[i] != h_in[i - 1]) + { + h_reference[num_selected] = h_in[i]; + num_selected++; + } + } + + return num_selected; +} + + + +/** + * Test DeviceSelect for a given problem input + */ +template < + Backend BACKEND, + typename DeviceInputIteratorT, + typename T> +void Test( + DeviceInputIteratorT d_in, + T *h_reference, + int num_selected, + int num_items) +{ + // Allocate device output array and num selected + T *d_out = NULL; + int *d_num_selected_out = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(T) * num_items)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_num_selected_out, sizeof(int))); + + // Allocate CDP device arrays + size_t *d_temp_storage_bytes = NULL; + cudaError_t *d_cdp_error = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_temp_storage_bytes, sizeof(size_t) * 1)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_cdp_error, sizeof(cudaError_t) * 1)); + + // Allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(Dispatch(Int2Type(), 1, d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, 0, true)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Clear device output array + CubDebugExit(cudaMemset(d_out, 0, sizeof(T) * num_items)); + CubDebugExit(cudaMemset(d_num_selected_out, 0, sizeof(int))); + + // Run warmup/correctness iteration + CubDebugExit(Dispatch(Int2Type(), 1, d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, 0, true)); + + // Check for correctness (and display results, if specified) + int compare1 = CompareDeviceResults(h_reference, d_out, num_selected, true, g_verbose); + printf("\t Data %s ", compare1 ? "FAIL" : "PASS"); + + int compare2 = CompareDeviceResults(&num_selected, d_num_selected_out, 1, true, g_verbose); + printf("\t Count %s ", compare2 ? 
"FAIL" : "PASS"); + + // Flush any stdout/stderr + fflush(stdout); + fflush(stderr); + + // Performance + GpuTimer gpu_timer; + gpu_timer.Start(); + CubDebugExit(Dispatch(Int2Type(), g_timing_iterations, d_temp_storage_bytes, d_cdp_error, d_temp_storage, temp_storage_bytes, d_in, d_out, d_num_selected_out, num_items, 0, false)); + gpu_timer.Stop(); + float elapsed_millis = gpu_timer.ElapsedMillis(); + + // Display performance + if (g_timing_iterations > 0) + { + float avg_millis = elapsed_millis / g_timing_iterations; + float giga_rate = float(num_items) / avg_millis / 1000.0f / 1000.0f; + float giga_bandwidth = float((num_items + num_selected) * sizeof(T)) / avg_millis / 1000.0f / 1000.0f; + printf(", %.3f avg ms, %.3f billion items/s, %.3f logical GB/s, %.1f%% peak", avg_millis, giga_rate, giga_bandwidth, giga_bandwidth / g_device_giga_bandwidth * 100.0); + } + printf("\n\n"); + + // Flush any stdout/stderr + fflush(stdout); + fflush(stderr); + + // Cleanup + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_num_selected_out) CubDebugExit(g_allocator.DeviceFree(d_num_selected_out)); + if (d_temp_storage_bytes) CubDebugExit(g_allocator.DeviceFree(d_temp_storage_bytes)); + if (d_cdp_error) CubDebugExit(g_allocator.DeviceFree(d_cdp_error)); + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + + // Correctness asserts + AssertEquals(0, compare1 | compare2); +} + + +/** + * Test DeviceSelect on pointer type + */ +template < + Backend BACKEND, + typename T> +void TestPointer( + int num_items, + int entropy_reduction, + int max_segment) +{ + // Allocate host arrays + T* h_in = new T[num_items]; + T* h_reference = new T[num_items]; + + // Initialize problem and solution + Initialize(entropy_reduction, h_in, num_items, max_segment); + int num_selected = Solve(h_in, h_reference, num_items); + + printf("\nPointer %s cub::DeviceSelect::Unique %d items, %d selected (avg run length %.3f), %s %d-byte elements, entropy_reduction %d\n", + (BACKEND == CDP) ? "CDP CUB" : (BACKEND == THRUST) ? "Thrust" : "CUB", + num_items, num_selected, float(num_items) / num_selected, + typeid(T).name(), + (int) sizeof(T), + entropy_reduction); + fflush(stdout); + + // Allocate problem device arrays + T *d_in = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(T) * num_items)); + + // Initialize device input + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(T) * num_items, cudaMemcpyHostToDevice)); + + // Run Test + Test(d_in, h_reference, num_selected, num_items); + + // Cleanup + if (h_in) delete[] h_in; + if (h_reference) delete[] h_reference; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); +} + + +/** + * Test DeviceSelect on iterator type + */ +template < + Backend BACKEND, + typename T> +void TestIterator( + int num_items) +{ + // Use a counting iterator as the input + CountingInputIterator h_in(0); + + // Allocate host arrays + T* h_reference = new T[num_items]; + + // Initialize problem and solution + int num_selected = Solve(h_in, h_reference, num_items); + + printf("\nIterator %s cub::DeviceSelect::Unique %d items, %d selected (avg run length %.3f), %s %d-byte elements\n", + (BACKEND == CDP) ? "CDP CUB" : (BACKEND == THRUST) ? 
"Thrust" : "CUB", + num_items, num_selected, float(num_items) / num_selected, + typeid(T).name(), + (int) sizeof(T)); + fflush(stdout); + + // Run Test + Test(h_in, h_reference, num_selected, num_items); + + // Cleanup + if (h_reference) delete[] h_reference; +} + + +/** + * Test different gen modes + */ +template < + Backend BACKEND, + typename T> +void Test( + int num_items) +{ + for (int max_segment = 1; ((max_segment > 0) && (max_segment < num_items)); max_segment *= 11) + { + TestPointer(num_items, 0, max_segment); + TestPointer(num_items, 2, max_segment); + TestPointer(num_items, 7, max_segment); + } +} + + +/** + * Test different dispatch + */ +template < + typename T> +void TestOp( + int num_items) +{ + Test(num_items); +#ifdef CUB_CDP + Test(num_items); +#endif +} + + +/** + * Test different input sizes + */ +template +void Test( + int num_items) +{ + if (num_items < 0) + { + TestOp(0); + TestOp(1); + TestOp(100); + TestOp(10000); + TestOp(1000000); + } + else + { + TestOp(num_items); + } +} + + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + int num_items = -1; + int entropy_reduction = 0; + int maxseg = 1000; + + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("n", num_items); + args.GetCmdLineArgument("i", g_timing_iterations); + args.GetCmdLineArgument("repeat", g_repeat); + args.GetCmdLineArgument("maxseg", maxseg); + args.GetCmdLineArgument("entropy", entropy_reduction); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--n= " + "[--i= " + "[--device=] " + "[--maxseg=]" + "[--entropy=]" + "[--repeat=]" + "[--v] " + "[--cdp]" + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + g_device_giga_bandwidth = args.device_giga_bandwidth; + printf("\n"); + +#ifdef QUICKER_TEST + + // Compile/run basic CUB test + if (num_items < 0) num_items = 32000000; + TestPointer( num_items, entropy_reduction, maxseg); + +#elif defined(QUICK_TEST) + + // Get device ordinal + int device_ordinal; + CubDebugExit(cudaGetDevice(&device_ordinal)); + + // Get device SM version + int sm_version; + CubDebugExit(SmVersion(sm_version, device_ordinal)); + + // Compile/run quick tests + if (num_items < 0) num_items = 32000000; + + printf("-- Iterator ----------------------------\n"); + TestIterator( num_items); + + printf("----------------------------\n"); + TestPointer( num_items * ((sm_version <= 130) ? 1 : 4), entropy_reduction, maxseg); + TestPointer( num_items * ((sm_version <= 130) ? 1 : 4), entropy_reduction, maxseg); + + printf("----------------------------\n"); + TestPointer( num_items * ((sm_version <= 130) ? 1 : 2), entropy_reduction, maxseg); + TestPointer( num_items * ((sm_version <= 130) ? 
1 : 2), entropy_reduction, maxseg); + + printf("----------------------------\n"); + TestPointer( num_items, entropy_reduction, maxseg); + TestPointer( num_items, entropy_reduction, maxseg); + + printf("----------------------------\n"); + TestPointer( num_items / 2, entropy_reduction, maxseg); + TestPointer(num_items / 2, entropy_reduction, maxseg); + + printf("----------------------------\n"); + TestPointer( num_items / 4, entropy_reduction, maxseg); + TestPointer( num_items / 4, entropy_reduction, maxseg); + +#else + + // Compile/run thorough tests + for (int i = 0; i <= g_repeat; ++i) + { + // Test different input types + Test(num_items); + Test(num_items); + Test(num_items); + Test(num_items); + + Test(num_items); + Test(num_items); + Test(num_items); + Test(num_items); + + Test(num_items); + Test(num_items); + Test(num_items); + Test(num_items); + + Test(num_items); + Test(num_items); + } + +#endif + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_grid_barrier.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_grid_barrier.cu new file mode 100644 index 0000000..e6e3b81 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_grid_barrier.cu @@ -0,0 +1,152 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
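The next file, test_grid_barrier.cu, measures the throughput of CUB's software grid-wide barrier. A minimal sketch of the same setup-and-sync flow appears below; the names are illustrative, and the grid size is chosen conservatively (one block per SM) because GridBarrier::Sync() is only safe when every block in the grid is resident on the device at once, which is what the test's occupancy calculation guarantees.

#include <cub/cub.cuh>
#include <cuda_runtime.h>
#include <cstdio>

__global__ void BarrierKernel(cub::GridBarrier barrier, int iterations)
{
    for (int i = 0; i < iterations; ++i)
        barrier.Sync();   // every block in the grid rendezvouses here before continuing
}

int main()
{
    int device = 0, sm_count = 0;
    cudaGetDevice(&device);
    cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, device);

    const int grid_size  = sm_count;   // one block per SM: guaranteed co-resident
    const int block_size = 128;
    const int iterations = 10000;

    // GridBarrierLifetime owns the device-side synchronization counters; it derives
    // from GridBarrier, so it can be passed (sliced) straight to the kernel.
    cub::GridBarrierLifetime barrier;
    barrier.Setup(grid_size);

    BarrierKernel<<<grid_size, block_size>>>(barrier, iterations);
    cudaDeviceSynchronize();
    printf("ran %d grid-wide barriers across %d blocks\n", iterations, grid_size);
    return 0;
}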
+ * + ******************************************************************************/ + +/****************************************************************************** + * Test evaluation for software global barrier throughput + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include + +#include + +#include "test_util.h" + +using namespace cub; + + +//--------------------------------------------------------------------- +// Test kernels +//--------------------------------------------------------------------- + +/** + * Kernel that iterates through the specified number of software global barriers + */ +__global__ void Kernel( + GridBarrier global_barrier, + int iterations) +{ + for (int i = 0; i < iterations; i++) + { + global_barrier.Sync(); + } +} + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + cudaError_t retval = cudaSuccess; + + // Defaults + int iterations = 10000; + int block_size = 128; + int grid_size = -1; + + // Initialize command line + CommandLineArgs args(argc, argv); + + // Get args + args.GetCmdLineArgument("i", iterations); + args.GetCmdLineArgument("grid-size", grid_size); + args.GetCmdLineArgument("block-size", block_size); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=]" + "[--i=]" + "[--grid-size]" + "[--block-size]" + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Get device ordinal + int device_ordinal; + CubDebugExit(cudaGetDevice(&device_ordinal)); + + // Get device SM version + int sm_version; + CubDebugExit(SmVersion(sm_version, device_ordinal)); + + // Get SM properties + int sm_count, max_block_threads, max_sm_occupancy; + CubDebugExit(cudaDeviceGetAttribute(&sm_count, cudaDevAttrMultiProcessorCount, device_ordinal)); + CubDebugExit(cudaDeviceGetAttribute(&max_block_threads, cudaDevAttrMaxThreadsPerBlock, device_ordinal)); + CubDebugExit(MaxSmOccupancy(max_sm_occupancy, EmptyKernel, 32)); + + // Compute grid size and occupancy + int occupancy = CUB_MIN((max_block_threads / block_size), max_sm_occupancy); + + if (grid_size == -1) + { + grid_size = occupancy * sm_count; + } + else + { + occupancy = grid_size / sm_count; + } + + printf("Initializing software global barrier for Kernel<<<%d,%d>>> with %d occupancy\n", + grid_size, block_size, occupancy); + fflush(stdout); + + // Init global barrier + GridBarrierLifetime global_barrier; + global_barrier.Setup(grid_size); + + // Time kernel + GpuTimer gpu_timer; + gpu_timer.Start(); + Kernel<<>>(global_barrier, iterations); + gpu_timer.Stop(); + + retval = CubDebug(cudaThreadSynchronize()); + + // Output timing results + float avg_elapsed = gpu_timer.ElapsedMillis() / float(iterations); + printf("%d iterations, %f total elapsed millis, %f avg elapsed millis\n", + iterations, + gpu_timer.ElapsedMillis(), + avg_elapsed); + + return retval; +} diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_iterator.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_iterator.cu new file mode 100644 index 0000000..cb9b47e --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_iterator.cu @@ -0,0 +1,805 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. 
All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Test of iterator utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "test_util.h" + +#include +#include + +using namespace cub; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; +CachingDeviceAllocator g_allocator(true); + +// Dispatch types +enum Backend +{ + CUB, // CUB method + THRUST, // Thrust method + CDP, // GPU-based (dynamic parallelism) dispatch to CUB method +}; + + +template +struct TransformOp +{ + // Increment transform + __host__ __device__ __forceinline__ T operator()(T input) const + { + T addend; + InitValue(INTEGER_SEED, addend, 1); + return input + addend; + } +}; + +struct SelectOp +{ + template + __host__ __device__ __forceinline__ bool operator()(T input) + { + return true; + } +}; + + +//--------------------------------------------------------------------- +// Test kernels +//--------------------------------------------------------------------- + +/** + * Test random access input iterator + */ +template < + typename InputIteratorT, + typename T> +__global__ void Kernel( + InputIteratorT d_in, + T *d_out, + InputIteratorT *d_itrs) +{ + d_out[0] = *d_in; // Value at offset 0 + d_out[1] = d_in[100]; // Value at offset 100 + d_out[2] = *(d_in + 1000); // Value at offset 1000 + d_out[3] = *(d_in + 10000); // Value at offset 10000 + + d_in++; + d_out[4] = d_in[0]; // Value at offset 1 + + d_in += 20; + d_out[5] = d_in[0]; // 
Value at offset 21 + d_itrs[0] = d_in; // Iterator at offset 21 + + d_in -= 10; + d_out[6] = d_in[0]; // Value at offset 11; + + d_in -= 11; + d_out[7] = d_in[0]; // Value at offset 0 + d_itrs[1] = d_in; // Iterator at offset 0 +} + + + +//--------------------------------------------------------------------- +// Host testing subroutines +//--------------------------------------------------------------------- + + +/** + * Run iterator test on device + */ +template < + typename InputIteratorT, + typename T, + int TEST_VALUES> +void Test( + InputIteratorT d_in, + T (&h_reference)[TEST_VALUES]) +{ + // Allocate device arrays + T *d_out = NULL; + InputIteratorT *d_itrs = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(T) * TEST_VALUES)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_itrs, sizeof(InputIteratorT) * 2)); + + int compare; + + // Run unguarded kernel + Kernel<<<1, 1>>>(d_in, d_out, d_itrs); + + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + + // Check results + compare = CompareDeviceResults(h_reference, d_out, TEST_VALUES, g_verbose, g_verbose); + printf("\tValues: %s\n", (compare) ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Check iterator at offset 21 + InputIteratorT h_itr = d_in + 21; + compare = CompareDeviceResults(&h_itr, d_itrs, 1, g_verbose, g_verbose); + printf("\tIterators: %s\n", (compare) ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Check iterator at offset 0 + compare = CompareDeviceResults(&d_in, d_itrs + 1, 1, g_verbose, g_verbose); + printf("\tIterators: %s\n", (compare) ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_itrs) CubDebugExit(g_allocator.DeviceFree(d_itrs)); +} + + +/** + * Test constant iterator + */ +template +void TestConstant(T base) +{ + printf("\nTesting constant iterator on type %s (base: %lld)\n", typeid(T).name(), (unsigned long long) (base)); fflush(stdout); + + // + // Test iterator manipulation in kernel + // + + T h_reference[8] = {base, base, base, base, base, base, base, base}; + ConstantInputIterator d_itr(base); + Test(d_itr, h_reference); + +#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer + + // + // Test with thrust::copy_if() + // + + int copy_items = 100; + T *h_copy = new T[copy_items]; + T *d_copy = NULL; + + for (int i = 0; i < copy_items; ++i) + h_copy[i] = d_itr[i]; + + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * copy_items)); + thrust::device_ptr d_copy_wrapper(d_copy); + + thrust::copy_if(d_itr, d_itr + copy_items, d_copy_wrapper, SelectOp()); + + int compare = CompareDeviceResults(h_copy, d_copy, copy_items, g_verbose, g_verbose); + printf("\tthrust::copy_if(): %s\n", (compare) ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + + if (h_copy) delete[] h_copy; + if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); + +#endif // THRUST_VERSION +} + + +/** + * Test counting iterator + */ +template +void TestCounting(T base) +{ + printf("\nTesting counting iterator on type %s (base: %d) \n", typeid(T).name(), int(base)); fflush(stdout); + + // + // Test iterator manipulation in kernel + // + + // Initialize reference data + T h_reference[8]; + h_reference[0] = base + 0; // Value at offset 0 + h_reference[1] = base + 100; // Value at offset 100 + h_reference[2] = base + 1000; // Value at offset 1000 + h_reference[3] = base + 10000; // Value at offset 10000 + h_reference[4] = base + 1; // Value at offset 1 + h_reference[5] = base + 21; // Value at offset 21 + h_reference[6] = base + 11; // Value at offset 11 + h_reference[7] = base + 0; // Value at offset 0; + + CountingInputIterator d_itr(base); + Test(d_itr, h_reference); + +#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer + + // + // Test with thrust::copy_if() + // + + unsigned long long max_items = ((1ull << ((sizeof(T) * 8) - 1)) - 1); + size_t copy_items = (size_t) CUB_MIN(max_items - base, 100); // potential issue with differencing overflows when T is a smaller type than can handle the offset + T *h_copy = new T[copy_items]; + T *d_copy = NULL; + + for (unsigned long long i = 0; i < copy_items; ++i) + h_copy[i] = d_itr[i]; + + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * copy_items)); + thrust::device_ptr d_copy_wrapper(d_copy); + thrust::copy_if(d_itr, d_itr + copy_items, d_copy_wrapper, SelectOp()); + + int compare = CompareDeviceResults(h_copy, d_copy, copy_items, g_verbose, g_verbose); + printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + if (h_copy) delete[] h_copy; + if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); + +#endif // THRUST_VERSION +} + + +/** + * Test modified iterator + */ +template +void TestModified() +{ + printf("\nTesting cache-modified iterator on type %s\n", typeid(T).name()); fflush(stdout); + + // + // Test iterator manipulation in kernel + // + + const unsigned int TEST_VALUES = 11000; + + T *h_data = new T[TEST_VALUES]; + for (int i = 0; i < TEST_VALUES; ++i) + { + RandomBits(h_data[i]); + } + + // Allocate device arrays + T *d_data = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); + CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); + + // Initialize reference data + T h_reference[8]; + h_reference[0] = h_data[0]; // Value at offset 0 + h_reference[1] = h_data[100]; // Value at offset 100 + h_reference[2] = h_data[1000]; // Value at offset 1000 + h_reference[3] = h_data[10000]; // Value at offset 10000 + h_reference[4] = h_data[1]; // Value at offset 1 + h_reference[5] = h_data[21]; // Value at offset 21 + h_reference[6] = h_data[11]; // Value at offset 11 + h_reference[7] = h_data[0]; // Value at offset 0; + + Test(CacheModifiedInputIterator((CastT*) d_data), h_reference); + Test(CacheModifiedInputIterator((CastT*) d_data), h_reference); + Test(CacheModifiedInputIterator((CastT*) d_data), h_reference); + Test(CacheModifiedInputIterator((CastT*) d_data), h_reference); + Test(CacheModifiedInputIterator((CastT*) d_data), h_reference); + Test(CacheModifiedInputIterator((CastT*) d_data), h_reference); + Test(CacheModifiedInputIterator((CastT*) d_data), h_reference); + +#if (THRUST_VERSION >= 100700) // Thrust 1.7 or 
newer + + // + // Test with thrust::copy_if() + // + + T *d_copy = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); + + CacheModifiedInputIterator d_in_itr((CastT*) d_data); + CacheModifiedOutputIterator d_out_itr((CastT*) d_copy); + + thrust::copy_if(d_in_itr, d_in_itr + TEST_VALUES, d_out_itr, SelectOp()); + + int compare = CompareDeviceResults(h_data, d_copy, TEST_VALUES, g_verbose, g_verbose); + printf("\tthrust::copy_if(): %s\n", (compare) ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); + +#endif // THRUST_VERSION + + if (h_data) delete[] h_data; + if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); +} + + +/** + * Test transform iterator + */ +template +void TestTransform() +{ + printf("\nTesting transform iterator on type %s\n", typeid(T).name()); fflush(stdout); + + // + // Test iterator manipulation in kernel + // + + const unsigned int TEST_VALUES = 11000; + + T *h_data = new T[TEST_VALUES]; + for (int i = 0; i < TEST_VALUES; ++i) + { + InitValue(INTEGER_SEED, h_data[i], i); + } + + // Allocate device arrays + T *d_data = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); + CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); + + TransformOp op; + + // Initialize reference data + T h_reference[8]; + h_reference[0] = op(h_data[0]); // Value at offset 0 + h_reference[1] = op(h_data[100]); // Value at offset 100 + h_reference[2] = op(h_data[1000]); // Value at offset 1000 + h_reference[3] = op(h_data[10000]); // Value at offset 10000 + h_reference[4] = op(h_data[1]); // Value at offset 1 + h_reference[5] = op(h_data[21]); // Value at offset 21 + h_reference[6] = op(h_data[11]); // Value at offset 11 + h_reference[7] = op(h_data[0]); // Value at offset 0; + + TransformInputIterator, CastT*> d_itr((CastT*) d_data, op); + Test(d_itr, h_reference); + +#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer + + // + // Test with thrust::copy_if() + // + + T *h_copy = new T[TEST_VALUES]; + for (int i = 0; i < TEST_VALUES; ++i) + h_copy[i] = op(h_data[i]); + + T *d_copy = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); + thrust::device_ptr d_copy_wrapper(d_copy); + + thrust::copy_if(d_itr, d_itr + TEST_VALUES, d_copy_wrapper, SelectOp()); + + int compare = CompareDeviceResults(h_copy, d_copy, TEST_VALUES, g_verbose, g_verbose); + printf("\tthrust::copy_if(): %s\n", (compare) ? 
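TestModified above instantiates the harness once per cache load modifier (in the upstream CUB 1.8.0 test these are presumably LOAD_DEFAULT, LOAD_CA, LOAD_CG, LOAD_CS, LOAD_CV, LOAD_LDG and LOAD_VOLATILE). A minimal sketch of the iterator itself, with the template arguments spelled out and the dereference placed in device code where the modified loads are actually issued:

// Illustrative only: wraps a raw device pointer and issues loads with the chosen
// cache modifier (LOAD_LDG here, which is intended to lower to __ldg on sm_35+).
#include <cub/cub.cuh>

__global__ void ReadThroughLdg(int *d_in, int *d_out, int n)
{
    cub::CacheModifiedInputIterator<cub::LOAD_LDG, int> itr(d_in);

    int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        d_out[i] = itr[i];   // read-only load of d_in[i] through the LDG path
}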
"FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + if (h_copy) delete[] h_copy; + if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); + +#endif // THRUST_VERSION + + if (h_data) delete[] h_data; + if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); +} + + +/** + * Test tex-obj texture iterator + */ +template +void TestTexObj() +{ + printf("\nTesting tex-obj iterator on type %s\n", typeid(T).name()); fflush(stdout); + + // + // Test iterator manipulation in kernel + // + + const unsigned int TEST_VALUES = 11000; + const unsigned int DUMMY_OFFSET = 500; + const unsigned int DUMMY_TEST_VALUES = TEST_VALUES - DUMMY_OFFSET; + + T *h_data = new T[TEST_VALUES]; + for (int i = 0; i < TEST_VALUES; ++i) + { + RandomBits(h_data[i]); + } + + // Allocate device arrays + T *d_data = NULL; + T *d_dummy = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); + CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); + + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dummy, sizeof(T) * DUMMY_TEST_VALUES)); + CubDebugExit(cudaMemcpy(d_dummy, h_data + DUMMY_OFFSET, sizeof(T) * DUMMY_TEST_VALUES, cudaMemcpyHostToDevice)); + + // Initialize reference data + T h_reference[8]; + h_reference[0] = h_data[0]; // Value at offset 0 + h_reference[1] = h_data[100]; // Value at offset 100 + h_reference[2] = h_data[1000]; // Value at offset 1000 + h_reference[3] = h_data[10000]; // Value at offset 10000 + h_reference[4] = h_data[1]; // Value at offset 1 + h_reference[5] = h_data[21]; // Value at offset 21 + h_reference[6] = h_data[11]; // Value at offset 11 + h_reference[7] = h_data[0]; // Value at offset 0; + + // Create and bind obj-based test iterator + TexObjInputIterator d_obj_itr; + CubDebugExit(d_obj_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES)); + + Test(d_obj_itr, h_reference); + +#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer + + // + // Test with thrust::copy_if() + // + + T *d_copy = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); + thrust::device_ptr d_copy_wrapper(d_copy); + + CubDebugExit(cudaMemset(d_copy, 0, sizeof(T) * TEST_VALUES)); + thrust::copy_if(d_obj_itr, d_obj_itr + TEST_VALUES, d_copy_wrapper, SelectOp()); + + int compare = CompareDeviceResults(h_data, d_copy, TEST_VALUES, g_verbose, g_verbose); + printf("\tthrust::copy_if(): %s\n", (compare) ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + CubDebugExit(d_obj_itr.UnbindTexture()); + + if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); + +#endif // THRUST_VERSION + + if (h_data) delete[] h_data; + if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); + if (d_dummy) CubDebugExit(g_allocator.DeviceFree(d_dummy)); +} + + +#if CUDA_VERSION >= 5050 + +/** + * Test tex-ref texture iterator + */ +template +void TestTexRef() +{ + printf("\nTesting tex-ref iterator on type %s\n", typeid(T).name()); fflush(stdout); + + // + // Test iterator manipulation in kernel + // + + const unsigned int TEST_VALUES = 11000; + const unsigned int DUMMY_OFFSET = 500; + const unsigned int DUMMY_TEST_VALUES = TEST_VALUES - DUMMY_OFFSET; + + T *h_data = new T[TEST_VALUES]; + for (int i = 0; i < TEST_VALUES; ++i) + { + RandomBits(h_data[i]); + } + + // Allocate device arrays + T *d_data = NULL; + T *d_dummy = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); + CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); + + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_dummy, sizeof(T) * DUMMY_TEST_VALUES)); + CubDebugExit(cudaMemcpy(d_dummy, h_data + DUMMY_OFFSET, sizeof(T) * DUMMY_TEST_VALUES, cudaMemcpyHostToDevice)); + + // Initialize reference data + T h_reference[8]; + h_reference[0] = h_data[0]; // Value at offset 0 + h_reference[1] = h_data[100]; // Value at offset 100 + h_reference[2] = h_data[1000]; // Value at offset 1000 + h_reference[3] = h_data[10000]; // Value at offset 10000 + h_reference[4] = h_data[1]; // Value at offset 1 + h_reference[5] = h_data[21]; // Value at offset 21 + h_reference[6] = h_data[11]; // Value at offset 11 + h_reference[7] = h_data[0]; // Value at offset 0; + + // Create and bind ref-based test iterator + TexRefInputIterator d_ref_itr; + CubDebugExit(d_ref_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES)); + + // Create and bind dummy iterator of same type to check with interferance + TexRefInputIterator d_ref_itr2; + CubDebugExit(d_ref_itr2.BindTexture((CastT*) d_dummy, sizeof(T) * DUMMY_TEST_VALUES)); + + Test(d_ref_itr, h_reference); + +#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer + + // + // Test with thrust::copy_if() + // + + T *d_copy = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); + thrust::device_ptr d_copy_wrapper(d_copy); + + CubDebugExit(cudaMemset(d_copy, 0, sizeof(T) * TEST_VALUES)); + thrust::copy_if(d_ref_itr, d_ref_itr + TEST_VALUES, d_copy_wrapper, SelectOp()); + + int compare = CompareDeviceResults(h_data, d_copy, TEST_VALUES, g_verbose, g_verbose); + printf("\tthrust::copy_if(): %s\n", (compare) ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + + if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); + +#endif // THRUST_VERSION + + CubDebugExit(d_ref_itr.UnbindTexture()); + CubDebugExit(d_ref_itr2.UnbindTexture()); + + if (h_data) delete[] h_data; + if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); + if (d_dummy) CubDebugExit(g_allocator.DeviceFree(d_dummy)); +} + + +/** + * Test texture transform iterator + */ +template +void TestTexTransform() +{ + printf("\nTesting tex-transform iterator on type %s\n", typeid(T).name()); fflush(stdout); + + // + // Test iterator manipulation in kernel + // + + const unsigned int TEST_VALUES = 11000; + + T *h_data = new T[TEST_VALUES]; + for (int i = 0; i < TEST_VALUES; ++i) + { + InitValue(INTEGER_SEED, h_data[i], i); + } + + // Allocate device arrays + T *d_data = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_data, sizeof(T) * TEST_VALUES)); + CubDebugExit(cudaMemcpy(d_data, h_data, sizeof(T) * TEST_VALUES, cudaMemcpyHostToDevice)); + + TransformOp op; + + // Initialize reference data + T h_reference[8]; + h_reference[0] = op(h_data[0]); // Value at offset 0 + h_reference[1] = op(h_data[100]); // Value at offset 100 + h_reference[2] = op(h_data[1000]); // Value at offset 1000 + h_reference[3] = op(h_data[10000]); // Value at offset 10000 + h_reference[4] = op(h_data[1]); // Value at offset 1 + h_reference[5] = op(h_data[21]); // Value at offset 21 + h_reference[6] = op(h_data[11]); // Value at offset 11 + h_reference[7] = op(h_data[0]); // Value at offset 0; + + // Create and bind texture iterator + typedef TexRefInputIterator TextureIterator; + + TextureIterator d_tex_itr; + CubDebugExit(d_tex_itr.BindTexture((CastT*) d_data, sizeof(T) * TEST_VALUES)); + + // Create transform iterator + TransformInputIterator, TextureIterator> xform_itr(d_tex_itr, op); + + Test(xform_itr, h_reference); + +#if (THRUST_VERSION >= 100700) // Thrust 1.7 or newer + + // + // Test with thrust::copy_if() + // + + T *h_copy = new T[TEST_VALUES]; + for (int i = 0; i < TEST_VALUES; ++i) + h_copy[i] = op(h_data[i]); + + T *d_copy = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_copy, sizeof(T) * TEST_VALUES)); + thrust::device_ptr d_copy_wrapper(d_copy); + + thrust::copy_if(xform_itr, xform_itr + TEST_VALUES, d_copy_wrapper, SelectOp()); + + int compare = CompareDeviceResults(h_copy, d_copy, TEST_VALUES, g_verbose, g_verbose); + printf("\tthrust::copy_if(): %s\n", (compare) ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + + // Cleanup + if (h_copy) delete[] h_copy; + if (d_copy) CubDebugExit(g_allocator.DeviceFree(d_copy)); + +#endif // THRUST_VERSION + + CubDebugExit(d_tex_itr.UnbindTexture()); + if (h_data) delete[] h_data; + if (d_data) CubDebugExit(g_allocator.DeviceFree(d_data)); +} + +#endif // CUDA_VERSION + + + + +/** + * Run non-integer tests + */ +template +void Test(Int2Type is_integer) +{ + TestModified(); + TestTransform(); + +#if CUB_CDP + // Test tex-obj iterators if CUDA dynamic parallelism enabled + TestTexObj(type_string); +#endif // CUB_CDP + +#if CUDA_VERSION >= 5050 + // Test tex-ref iterators for CUDA 5.5 + TestTexRef(); + TestTexTransform(); +#endif // CUDA_VERSION +} + +/** + * Run integer tests + */ +template +void Test(Int2Type is_integer) +{ + TestConstant(0); + TestConstant(99); + + TestCounting(0); + TestCounting(99); + + // Run non-integer tests + Test(Int2Type()); +} + +/** + * Run tests + */ +template +void Test() +{ + enum { + IS_INTEGER = (Traits::CATEGORY == SIGNED_INTEGER) || (Traits::CATEGORY == UNSIGNED_INTEGER) + }; + + // Test non-const type + Test(Int2Type()); + + // Test non-const type + Test(Int2Type()); +} + + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=] " + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + + // Get ptx version + int ptx_version; + CubDebugExit(PtxVersion(ptx_version)); + + // Evaluate different data types + Test(); + Test(); + Test(); + Test(); + Test(); + Test(); + if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted + Test(); + + Test(); + Test(); + Test(); + Test(); + Test(); + Test(); + if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted + Test(); + + Test(); + Test(); + Test(); + Test(); + Test(); + Test(); + if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted + Test(); + + Test(); + Test(); + Test(); + Test(); + Test(); + Test(); + if (ptx_version > 120) // Don't check doubles on PTX120 or below because they're down-converted + Test(); + + Test(); + Test(); + + printf("\nTest complete\n"); fflush(stdout); + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_util.h b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_util.h new file mode 100644 index 0000000..491a5be --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_util.h @@ -0,0 +1,1628 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + + +#pragma once + +#if defined(_WIN32) || defined(_WIN64) + #include + #undef small // Windows is terrible for polluting macro namespace +#else + #include +#endif + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "mersenne.h" +#include "half.h" + +#include "cub/util_debug.cuh" +#include "cub/util_device.cuh" +#include "cub/util_type.cuh" +#include "cub/util_macro.cuh" +#include "cub/iterator/discard_output_iterator.cuh" + + +/****************************************************************************** + * Assertion macros + ******************************************************************************/ + +/** + * Assert equals + */ +#define AssertEquals(a, b) if ((a) != (b)) { std::cerr << "\n(" << __FILE__ << ": " << __LINE__ << ")\n"; exit(1);} + + +/****************************************************************************** + * Command-line parsing functionality + ******************************************************************************/ + +/** + * Utility for parsing command line arguments + */ +struct CommandLineArgs +{ + + std::vector keys; + std::vector values; + std::vector args; + cudaDeviceProp deviceProp; + float device_giga_bandwidth; + size_t device_free_physmem; + size_t device_total_physmem; + + /** + * Constructor + */ + CommandLineArgs(int argc, char **argv) : + keys(10), + values(10) + { + using namespace std; + + // Initialize mersenne generator + unsigned int mersenne_init[4]= {0x123, 0x234, 0x345, 0x456}; + mersenne::init_by_array(mersenne_init, 4); + + for (int i = 1; i < argc; i++) + { + string arg = argv[i]; + + if ((arg[0] != '-') || (arg[1] != '-')) + { + args.push_back(arg); + continue; + } + + string::size_type pos; + string key, val; + if ((pos = arg.find('=')) == string::npos) { + key = string(arg, 2, arg.length() - 2); + val = ""; + } else { + key = string(arg, 2, pos - 2); + val = string(arg, pos + 1, arg.length() - 1); + } + + keys.push_back(key); + values.push_back(val); + } + } + + + /** + * Checks whether a flag "--" is present in the commandline + */ + bool CheckCmdLineFlag(const char* arg_name) + { + using namespace std; + + for (int i = 0; i < int(keys.size()); ++i) + { + if (keys[i] == string(arg_name)) + return true; + } + return false; + } + + + /** + * Returns number of naked (non-flag and non-key-value) commandline parameters + */ + template + int NumNakedArgs() + { + return args.size(); + } + + + /** + * Returns the 
commandline parameter for a given index (not including flags) + */ + template + void GetCmdLineArgument(int index, T &val) + { + using namespace std; + if (index < args.size()) { + istringstream str_stream(args[index]); + str_stream >> val; + } + } + + /** + * Returns the value specified for a given commandline parameter --= + */ + template + void GetCmdLineArgument(const char *arg_name, T &val) + { + using namespace std; + + for (int i = 0; i < int(keys.size()); ++i) + { + if (keys[i] == string(arg_name)) + { + istringstream str_stream(values[i]); + str_stream >> val; + } + } + } + + + /** + * Returns the values specified for a given commandline parameter --=,* + */ + template + void GetCmdLineArguments(const char *arg_name, std::vector &vals) + { + using namespace std; + + if (CheckCmdLineFlag(arg_name)) + { + // Clear any default values + vals.clear(); + + // Recover from multi-value string + for (int i = 0; i < keys.size(); ++i) + { + if (keys[i] == string(arg_name)) + { + string val_string(values[i]); + istringstream str_stream(val_string); + string::size_type old_pos = 0; + string::size_type new_pos = 0; + + // Iterate comma-separated values + T val; + while ((new_pos = val_string.find(',', old_pos)) != string::npos) + { + if (new_pos != old_pos) + { + str_stream.width(new_pos - old_pos); + str_stream >> val; + vals.push_back(val); + } + + // skip over comma + str_stream.ignore(1); + old_pos = new_pos + 1; + } + + // Read last value + str_stream >> val; + vals.push_back(val); + } + } + } + } + + + /** + * The number of pairs parsed + */ + int ParsedArgc() + { + return (int) keys.size(); + } + + /** + * Initialize device + */ + cudaError_t DeviceInit(int dev = -1) + { + cudaError_t error = cudaSuccess; + + do + { + int deviceCount; + error = CubDebug(cudaGetDeviceCount(&deviceCount)); + if (error) break; + + if (deviceCount == 0) { + fprintf(stderr, "No devices supporting CUDA.\n"); + exit(1); + } + if (dev < 0) + { + GetCmdLineArgument("device", dev); + } + if ((dev > deviceCount - 1) || (dev < 0)) + { + dev = 0; + } + + error = CubDebug(cudaSetDevice(dev)); + if (error) break; + + CubDebugExit(cudaMemGetInfo(&device_free_physmem, &device_total_physmem)); + + int ptx_version; + error = CubDebug(cub::PtxVersion(ptx_version)); + if (error) break; + + error = CubDebug(cudaGetDeviceProperties(&deviceProp, dev)); + if (error) break; + + if (deviceProp.major < 1) { + fprintf(stderr, "Device does not support CUDA.\n"); + exit(1); + } + + device_giga_bandwidth = float(deviceProp.memoryBusWidth) * deviceProp.memoryClockRate * 2 / 8 / 1000 / 1000; + + if (!CheckCmdLineFlag("quiet")) + { + printf( + "Using device %d: %s (PTX version %d, SM%d, %d SMs, " + "%lld free / %lld total MB physmem, " + "%.3f GB/s @ %d kHz mem clock, ECC %s)\n", + dev, + deviceProp.name, + ptx_version, + deviceProp.major * 100 + deviceProp.minor * 10, + deviceProp.multiProcessorCount, + (unsigned long long) device_free_physmem / 1024 / 1024, + (unsigned long long) device_total_physmem / 1024 / 1024, + device_giga_bandwidth, + deviceProp.memoryClockRate, + (deviceProp.ECCEnabled) ? 
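As a usage note for the CommandLineArgs helper defined above (flags are "--name", key/value pairs are "--name=value", lists are "--name=a,b,c"), a small driver might look like the sketch below; the --sizes flag is made up for illustration.

#include <cstdio>
#include <vector>
#include "test_util.h"

int main(int argc, char **argv)
{
    CommandLineArgs args(argc, argv);

    bool verbose = args.CheckCmdLineFlag("v");      // e.g. --v

    int device = 0;
    args.GetCmdLineArgument("device", device);      // e.g. --device=1

    std::vector<int> sizes;
    args.GetCmdLineArguments("sizes", sizes);       // e.g. --sizes=128,256,512 (hypothetical flag)

    // Selects the device, queries its properties, and prints the banner unless --quiet is given.
    CubDebugExit(args.DeviceInit(device));

    printf("verbose=%d device=%d sizes=%d\n", verbose, device, (int) sizes.size());
    return 0;
}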
"on" : "off"); + fflush(stdout); + } + + } while (0); + + return error; + } +}; + +/****************************************************************************** + * Random bits generator + ******************************************************************************/ + +int g_num_rand_samples = 0; + + +template +bool IsNaN(T val) { return false; } + +template<> +__noinline__ bool IsNaN(float val) +{ + volatile unsigned int bits = reinterpret_cast(val); + + return (((bits >= 0x7F800001) && (bits <= 0x7FFFFFFF)) || + ((bits >= 0xFF800001) && (bits <= 0xFFFFFFFF))); +} + +template<> +__noinline__ bool IsNaN(float1 val) +{ + return (IsNaN(val.x)); +} + +template<> +__noinline__ bool IsNaN(float2 val) +{ + return (IsNaN(val.y) || IsNaN(val.x)); +} + +template<> +__noinline__ bool IsNaN(float3 val) +{ + return (IsNaN(val.z) || IsNaN(val.y) || IsNaN(val.x)); +} + +template<> +__noinline__ bool IsNaN(float4 val) +{ + return (IsNaN(val.y) || IsNaN(val.x) || IsNaN(val.w) || IsNaN(val.z)); +} + +template<> +__noinline__ bool IsNaN(double val) +{ + volatile unsigned long long bits = *reinterpret_cast(&val); + + return (((bits >= 0x7FF0000000000001) && (bits <= 0x7FFFFFFFFFFFFFFF)) || + ((bits >= 0xFFF0000000000001) && (bits <= 0xFFFFFFFFFFFFFFFF))); +} + +template<> +__noinline__ bool IsNaN(double1 val) +{ + return (IsNaN(val.x)); +} + +template<> +__noinline__ bool IsNaN(double2 val) +{ + return (IsNaN(val.y) || IsNaN(val.x)); +} + +template<> +__noinline__ bool IsNaN(double3 val) +{ + return (IsNaN(val.z) || IsNaN(val.y) || IsNaN(val.x)); +} + +template<> +__noinline__ bool IsNaN(double4 val) +{ + return (IsNaN(val.y) || IsNaN(val.x) || IsNaN(val.w) || IsNaN(val.z)); +} + + +template<> +__noinline__ bool IsNaN(half_t val) +{ + volatile unsigned short bits = reinterpret_cast(val); + + return (((bits >= 0x7C01) && (bits <= 0x7FFF)) || + ((bits >= 0xFC01) && (bits <= 0xFFFFFFFF))); +} + + + +/** + * Generates random keys. + * + * We always take the second-order byte from rand() because the higher-order + * bits returned by rand() are commonly considered more uniformly distributed + * than the lower-order bits. + * + * We can decrease the entropy level of keys by adopting the technique + * of Thearling and Smith in which keys are computed from the bitwise AND of + * multiple random samples: + * + * entropy_reduction | Effectively-unique bits per key + * ----------------------------------------------------- + * -1 | 0 + * 0 | 32 + * 1 | 25.95 (81%) + * 2 | 17.41 (54%) + * 3 | 10.78 (34%) + * 4 | 6.42 (20%) + * ... | ... 
+ * + */ +template +void RandomBits( + K &key, + int entropy_reduction = 0, + int begin_bit = 0, + int end_bit = sizeof(K) * 8) +{ + const int NUM_BYTES = sizeof(K); + const int WORD_BYTES = sizeof(unsigned int); + const int NUM_WORDS = (NUM_BYTES + WORD_BYTES - 1) / WORD_BYTES; + + unsigned int word_buff[NUM_WORDS]; + + if (entropy_reduction == -1) + { + memset((void *) &key, 0, sizeof(key)); + return; + } + + if (end_bit < 0) + end_bit = sizeof(K) * 8; + + while (true) + { + // Generate random word_buff + for (int j = 0; j < NUM_WORDS; j++) + { + int current_bit = j * WORD_BYTES * 8; + + unsigned int word = 0xffffffff; + word &= 0xffffffff << CUB_MAX(0, begin_bit - current_bit); + word &= 0xffffffff >> CUB_MAX(0, (current_bit + (WORD_BYTES * 8)) - end_bit); + + for (int i = 0; i <= entropy_reduction; i++) + { + // Grab some of the higher bits from rand (better entropy, supposedly) + word &= mersenne::genrand_int32(); + g_num_rand_samples++; + } + + word_buff[j] = word; + } + + memcpy(&key, word_buff, sizeof(K)); + + K copy = key; + if (!IsNaN(copy)) + break; // avoids NaNs when generating random floating point numbers + } +} + +/// Randomly select number between [0:max) +template +T RandomValue(T max) +{ + unsigned int bits; + unsigned int max_int = (unsigned int) -1; + do { + RandomBits(bits); + } while (bits == max_int); + + return (T) ((double(bits) / double(max_int)) * double(max)); +} + + +/****************************************************************************** + * Console printing utilities + ******************************************************************************/ + +/** + * Helper for casting character types to integers for cout printing + */ +template +T CoutCast(T val) { return val; } + +int CoutCast(char val) { return val; } + +int CoutCast(unsigned char val) { return val; } + +int CoutCast(signed char val) { return val; } + + + +/****************************************************************************** + * Test value initialization utilities + ******************************************************************************/ + +/** + * Test problem generation options + */ +enum GenMode +{ + UNIFORM, // Assign to '2', regardless of integer seed + INTEGER_SEED, // Assign to integer seed + RANDOM, // Assign to random, regardless of integer seed +}; + +/** + * Initialize value + */ +template +__host__ __device__ __forceinline__ void InitValue(GenMode gen_mode, T &value, int index = 0) +{ + switch (gen_mode) + { +#if (CUB_PTX_ARCH == 0) + case RANDOM: + RandomBits(value); + break; +#endif + case UNIFORM: + value = 2; + break; + case INTEGER_SEED: + default: + value = (T) index; + break; + } +} + + +/** + * Initialize value (bool) + */ +__host__ __device__ __forceinline__ void InitValue(GenMode gen_mode, bool &value, int index = 0) +{ + switch (gen_mode) + { +#if (CUB_PTX_ARCH == 0) + case RANDOM: + char c; + RandomBits(c, 0, 0, 1); + value = (c > 0); + break; +#endif + case UNIFORM: + value = true; + break; + case INTEGER_SEED: + default: + value = (index > 0); + break; + } +} + + +/** + * cub::NullType test initialization + */ +__host__ __device__ __forceinline__ void InitValue(GenMode gen_mode, cub::NullType &value, int index = 0) +{} + + +/** + * cub::KeyValuePairtest initialization + */ +template +__host__ __device__ __forceinline__ void InitValue( + GenMode gen_mode, + cub::KeyValuePair& value, + int index = 0) +{ + InitValue(gen_mode, value.value, index); + + // Assign corresponding flag with a likelihood of the last bit being set with entropy-reduction level 
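The "effectively-unique bits per key" column in the RandomBits() comment above can be derived directly: with entropy_reduction = e the key is the bitwise AND of e + 1 independent uniform words, so each bit is set with probability 2^-(e+1), and the entropy of a 32-bit key is 32*H(2^-(e+1)), where H is the binary entropy function. A quick check (illustrative, not part of the original header):

// Prints roughly 32.00, 25.96, 17.39, 10.79, 6.42 bits for e = 0..4,
// in close agreement with the table in the comment above.
#include <cmath>
#include <cstdio>

int main()
{
    for (int e = 0; e <= 4; ++e)
    {
        double p = std::ldexp(1.0, -(e + 1));                        // Pr[bit == 1] = 2^-(e+1)
        double H = -p * std::log2(p) - (1 - p) * std::log2(1 - p);   // entropy per key bit
        printf("entropy_reduction %d -> %.2f effective bits per 32-bit key\n", e, 32.0 * H);
    }
    return 0;
}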
3 + RandomBits(value.key, 3); + value.key = (value.key & 0x1); +} + + + +/****************************************************************************** + * Comparison and ostream operators + ******************************************************************************/ + +/** + * KeyValuePair ostream operator + */ +template +std::ostream& operator<<(std::ostream& os, const cub::KeyValuePair &val) +{ + os << '(' << CoutCast(val.key) << ',' << CoutCast(val.value) << ')'; + return os; +} + + +/****************************************************************************** + * Comparison and ostream operators for CUDA vector types + ******************************************************************************/ + +/** + * Vector1 overloads + */ +#define CUB_VEC_OVERLOAD_1(T, BaseT) \ + /* Ostream output */ \ + std::ostream& operator<<( \ + std::ostream& os, \ + const T& val) \ + { \ + os << '(' << CoutCast(val.x) << ')'; \ + return os; \ + } \ + /* Inequality */ \ + __host__ __device__ __forceinline__ bool operator!=( \ + const T &a, \ + const T &b) \ + { \ + return (a.x != b.x); \ + } \ + /* Equality */ \ + __host__ __device__ __forceinline__ bool operator==( \ + const T &a, \ + const T &b) \ + { \ + return (a.x == b.x); \ + } \ + /* Test initialization */ \ + __host__ __device__ __forceinline__ void InitValue(GenMode gen_mode, T &value, int index = 0) \ + { \ + InitValue(gen_mode, value.x, index); \ + } \ + /* Max */ \ + __host__ __device__ __forceinline__ bool operator>( \ + const T &a, \ + const T &b) \ + { \ + return (a.x > b.x); \ + } \ + /* Min */ \ + __host__ __device__ __forceinline__ bool operator<( \ + const T &a, \ + const T &b) \ + { \ + return (a.x < b.x); \ + } \ + /* Summation (non-reference addends for VS2003 -O3 warpscan workaround */ \ + __host__ __device__ __forceinline__ T operator+( \ + T a, \ + T b) \ + { \ + T retval = make_##T(a.x + b.x); \ + return retval; \ + } \ + namespace cub { \ + template<> \ + struct NumericTraits \ + { \ + static const Category CATEGORY = NOT_A_NUMBER; \ + enum { \ + PRIMITIVE = false, \ + NULL_TYPE = false, \ + }; \ + static T Max() \ + { \ + T retval = { \ + NumericTraits::Max()}; \ + return retval; \ + } \ + static T Lowest() \ + { \ + T retval = { \ + NumericTraits::Lowest()}; \ + return retval; \ + } \ + }; \ + } /* namespace std */ + + + +/** + * Vector2 overloads + */ +#define CUB_VEC_OVERLOAD_2(T, BaseT) \ + /* Ostream output */ \ + std::ostream& operator<<( \ + std::ostream& os, \ + const T& val) \ + { \ + os << '(' \ + << CoutCast(val.x) << ',' \ + << CoutCast(val.y) << ')'; \ + return os; \ + } \ + /* Inequality */ \ + __host__ __device__ __forceinline__ bool operator!=( \ + const T &a, \ + const T &b) \ + { \ + return (a.x != b.x) || \ + (a.y != b.y); \ + } \ + /* Equality */ \ + __host__ __device__ __forceinline__ bool operator==( \ + const T &a, \ + const T &b) \ + { \ + return (a.x == b.x) && \ + (a.y == b.y); \ + } \ + /* Test initialization */ \ + __host__ __device__ __forceinline__ void InitValue(GenMode gen_mode, T &value, int index = 0) \ + { \ + InitValue(gen_mode, value.x, index); \ + InitValue(gen_mode, value.y, index); \ + } \ + /* Max */ \ + __host__ __device__ __forceinline__ bool operator>( \ + const T &a, \ + const T &b) \ + { \ + if (a.x > b.x) return true; else if (b.x > a.x) return false; \ + return a.y > b.y; \ + } \ + /* Min */ \ + __host__ __device__ __forceinline__ bool operator<( \ + const T &a, \ + const T &b) \ + { \ + if (a.x < b.x) return true; else if (b.x < a.x) return false; \ + return a.y < b.y; \ + 
} \ + /* Summation (non-reference addends for VS2003 -O3 warpscan workaround */ \ + __host__ __device__ __forceinline__ T operator+( \ + T a, \ + T b) \ + { \ + T retval = make_##T( \ + a.x + b.x, \ + a.y + b.y); \ + return retval; \ + } \ + namespace cub { \ + template<> \ + struct NumericTraits \ + { \ + static const Category CATEGORY = NOT_A_NUMBER; \ + enum { \ + PRIMITIVE = false, \ + NULL_TYPE = false, \ + }; \ + static T Max() \ + { \ + T retval = { \ + NumericTraits::Max(), \ + NumericTraits::Max()}; \ + return retval; \ + } \ + static T Lowest() \ + { \ + T retval = { \ + NumericTraits::Lowest(), \ + NumericTraits::Lowest()}; \ + return retval; \ + } \ + }; \ + } /* namespace cub */ + + + +/** + * Vector3 overloads + */ +#define CUB_VEC_OVERLOAD_3(T, BaseT) \ + /* Ostream output */ \ + std::ostream& operator<<( \ + std::ostream& os, \ + const T& val) \ + { \ + os << '(' \ + << CoutCast(val.x) << ',' \ + << CoutCast(val.y) << ',' \ + << CoutCast(val.z) << ')'; \ + return os; \ + } \ + /* Inequality */ \ + __host__ __device__ __forceinline__ bool operator!=( \ + const T &a, \ + const T &b) \ + { \ + return (a.x != b.x) || \ + (a.y != b.y) || \ + (a.z != b.z); \ + } \ + /* Equality */ \ + __host__ __device__ __forceinline__ bool operator==( \ + const T &a, \ + const T &b) \ + { \ + return (a.x == b.x) && \ + (a.y == b.y) && \ + (a.z == b.z); \ + } \ + /* Test initialization */ \ + __host__ __device__ __forceinline__ void InitValue(GenMode gen_mode, T &value, int index = 0) \ + { \ + InitValue(gen_mode, value.x, index); \ + InitValue(gen_mode, value.y, index); \ + InitValue(gen_mode, value.z, index); \ + } \ + /* Max */ \ + __host__ __device__ __forceinline__ bool operator>( \ + const T &a, \ + const T &b) \ + { \ + if (a.x > b.x) return true; else if (b.x > a.x) return false; \ + if (a.y > b.y) return true; else if (b.y > a.y) return false; \ + return a.z > b.z; \ + } \ + /* Min */ \ + __host__ __device__ __forceinline__ bool operator<( \ + const T &a, \ + const T &b) \ + { \ + if (a.x < b.x) return true; else if (b.x < a.x) return false; \ + if (a.y < b.y) return true; else if (b.y < a.y) return false; \ + return a.z < b.z; \ + } \ + /* Summation (non-reference addends for VS2003 -O3 warpscan workaround */ \ + __host__ __device__ __forceinline__ T operator+( \ + T a, \ + T b) \ + { \ + T retval = make_##T( \ + a.x + b.x, \ + a.y + b.y, \ + a.z + b.z); \ + return retval; \ + } \ + namespace cub { \ + template<> \ + struct NumericTraits \ + { \ + static const Category CATEGORY = NOT_A_NUMBER; \ + enum { \ + PRIMITIVE = false, \ + NULL_TYPE = false, \ + }; \ + static T Max() \ + { \ + T retval = { \ + NumericTraits::Max(), \ + NumericTraits::Max(), \ + NumericTraits::Max()}; \ + return retval; \ + } \ + static T Lowest() \ + { \ + T retval = { \ + NumericTraits::Lowest(), \ + NumericTraits::Lowest(), \ + NumericTraits::Lowest()}; \ + return retval; \ + } \ + }; \ + } /* namespace cub */ + + +/** + * Vector4 overloads + */ +#define CUB_VEC_OVERLOAD_4(T, BaseT) \ + /* Ostream output */ \ + std::ostream& operator<<( \ + std::ostream& os, \ + const T& val) \ + { \ + os << '(' \ + << CoutCast(val.x) << ',' \ + << CoutCast(val.y) << ',' \ + << CoutCast(val.z) << ',' \ + << CoutCast(val.w) << ')'; \ + return os; \ + } \ + /* Inequality */ \ + __host__ __device__ __forceinline__ bool operator!=( \ + const T &a, \ + const T &b) \ + { \ + return (a.x != b.x) || \ + (a.y != b.y) || \ + (a.z != b.z) || \ + (a.w != b.w); \ + } \ + /* Equality */ \ + __host__ __device__ __forceinline__ bool 
operator==( \ + const T &a, \ + const T &b) \ + { \ + return (a.x == b.x) && \ + (a.y == b.y) && \ + (a.z == b.z) && \ + (a.w == b.w); \ + } \ + /* Test initialization */ \ + __host__ __device__ __forceinline__ void InitValue(GenMode gen_mode, T &value, int index = 0) \ + { \ + InitValue(gen_mode, value.x, index); \ + InitValue(gen_mode, value.y, index); \ + InitValue(gen_mode, value.z, index); \ + InitValue(gen_mode, value.w, index); \ + } \ + /* Max */ \ + __host__ __device__ __forceinline__ bool operator>( \ + const T &a, \ + const T &b) \ + { \ + if (a.x > b.x) return true; else if (b.x > a.x) return false; \ + if (a.y > b.y) return true; else if (b.y > a.y) return false; \ + if (a.z > b.z) return true; else if (b.z > a.z) return false; \ + return a.w > b.w; \ + } \ + /* Min */ \ + __host__ __device__ __forceinline__ bool operator<( \ + const T &a, \ + const T &b) \ + { \ + if (a.x < b.x) return true; else if (b.x < a.x) return false; \ + if (a.y < b.y) return true; else if (b.y < a.y) return false; \ + if (a.z < b.z) return true; else if (b.z < a.z) return false; \ + return a.w < b.w; \ + } \ + /* Summation (non-reference addends for VS2003 -O3 warpscan workaround */ \ + __host__ __device__ __forceinline__ T operator+( \ + T a, \ + T b) \ + { \ + T retval = make_##T( \ + a.x + b.x, \ + a.y + b.y, \ + a.z + b.z, \ + a.w + b.w); \ + return retval; \ + } \ + namespace cub { \ + template<> \ + struct NumericTraits \ + { \ + static const Category CATEGORY = NOT_A_NUMBER; \ + enum { \ + PRIMITIVE = false, \ + NULL_TYPE = false, \ + }; \ + static T Max() \ + { \ + T retval = { \ + NumericTraits::Max(), \ + NumericTraits::Max(), \ + NumericTraits::Max(), \ + NumericTraits::Max()}; \ + return retval; \ + } \ + static T Lowest() \ + { \ + T retval = { \ + NumericTraits::Lowest(), \ + NumericTraits::Lowest(), \ + NumericTraits::Lowest(), \ + NumericTraits::Lowest()}; \ + return retval; \ + } \ + }; \ + } /* namespace cub */ + +/** + * All vector overloads + */ +#define CUB_VEC_OVERLOAD(COMPONENT_T, BaseT) \ + CUB_VEC_OVERLOAD_1(COMPONENT_T##1, BaseT) \ + CUB_VEC_OVERLOAD_2(COMPONENT_T##2, BaseT) \ + CUB_VEC_OVERLOAD_3(COMPONENT_T##3, BaseT) \ + CUB_VEC_OVERLOAD_4(COMPONENT_T##4, BaseT) + +/** + * Define for types + */ +CUB_VEC_OVERLOAD(char, char) +CUB_VEC_OVERLOAD(short, short) +CUB_VEC_OVERLOAD(int, int) +CUB_VEC_OVERLOAD(long, long) +CUB_VEC_OVERLOAD(longlong, long long) +CUB_VEC_OVERLOAD(uchar, unsigned char) +CUB_VEC_OVERLOAD(ushort, unsigned short) +CUB_VEC_OVERLOAD(uint, unsigned int) +CUB_VEC_OVERLOAD(ulong, unsigned long) +CUB_VEC_OVERLOAD(ulonglong, unsigned long long) +CUB_VEC_OVERLOAD(float, float) +CUB_VEC_OVERLOAD(double, double) + + +//--------------------------------------------------------------------- +// Complex data type TestFoo +//--------------------------------------------------------------------- + +/** + * TestFoo complex data type + */ +struct TestFoo +{ + long long x; + int y; + short z; + char w; + + // Factory + static __host__ __device__ __forceinline__ TestFoo MakeTestFoo(long long x, int y, short z, char w) + { + TestFoo retval = {x, y, z, w}; + return retval; + } + + // Assignment from int operator + __host__ __device__ __forceinline__ TestFoo& operator =(int b) + { + x = b; + y = b; + z = b; + w = b; + return *this; + } + + // Summation operator + __host__ __device__ __forceinline__ TestFoo operator+(const TestFoo &b) const + { + return MakeTestFoo(x + b.x, y + b.y, z + b.z, w + b.w); + } + + // Inequality operator + __host__ __device__ __forceinline__ bool 
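Once CUB_VEC_OVERLOAD(int, int) and friends above have been expanded, the built-in CUDA vector types behave like ordinary test values: they can be summed, compared lexicographically, streamed, and queried through cub::NumericTraits. A small host-side sketch (compiled as a .cu so the vector types and test_util.h are available):

#include <iostream>
#include <cuda_runtime.h>
#include "test_util.h"

int main()
{
    int2 a = make_int2(1, 2);
    int2 b = make_int2(1, 5);

    int2 sum = a + b;                              // component-wise: (2, 7)
    bool lt  = (a < b);                            // lexicographic (x first, then y): true
    int2 low = cub::NumericTraits<int2>::Lowest(); // e.g. usable as a max-reduction identity

    std::cout << sum << " " << lt << " " << low << std::endl;
    return 0;
}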
operator !=(const TestFoo &b) const + { + return (x != b.x) || (y != b.y) || (z != b.z) || (w != b.w); + } + + // Equality operator + __host__ __device__ __forceinline__ bool operator ==(const TestFoo &b) const + { + return (x == b.x) && (y == b.y) && (z == b.z) && (w == b.w); + } + + // Less than operator + __host__ __device__ __forceinline__ bool operator <(const TestFoo &b) const + { + if (x < b.x) return true; else if (b.x < x) return false; + if (y < b.y) return true; else if (b.y < y) return false; + if (z < b.z) return true; else if (b.z < z) return false; + return w < b.w; + } + + // Greater than operator + __host__ __device__ __forceinline__ bool operator >(const TestFoo &b) const + { + if (x > b.x) return true; else if (b.x > x) return false; + if (y > b.y) return true; else if (b.y > y) return false; + if (z > b.z) return true; else if (b.z > z) return false; + return w > b.w; + } + +}; + +/** + * TestFoo ostream operator + */ +std::ostream& operator<<(std::ostream& os, const TestFoo& val) +{ + os << '(' << val.x << ',' << val.y << ',' << val.z << ',' << CoutCast(val.w) << ')'; + return os; +} + +/** + * TestFoo test initialization + */ +__host__ __device__ __forceinline__ void InitValue(GenMode gen_mode, TestFoo &value, int index = 0) +{ + InitValue(gen_mode, value.x, index); + InitValue(gen_mode, value.y, index); + InitValue(gen_mode, value.z, index); + InitValue(gen_mode, value.w, index); +} + + +/// numeric_limits specialization +namespace cub { +template<> +struct NumericTraits +{ + static const Category CATEGORY = NOT_A_NUMBER; + enum { + PRIMITIVE = false, + NULL_TYPE = false, + }; + static TestFoo Max() + { + return TestFoo::MakeTestFoo( + NumericTraits::Max(), + NumericTraits::Max(), + NumericTraits::Max(), + NumericTraits::Max()); + } + + static TestFoo Lowest() + { + return TestFoo::MakeTestFoo( + NumericTraits::Lowest(), + NumericTraits::Lowest(), + NumericTraits::Lowest(), + NumericTraits::Lowest()); + } +}; +} // namespace cub + + +//--------------------------------------------------------------------- +// Complex data type TestBar (with optimizations for fence-free warp-synchrony) +//--------------------------------------------------------------------- + +/** + * TestBar complex data type + */ +struct TestBar +{ + long long x; + int y; + + // Constructor + __host__ __device__ __forceinline__ TestBar() : x(0), y(0) + {} + + // Constructor + __host__ __device__ __forceinline__ TestBar(int b) : x(b), y(b) + {} + + // Constructor + __host__ __device__ __forceinline__ TestBar(long long x, int y) : x(x), y(y) + {} + + // Assignment from int operator + __host__ __device__ __forceinline__ TestBar& operator =(int b) + { + x = b; + y = b; + return *this; + } + + // Summation operator + __host__ __device__ __forceinline__ TestBar operator+(const TestBar &b) const + { + return TestBar(x + b.x, y + b.y); + } + + // Inequality operator + __host__ __device__ __forceinline__ bool operator !=(const TestBar &b) const + { + return (x != b.x) || (y != b.y); + } + + // Equality operator + __host__ __device__ __forceinline__ bool operator ==(const TestBar &b) const + { + return (x == b.x) && (y == b.y); + } + + // Less than operator + __host__ __device__ __forceinline__ bool operator <(const TestBar &b) const + { + if (x < b.x) return true; else if (b.x < x) return false; + return y < b.y; + } + + // Greater than operator + __host__ __device__ __forceinline__ bool operator >(const TestBar &b) const + { + if (x > b.x) return true; else if (b.x > x) return false; + return y > b.y; + } + 
+}; + + +/** + * TestBar ostream operator + */ +std::ostream& operator<<(std::ostream& os, const TestBar& val) +{ + os << '(' << val.x << ',' << val.y << ')'; + return os; +} + +/** + * TestBar test initialization + */ +__host__ __device__ __forceinline__ void InitValue(GenMode gen_mode, TestBar &value, int index = 0) +{ + InitValue(gen_mode, value.x, index); + InitValue(gen_mode, value.y, index); +} + +/// numeric_limits specialization +namespace cub { +template<> +struct NumericTraits +{ + static const Category CATEGORY = NOT_A_NUMBER; + enum { + PRIMITIVE = false, + NULL_TYPE = false, + }; + static TestBar Max() + { + return TestBar( + NumericTraits::Max(), + NumericTraits::Max()); + } + + static TestBar Lowest() + { + return TestBar( + NumericTraits::Lowest(), + NumericTraits::Lowest()); + } +}; +} // namespace cub + + +/****************************************************************************** + * Helper routines for list comparison and display + ******************************************************************************/ + + +/** + * Compares the equivalence of two arrays + */ +template +int CompareResults(T* computed, S* reference, OffsetT len, bool verbose = true) +{ + for (OffsetT i = 0; i < len; i++) + { + if (computed[i] != reference[i]) + { + if (verbose) std::cout << "INCORRECT: [" << i << "]: " + << CoutCast(computed[i]) << " != " + << CoutCast(reference[i]); + return 1; + } + } + return 0; +} + + +/** + * Compares the equivalence of two arrays + */ +template +int CompareResults(float* computed, float* reference, OffsetT len, bool verbose = true) +{ + for (OffsetT i = 0; i < len; i++) + { + if (computed[i] != reference[i]) + { + float difference = std::abs(computed[i]-reference[i]); + float fraction = difference / std::abs(reference[i]); + + if (fraction > 0.0001) + { + if (verbose) std::cout << "INCORRECT: [" << i << "]: " + << "(computed) " << CoutCast(computed[i]) << " != " + << CoutCast(reference[i]) << " (difference:" << difference << ", fraction: " << fraction << ")"; + return 1; + } + } + } + return 0; +} + + +/** + * Compares the equivalence of two arrays + */ +template +int CompareResults(cub::NullType* computed, cub::NullType* reference, OffsetT len, bool verbose = true) +{ + return 0; +} + +/** + * Compares the equivalence of two arrays + */ +template +int CompareResults(double* computed, double* reference, OffsetT len, bool verbose = true) +{ + for (OffsetT i = 0; i < len; i++) + { + if (computed[i] != reference[i]) + { + double difference = std::abs(computed[i]-reference[i]); + double fraction = difference / std::abs(reference[i]); + + if (fraction > 0.0001) + { + if (verbose) std::cout << "INCORRECT: [" << i << "]: " + << CoutCast(computed[i]) << " != " + << CoutCast(reference[i]) << " (difference:" << difference << ", fraction: " << fraction << ")"; + return 1; + } + } + } + return 0; +} + + +/** + * Verify the contents of a device array match those + * of a host array + */ +int CompareDeviceResults( + cub::NullType *h_reference, + cub::NullType *d_data, + size_t num_items, + bool verbose = true, + bool display_data = false) +{ + return 0; +} + +/** + * Verify the contents of a device array match those + * of a host array + */ +template +int CompareDeviceResults( + S *h_reference, + cub::DiscardOutputIterator d_data, + size_t num_items, + bool verbose = true, + bool display_data = false) +{ + return 0; +} + +/** + * Verify the contents of a device array match those + * of a host array + */ +template +int CompareDeviceResults( + S *h_reference, + T 
*d_data, + size_t num_items, + bool verbose = true, + bool display_data = false) +{ + // Allocate array on host + T *h_data = (T*) malloc(num_items * sizeof(T)); + + // Copy data back + cudaMemcpy(h_data, d_data, sizeof(T) * num_items, cudaMemcpyDeviceToHost); + + // Display data + if (display_data) + { + printf("Reference:\n"); + for (int i = 0; i < int(num_items); i++) + { + std::cout << CoutCast(h_reference[i]) << ", "; + } + printf("\n\nComputed:\n"); + for (int i = 0; i < int(num_items); i++) + { + std::cout << CoutCast(h_data[i]) << ", "; + } + printf("\n\n"); + } + + // Check + int retval = CompareResults(h_data, h_reference, num_items, verbose); + + // Cleanup + if (h_data) free(h_data); + + return retval; +} + + +/** + * Verify the contents of a device array match those + * of a device array + */ +template +int CompareDeviceDeviceResults( + T *d_reference, + T *d_data, + size_t num_items, + bool verbose = true, + bool display_data = false) +{ + // Allocate array on host + T *h_reference = (T*) malloc(num_items * sizeof(T)); + T *h_data = (T*) malloc(num_items * sizeof(T)); + + // Copy data back + cudaMemcpy(h_reference, d_reference, sizeof(T) * num_items, cudaMemcpyDeviceToHost); + cudaMemcpy(h_data, d_data, sizeof(T) * num_items, cudaMemcpyDeviceToHost); + + // Display data + if (display_data) { + printf("Reference:\n"); + for (int i = 0; i < num_items; i++) + { + std::cout << CoutCast(h_reference[i]) << ", "; + } + printf("\n\nComputed:\n"); + for (int i = 0; i < num_items; i++) + { + std::cout << CoutCast(h_data[i]) << ", "; + } + printf("\n\n"); + } + + // Check + int retval = CompareResults(h_data, h_reference, num_items, verbose); + + // Cleanup + if (h_reference) free(h_reference); + if (h_data) free(h_data); + + return retval; +} + + +/** + * Print the contents of a host array + */ +void DisplayResults( + cub::NullType *h_data, + size_t num_items) +{} + + +/** + * Print the contents of a host array + */ +template +void DisplayResults( + InputIteratorT h_data, + size_t num_items) +{ + // Display data + for (int i = 0; i < int(num_items); i++) + { + std::cout << CoutCast(h_data[i]) << ", "; + } + printf("\n"); +} + + +/** + * Print the contents of a device array + */ +template +void DisplayDeviceResults( + T *d_data, + size_t num_items) +{ + // Allocate array on host + T *h_data = (T*) malloc(num_items * sizeof(T)); + + // Copy data back + cudaMemcpy(h_data, d_data, sizeof(T) * num_items, cudaMemcpyDeviceToHost); + + DisplayResults(h_data, num_items); + + // Cleanup + if (h_data) free(h_data); +} + + +/****************************************************************************** + * Segment descriptor generation + ******************************************************************************/ + +/** + * Initialize segments + */ +void InitializeSegments( + int num_items, + int num_segments, + int *h_segment_offsets, + bool verbose = false) +{ + if (num_segments <= 0) + return; + + unsigned int expected_segment_length = (num_items + num_segments - 1) / num_segments; + int offset = 0; + for (int i = 0; i < num_segments; ++i) + { + h_segment_offsets[i] = offset; + + unsigned int segment_length = RandomValue((expected_segment_length * 2) + 1); + offset += segment_length; + offset = CUB_MIN(offset, num_items); + } + h_segment_offsets[num_segments] = num_items; + + if (verbose) + { + printf("Segment offsets: "); + DisplayResults(h_segment_offsets, num_segments + 1); + } +} + + +/****************************************************************************** + * Timing + 
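A usage sketch for the comparison helpers above: CompareDeviceResults() copies the device array back, prints the first mismatch when verbose is set, optionally dumps both arrays, and returns 0 on success (the float and double overloads of CompareResults() allow a 1e-4 relative tolerance).

#include <cstdio>
#include <cuda_runtime.h>
#include "test_util.h"

int main()
{
    const int N = 4;
    float h_ref[N] = {1.f, 2.f, 3.f, 4.f};

    float *d_data = NULL;
    cudaMalloc((void**) &d_data, sizeof(float) * N);
    cudaMemcpy(d_data, h_ref, sizeof(float) * N, cudaMemcpyHostToDevice);

    int mismatch = CompareDeviceResults(h_ref, d_data, N, true /*verbose*/, true /*display_data*/);
    printf("%s\n", mismatch ? "FAIL" : "PASS");

    cudaFree(d_data);
    return mismatch;
}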
******************************************************************************/ + + +struct CpuTimer +{ +#if defined(_WIN32) || defined(_WIN64) + + LARGE_INTEGER ll_freq; + LARGE_INTEGER ll_start; + LARGE_INTEGER ll_stop; + + CpuTimer() + { + QueryPerformanceFrequency(&ll_freq); + } + + void Start() + { + QueryPerformanceCounter(&ll_start); + } + + void Stop() + { + QueryPerformanceCounter(&ll_stop); + } + + float ElapsedMillis() + { + double start = double(ll_start.QuadPart) / double(ll_freq.QuadPart); + double stop = double(ll_stop.QuadPart) / double(ll_freq.QuadPart); + + return float((stop - start) * 1000); + } + +#else + + rusage start; + rusage stop; + + void Start() + { + getrusage(RUSAGE_SELF, &start); + } + + void Stop() + { + getrusage(RUSAGE_SELF, &stop); + } + + float ElapsedMillis() + { + float sec = stop.ru_utime.tv_sec - start.ru_utime.tv_sec; + float usec = stop.ru_utime.tv_usec - start.ru_utime.tv_usec; + + return (sec * 1000) + (usec / 1000); + } + +#endif +}; + +struct GpuTimer +{ + cudaEvent_t start; + cudaEvent_t stop; + + GpuTimer() + { + cudaEventCreate(&start); + cudaEventCreate(&stop); + } + + ~GpuTimer() + { + cudaEventDestroy(start); + cudaEventDestroy(stop); + } + + void Start() + { + cudaEventRecord(start, 0); + } + + void Stop() + { + cudaEventRecord(stop, 0); + } + + float ElapsedMillis() + { + float elapsed; + cudaEventSynchronize(stop); + cudaEventElapsedTime(&elapsed, start, stop); + return elapsed; + } +}; diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_warp_reduce.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_warp_reduce.cu new file mode 100644 index 0000000..673219a --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_warp_reduce.cu @@ -0,0 +1,840 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
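The CpuTimer/GpuTimer helpers defined above share a Start()/Stop()/ElapsedMillis() interface; GpuTimer brackets the measured work with CUDA events and synchronizes on the stop event before reading the elapsed time. A minimal sketch (Busy is a placeholder kernel):

#include <cstdio>
#include "test_util.h"

__global__ void Busy() { /* kernel under test */ }

int main()
{
    GpuTimer timer;

    timer.Start();
    Busy<<<1, 1>>>();
    timer.Stop();                                   // records the stop event on stream 0

    printf("kernel took %.3f ms\n", timer.ElapsedMillis());
    return 0;
}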
+ * + ******************************************************************************/ + +/****************************************************************************** + * Test of WarpReduce utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include + +#include +#include + +#include "test_util.h" + +using namespace cub; + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +bool g_verbose = false; +int g_repeat = 0; +CachingDeviceAllocator g_allocator(true); + + +/** + * \brief WrapperFunctor (for precluding test-specialized dispatch to *Sum variants) + */ +template< + typename OpT, + int LOGICAL_WARP_THREADS> +struct WrapperFunctor +{ + OpT op; + int num_valid; + + inline __host__ __device__ WrapperFunctor(OpT op, int num_valid) : op(op), num_valid(num_valid) {} + + template + inline __host__ __device__ T operator()(const T &a, const T &b) const + { +#if CUB_PTX_ARCH != 0 + if ((cub::LaneId() % LOGICAL_WARP_THREADS) >= num_valid) + cub::ThreadTrap(); +#endif + + return op(a, b); + } + +}; + + +//--------------------------------------------------------------------- +// Test kernels +//--------------------------------------------------------------------- + +/** + * Generic reduction + */ +template < + typename T, + typename ReductionOp, + typename WarpReduce, + bool PRIMITIVE = Traits::PRIMITIVE> +struct DeviceTest +{ + static __device__ __forceinline__ T Reduce( + typename WarpReduce::TempStorage &temp_storage, + T &data, + ReductionOp &reduction_op) + { + return WarpReduce(temp_storage).Reduce(data, reduction_op); + } + + static __device__ __forceinline__ T Reduce( + typename WarpReduce::TempStorage &temp_storage, + T &data, + ReductionOp &reduction_op, + const int &valid_warp_threads) + { + return WarpReduce(temp_storage).Reduce(data, reduction_op, valid_warp_threads); + } + + template + static __device__ __forceinline__ T HeadSegmentedReduce( + typename WarpReduce::TempStorage &temp_storage, + T &data, + FlagT &flag, + ReductionOp &reduction_op) + { + return WarpReduce(temp_storage).HeadSegmentedReduce(data, flag, reduction_op); + } + + template + static __device__ __forceinline__ T TailSegmentedReduce( + typename WarpReduce::TempStorage &temp_storage, + T &data, + FlagT &flag, + ReductionOp &reduction_op) + { + return WarpReduce(temp_storage).TailSegmentedReduce(data, flag, reduction_op); + } + +}; + + +/** + * Summation + */ +template < + typename T, + typename WarpReduce> +struct DeviceTest +{ + static __device__ __forceinline__ T Reduce( + typename WarpReduce::TempStorage &temp_storage, + T &data, + Sum &reduction_op) + { + return WarpReduce(temp_storage).Sum(data); + } + + static __device__ __forceinline__ T Reduce( + typename WarpReduce::TempStorage &temp_storage, + T &data, + Sum &reduction_op, + const int &valid_warp_threads) + { + return WarpReduce(temp_storage).Sum(data, valid_warp_threads); + } + + template + static __device__ __forceinline__ T HeadSegmentedReduce( + typename WarpReduce::TempStorage &temp_storage, + T &data, + FlagT &flag, + Sum &reduction_op) + { + return WarpReduce(temp_storage).HeadSegmentedSum(data, flag); + } + + template + static __device__ __forceinline__ T TailSegmentedReduce( + typename WarpReduce::TempStorage &temp_storage, + T &data, + FlagT &flag, + Sum &reduction_op) + { + return 
WarpReduce(temp_storage).TailSegmentedSum(data, flag); + } + +}; + + +/** + * Full-tile warp reduction kernel + */ +template < + int WARPS, + int LOGICAL_WARP_THREADS, + typename T, + typename ReductionOp> +__global__ void FullWarpReduceKernel( + T *d_in, + T *d_out, + ReductionOp reduction_op, + clock_t *d_elapsed) +{ + // Cooperative warp-reduce utility type (1 warp) + typedef WarpReduce WarpReduce; + + // Allocate temp storage in shared memory + __shared__ typename WarpReduce::TempStorage temp_storage[WARPS]; + + // Per-thread tile data + T input = d_in[threadIdx.x]; + + // Record elapsed clocks + __threadfence_block(); // workaround to prevent clock hoisting + clock_t start = clock(); + __threadfence_block(); // workaround to prevent clock hoisting + + // Test warp reduce + int warp_id = threadIdx.x / LOGICAL_WARP_THREADS; + + T output = DeviceTest::Reduce( + temp_storage[warp_id], input, reduction_op); + + // Record elapsed clocks + __threadfence_block(); // workaround to prevent clock hoisting + clock_t stop = clock(); + __threadfence_block(); // workaround to prevent clock hoisting + + *d_elapsed = stop - start; + + // Store aggregate + d_out[threadIdx.x] = (threadIdx.x % LOGICAL_WARP_THREADS == 0) ? + output : + input; +} + +/** + * Partially-full warp reduction kernel + */ +template < + int WARPS, + int LOGICAL_WARP_THREADS, + typename T, + typename ReductionOp> +__global__ void PartialWarpReduceKernel( + T *d_in, + T *d_out, + ReductionOp reduction_op, + clock_t *d_elapsed, + int valid_warp_threads) +{ + // Cooperative warp-reduce utility type + typedef WarpReduce WarpReduce; + + // Allocate temp storage in shared memory + __shared__ typename WarpReduce::TempStorage temp_storage[WARPS]; + + // Per-thread tile data + T input = d_in[threadIdx.x]; + + // Record elapsed clocks + __threadfence_block(); // workaround to prevent clock hoisting + clock_t start = clock(); + __threadfence_block(); // workaround to prevent clock hoisting + + // Test partial-warp reduce + int warp_id = threadIdx.x / LOGICAL_WARP_THREADS; + T output = DeviceTest::Reduce( + temp_storage[warp_id], input, reduction_op, valid_warp_threads); + + // Record elapsed clocks + __threadfence_block(); // workaround to prevent clock hoisting + clock_t stop = clock(); + __threadfence_block(); // workaround to prevent clock hoisting + + *d_elapsed = stop - start; + + // Store aggregate + d_out[threadIdx.x] = (threadIdx.x % LOGICAL_WARP_THREADS == 0) ? 
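The kernels above wrap the basic cub::WarpReduce pattern; a stripped-down sketch with the template arguments spelled out, assuming the default 32-thread logical warp, is:

// Launch with 128 threads per block: four logical warps, each reducing 32 inputs.
// Only lane 0 of each warp receives a valid aggregate from Sum().
#include <cub/cub.cuh>

__global__ void WarpSumKernel(const int *d_in, int *d_warp_sums)
{
    typedef cub::WarpReduce<int> WarpReduce;                      // 32-thread logical warps
    __shared__ typename WarpReduce::TempStorage temp_storage[4];  // one per warp in the block

    int warp_id = threadIdx.x / 32;
    int value   = d_in[blockIdx.x * blockDim.x + threadIdx.x];

    int aggregate = WarpReduce(temp_storage[warp_id]).Sum(value);

    if (threadIdx.x % 32 == 0)
        d_warp_sums[blockIdx.x * 4 + warp_id] = aggregate;
}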
+ output : + input; +} + + +/** + * Head-based segmented warp reduction test kernel + */ +template < + int WARPS, + int LOGICAL_WARP_THREADS, + typename T, + typename FlagT, + typename ReductionOp> +__global__ void WarpHeadSegmentedReduceKernel( + T *d_in, + FlagT *d_head_flags, + T *d_out, + ReductionOp reduction_op, + clock_t *d_elapsed) +{ + // Cooperative warp-reduce utility type + typedef WarpReduce WarpReduce; + + // Allocate temp storage in shared memory + __shared__ typename WarpReduce::TempStorage temp_storage[WARPS]; + + // Per-thread tile data + T input = d_in[threadIdx.x]; + FlagT head_flag = d_head_flags[threadIdx.x]; + + // Record elapsed clocks + __threadfence_block(); // workaround to prevent clock hoisting + clock_t start = clock(); + __threadfence_block(); // workaround to prevent clock hoisting + + // Test segmented warp reduce + int warp_id = threadIdx.x / LOGICAL_WARP_THREADS; + T output = DeviceTest::HeadSegmentedReduce( + temp_storage[warp_id], input, head_flag, reduction_op); + + // Record elapsed clocks + __threadfence_block(); // workaround to prevent clock hoisting + clock_t stop = clock(); + __threadfence_block(); // workaround to prevent clock hoisting + + *d_elapsed = stop - start; + + // Store aggregate + d_out[threadIdx.x] = ((threadIdx.x % LOGICAL_WARP_THREADS == 0) || head_flag) ? + output : + input; +} + + +/** + * Tail-based segmented warp reduction test kernel + */ +template < + int WARPS, + int LOGICAL_WARP_THREADS, + typename T, + typename FlagT, + typename ReductionOp> +__global__ void WarpTailSegmentedReduceKernel( + T *d_in, + FlagT *d_tail_flags, + T *d_out, + ReductionOp reduction_op, + clock_t *d_elapsed) +{ + // Cooperative warp-reduce utility type + typedef WarpReduce WarpReduce; + + // Allocate temp storage in shared memory + __shared__ typename WarpReduce::TempStorage temp_storage[WARPS]; + + // Per-thread tile data + T input = d_in[threadIdx.x]; + FlagT tail_flag = d_tail_flags[threadIdx.x]; + FlagT head_flag = (threadIdx.x == 0) ? + 0 : + d_tail_flags[threadIdx.x - 1]; + + // Record elapsed clocks + __threadfence_block(); // workaround to prevent clock hoisting + clock_t start = clock(); + __threadfence_block(); // workaround to prevent clock hoisting + + // Test segmented warp reduce + int warp_id = threadIdx.x / LOGICAL_WARP_THREADS; + T output = DeviceTest::TailSegmentedReduce( + temp_storage[warp_id], input, tail_flag, reduction_op); + + // Record elapsed clocks + __threadfence_block(); // workaround to prevent clock hoisting + clock_t stop = clock(); + __threadfence_block(); // workaround to prevent clock hoisting + + *d_elapsed = stop - start; + + // Store aggregate + d_out[threadIdx.x] = ((threadIdx.x % LOGICAL_WARP_THREADS == 0) || head_flag) ? 
+ output : + input; +} + + +//--------------------------------------------------------------------- +// Host utility subroutines +//--------------------------------------------------------------------- + +/** + * Initialize reduction problem (and solution) + */ +template < + typename T, + typename ReductionOp> +void Initialize( + GenMode gen_mode, + int flag_entropy, + T *h_in, + int *h_flags, + int warps, + int warp_threads, + int valid_warp_threads, + ReductionOp reduction_op, + T *h_head_out, + T *h_tail_out) +{ + for (int i = 0; i < warps * warp_threads; ++i) + { + // Sample a value for this item + InitValue(gen_mode, h_in[i], i); + h_head_out[i] = h_in[i]; + h_tail_out[i] = h_in[i]; + + // Sample whether or not this item will be a segment head + char bits; + RandomBits(bits, flag_entropy); + h_flags[i] = bits & 0x1; + } + + // Accumulate segments (lane 0 of each warp is implicitly a segment head) + for (int warp = 0; warp < warps; ++warp) + { + int warp_offset = warp * warp_threads; + int item_offset = warp_offset + valid_warp_threads - 1; + + // Last item in warp + T head_aggregate = h_in[item_offset]; + T tail_aggregate = h_in[item_offset]; + + if (h_flags[item_offset]) + h_head_out[item_offset] = head_aggregate; + item_offset--; + + // Work backwards + while (item_offset >= warp_offset) + { + if (h_flags[item_offset + 1]) + { + head_aggregate = h_in[item_offset]; + } + else + { + head_aggregate = reduction_op(head_aggregate, h_in[item_offset]); + } + + if (h_flags[item_offset]) + { + h_head_out[item_offset] = head_aggregate; + h_tail_out[item_offset + 1] = tail_aggregate; + tail_aggregate = h_in[item_offset]; + } + else + { + tail_aggregate = reduction_op(tail_aggregate, h_in[item_offset]); + } + + item_offset--; + } + + // Record last segment head_aggregate to head offset + h_head_out[warp_offset] = head_aggregate; + h_tail_out[warp_offset] = tail_aggregate; + } +} + + +/** + * Test warp reduction + */ +template < + int WARPS, + int LOGICAL_WARP_THREADS, + typename T, + typename ReductionOp> +void TestReduce( + GenMode gen_mode, + ReductionOp reduction_op, + int valid_warp_threads = LOGICAL_WARP_THREADS) +{ + const int BLOCK_THREADS = LOGICAL_WARP_THREADS * WARPS; + + // Allocate host arrays + T *h_in = new T[BLOCK_THREADS]; + int *h_flags = new int[BLOCK_THREADS]; + T *h_out = new T[BLOCK_THREADS]; + T *h_tail_out = new T[BLOCK_THREADS]; + + // Initialize problem + Initialize(gen_mode, -1, h_in, h_flags, WARPS, LOGICAL_WARP_THREADS, valid_warp_threads, reduction_op, h_out, h_tail_out); + + // Initialize/clear device arrays + T *d_in = NULL; + T *d_out = NULL; + clock_t *d_elapsed = NULL; + + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(T) * BLOCK_THREADS)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(T) * BLOCK_THREADS)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_elapsed, sizeof(clock_t))); + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(T) * BLOCK_THREADS, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemset(d_out, 0, sizeof(T) * BLOCK_THREADS)); + + if (g_verbose) + { + printf("Data:\n"); + for (int i = 0; i < WARPS; ++i) + DisplayResults(h_in + (i * LOGICAL_WARP_THREADS), valid_warp_threads); + } + + // Run kernel + printf("\nGen-mode %d, %d warps, %d warp threads, %d valid lanes, %s (%d bytes) elements:\n", + gen_mode, + WARPS, + LOGICAL_WARP_THREADS, + valid_warp_threads, + typeid(T).name(), + (int) sizeof(T)); + fflush(stdout); + + if (valid_warp_threads == LOGICAL_WARP_THREADS) + { + // Run full-warp kernel + 
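A minimal sketch of the cub::WarpReduce pattern that FullWarpReduceKernel above exercises, assuming a single 32-thread logical warp per block (the kernel name and launch shape here are illustrative only, not part of the CUB test suite): specialize WarpReduce for the element type, give the warp its own TempStorage in shared memory, and read the aggregate from lane 0.

#include <cub/warp/warp_reduce.cuh>

// One block of 32 threads, i.e. one full logical warp.
__global__ void ExampleWarpSumKernel(int *d_in, int *d_warp_sum)
{
    // Specialize WarpReduce for a 32-thread logical warp of int
    typedef cub::WarpReduce<int> WarpReduce;

    // Shared-memory temporary storage for one logical warp
    __shared__ typename WarpReduce::TempStorage temp_storage;

    // Each lane contributes one item; only lane 0 receives a valid aggregate
    int aggregate = WarpReduce(temp_storage).Sum(d_in[threadIdx.x]);

    if (threadIdx.x == 0)
        *d_warp_sum = aggregate;
}

// Example launch: ExampleWarpSumKernel<<<1, 32>>>(d_in, d_warp_sum);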
FullWarpReduceKernel<<<1, BLOCK_THREADS>>>( + d_in, + d_out, + reduction_op, + d_elapsed); + } + else + { + // Run partial-warp kernel + PartialWarpReduceKernel<<<1, BLOCK_THREADS>>>( + d_in, + d_out, + reduction_op, + d_elapsed, + valid_warp_threads); + } + + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + + // Copy out and display results + printf("\tReduction results: "); + int compare = CompareDeviceResults(h_out, d_out, BLOCK_THREADS, g_verbose, g_verbose); + printf("%s\n", compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + printf("\tElapsed clocks: "); + DisplayDeviceResults(d_elapsed, 1); + + // Cleanup + if (h_in) delete[] h_in; + if (h_flags) delete[] h_flags; + if (h_out) delete[] h_out; + if (h_tail_out) delete[] h_tail_out; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_elapsed) CubDebugExit(g_allocator.DeviceFree(d_elapsed)); +} + + +/** + * Test warp segmented reduction + */ +template < + int WARPS, + int LOGICAL_WARP_THREADS, + typename T, + typename ReductionOp> +void TestSegmentedReduce( + GenMode gen_mode, + int flag_entropy, + ReductionOp reduction_op) +{ + const int BLOCK_THREADS = LOGICAL_WARP_THREADS * WARPS; + + // Allocate host arrays + int compare; + T *h_in = new T[BLOCK_THREADS]; + int *h_flags = new int[BLOCK_THREADS]; + T *h_head_out = new T[BLOCK_THREADS]; + T *h_tail_out = new T[BLOCK_THREADS]; + + // Initialize problem + Initialize(gen_mode, flag_entropy, h_in, h_flags, WARPS, LOGICAL_WARP_THREADS, LOGICAL_WARP_THREADS, reduction_op, h_head_out, h_tail_out); + + // Initialize/clear device arrays + T *d_in = NULL; + int *d_flags = NULL; + T *d_head_out = NULL; + T *d_tail_out = NULL; + clock_t *d_elapsed = NULL; + + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(T) * BLOCK_THREADS)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_flags, sizeof(int) * BLOCK_THREADS)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_head_out, sizeof(T) * BLOCK_THREADS)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_tail_out, sizeof(T) * BLOCK_THREADS)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_elapsed, sizeof(clock_t))); + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(T) * BLOCK_THREADS, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemcpy(d_flags, h_flags, sizeof(int) * BLOCK_THREADS, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemset(d_head_out, 0, sizeof(T) * BLOCK_THREADS)); + CubDebugExit(cudaMemset(d_tail_out, 0, sizeof(T) * BLOCK_THREADS)); + + if (g_verbose) + { + printf("Data:\n"); + for (int i = 0; i < WARPS; ++i) + DisplayResults(h_in + (i * LOGICAL_WARP_THREADS), LOGICAL_WARP_THREADS); + + printf("\nFlags:\n"); + for (int i = 0; i < WARPS; ++i) + DisplayResults(h_flags + (i * LOGICAL_WARP_THREADS), LOGICAL_WARP_THREADS); + } + + printf("\nGen-mode %d, head flag entropy reduction %d, %d warps, %d warp threads, %s (%d bytes) elements:\n", + gen_mode, + flag_entropy, + WARPS, + LOGICAL_WARP_THREADS, + typeid(T).name(), + (int) sizeof(T)); + fflush(stdout); + + // Run head-based kernel + WarpHeadSegmentedReduceKernel<<<1, BLOCK_THREADS>>>( + d_in, + d_flags, + d_head_out, + reduction_op, + d_elapsed); + + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + + // Copy out and display results + printf("\tHead-based segmented reduction results: "); + compare = CompareDeviceResults(h_head_out, d_head_out, BLOCK_THREADS, g_verbose, g_verbose); + printf("%s\n", compare ? 
"FAIL" : "PASS"); + AssertEquals(0, compare); + printf("\tElapsed clocks: "); + DisplayDeviceResults(d_elapsed, 1); + + // Run tail-based kernel + WarpTailSegmentedReduceKernel<<<1, BLOCK_THREADS>>>( + d_in, + d_flags, + d_tail_out, + reduction_op, + d_elapsed); + + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + + // Copy out and display results + printf("\tTail-based segmented reduction results: "); + compare = CompareDeviceResults(h_tail_out, d_tail_out, BLOCK_THREADS, g_verbose, g_verbose); + printf("%s\n", compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + printf("\tElapsed clocks: "); + DisplayDeviceResults(d_elapsed, 1); + + // Cleanup + if (h_in) delete[] h_in; + if (h_flags) delete[] h_flags; + if (h_head_out) delete[] h_head_out; + if (h_tail_out) delete[] h_tail_out; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_flags) CubDebugExit(g_allocator.DeviceFree(d_flags)); + if (d_head_out) CubDebugExit(g_allocator.DeviceFree(d_head_out)); + if (d_tail_out) CubDebugExit(g_allocator.DeviceFree(d_tail_out)); + if (d_elapsed) CubDebugExit(g_allocator.DeviceFree(d_elapsed)); +} + + +/** + * Run battery of tests for different full and partial tile sizes + */ +template < + int WARPS, + int LOGICAL_WARP_THREADS, + typename T, + typename ReductionOp> +void Test( + GenMode gen_mode, + ReductionOp reduction_op) +{ + // Partial tiles + for ( + int valid_warp_threads = 1; + valid_warp_threads < LOGICAL_WARP_THREADS; + valid_warp_threads += CUB_MAX(1, LOGICAL_WARP_THREADS / 5)) + { + // Without wrapper (to test non-excepting PTX POD-op specializations) + TestReduce(gen_mode, reduction_op, valid_warp_threads); + + // With wrapper to ensure no ops called on OOB lanes + WrapperFunctor wrapped_op(reduction_op, valid_warp_threads); + TestReduce(gen_mode, wrapped_op, valid_warp_threads); + } + + // Full tile + TestReduce(gen_mode, reduction_op, LOGICAL_WARP_THREADS); + + // Segmented reduction with different head flags + for (int flag_entropy = 0; flag_entropy < 10; ++flag_entropy) + { + TestSegmentedReduce(gen_mode, flag_entropy, reduction_op); + } +} + + +/** + * Run battery of tests for different data types and reduce ops + */ +template < + int WARPS, + int LOGICAL_WARP_THREADS> +void Test(GenMode gen_mode) +{ + // primitive + Test( gen_mode, Sum()); + Test( gen_mode, Sum()); + Test( gen_mode, Sum()); + Test( gen_mode, Sum()); + + Test( gen_mode, Sum()); + Test( gen_mode, Sum()); + Test( gen_mode, Sum()); + Test( gen_mode, Sum()); + + if (gen_mode != RANDOM) + { + Test( gen_mode, Sum()); + Test( gen_mode, Sum()); + } + + // primitive (alternative reduce op) + Test( gen_mode, Max()); + Test( gen_mode, Max()); + Test( gen_mode, Max()); + Test( gen_mode, Max()); + + // vec-1 + Test( gen_mode, Sum()); + + // vec-2 + Test( gen_mode, Sum()); + Test( gen_mode, Sum()); + Test( gen_mode, Sum()); + Test( gen_mode, Sum()); + + // vec-4 + Test( gen_mode, Sum()); + Test( gen_mode, Sum()); + Test( gen_mode, Sum()); + Test( gen_mode, Sum()); + + // complex + Test( gen_mode, Sum()); + Test( gen_mode, Sum()); +} + + +/** + * Run battery of tests for different problem generation options + */ +template < + int WARPS, + int LOGICAL_WARP_THREADS> +void Test() +{ + Test(UNIFORM); + Test(INTEGER_SEED); + Test(RANDOM); +} + + +/** + * Run battery of tests for different number of active warps + */ +template +void Test() +{ + Test<1, LOGICAL_WARP_THREADS>(); + + // Only power-of-two subwarps can be tiled + if ((LOGICAL_WARP_THREADS == 32) || PowerOfTwo::VALUE) + Test<2, 
LOGICAL_WARP_THREADS>(); +} + + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("repeat", g_repeat); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=] " + "[--repeat=]" + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + +#ifdef QUICK_TEST + + // Compile/run quick tests + TestReduce<1, 32, int>(UNIFORM, Sum()); + + TestReduce<1, 32, double>(UNIFORM, Sum()); + TestReduce<2, 16, TestBar>(UNIFORM, Sum()); + TestSegmentedReduce<1, 32, int>(UNIFORM, 1, Sum()); + +#else + + // Compile/run thorough tests + for (int i = 0; i <= g_repeat; ++i) + { + // Test logical warp sizes + Test<32>(); + Test<16>(); + Test<9>(); + Test<7>(); + } + +#endif + + return 0; +} + + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/test/test_warp_scan.cu b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_warp_scan.cu new file mode 100644 index 0000000..ba8e5cf --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/test/test_warp_scan.cu @@ -0,0 +1,661 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + ******************************************************************************/ + +/****************************************************************************** + * Test of WarpScan utilities + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include + +#include +#include + +#include "test_util.h" + +using namespace cub; + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +static const int NUM_WARPS = 2; + + +bool g_verbose = false; +int g_repeat = 0; +CachingDeviceAllocator g_allocator(true); + + +/** + * Primitive variant to test + */ +enum TestMode +{ + BASIC, + AGGREGATE, +}; + + + +/** + * \brief WrapperFunctor (for precluding test-specialized dispatch to *Sum variants) + */ +template +struct WrapperFunctor +{ + OpT op; + + WrapperFunctor(OpT op) : op(op) {} + + template + __host__ __device__ __forceinline__ T operator()(const T &a, const T &b) const + { + return op(a, b); + } +}; + +//--------------------------------------------------------------------- +// Test kernels +//--------------------------------------------------------------------- + +/// Exclusive scan basic +template +__device__ __forceinline__ void DeviceTest( + WarpScanT &warp_scan, + T &data, + T &initial_value, + ScanOpT &scan_op, + T &aggregate, + Int2Type test_mode, + IsPrimitiveT is_primitive) +{ + // Test basic warp scan + warp_scan.ExclusiveScan(data, data, initial_value, scan_op); +} + +/// Exclusive scan aggregate +template < + typename WarpScanT, + typename T, + typename ScanOpT, + typename IsPrimitiveT> +__device__ __forceinline__ void DeviceTest( + WarpScanT &warp_scan, + T &data, + T &initial_value, + ScanOpT &scan_op, + T &aggregate, + Int2Type test_mode, + IsPrimitiveT is_primitive) +{ + // Test with cumulative aggregate + warp_scan.ExclusiveScan(data, data, initial_value, scan_op, aggregate); +} + + +/// Exclusive sum basic +template < + typename WarpScanT, + typename T> +__device__ __forceinline__ void DeviceTest( + WarpScanT &warp_scan, + T &data, + T &initial_value, + Sum &scan_op, + T &aggregate, + Int2Type test_mode, + Int2Type is_primitive) +{ + // Test basic warp scan + warp_scan.ExclusiveSum(data, data); +} + + +/// Exclusive sum aggregate +template < + typename WarpScanT, + typename T> +__device__ __forceinline__ void DeviceTest( + WarpScanT &warp_scan, + T &data, + T &initial_value, + Sum &scan_op, + T &aggregate, + Int2Type test_mode, + Int2Type is_primitive) +{ + // Test with cumulative aggregate + warp_scan.ExclusiveSum(data, data, aggregate); +} + + +/// Inclusive scan basic +template < + typename WarpScanT, + typename T, + typename ScanOpT, + typename IsPrimitiveT> +__device__ __forceinline__ void DeviceTest( + WarpScanT &warp_scan, + T &data, + NullType &initial_value, + ScanOpT &scan_op, + T &aggregate, + Int2Type test_mode, + IsPrimitiveT is_primitive) +{ + // Test basic warp scan + warp_scan.InclusiveScan(data, data, scan_op); +} + +/// Inclusive scan aggregate +template < + typename WarpScanT, + typename T, + typename ScanOpT, + typename IsPrimitiveT> +__device__ __forceinline__ void DeviceTest( + WarpScanT &warp_scan, + T &data, + NullType &initial_value, + ScanOpT &scan_op, + T &aggregate, + Int2Type test_mode, + IsPrimitiveT is_primitive) +{ + // Test with cumulative aggregate + warp_scan.InclusiveScan(data, data, scan_op, 
aggregate); +} + +/// Inclusive sum basic +template < + typename WarpScanT, + typename T, + typename InitialValueT> +__device__ __forceinline__ void DeviceTest( + WarpScanT &warp_scan, + T &data, + NullType &initial_value, + Sum &scan_op, + T &aggregate, + Int2Type test_mode, + Int2Type is_primitive) +{ + // Test basic warp scan + warp_scan.InclusiveSum(data, data); +} + +/// Inclusive sum aggregate +template < + typename WarpScanT, + typename T, + typename InitialValueT> +__device__ __forceinline__ void DeviceTest( + WarpScanT &warp_scan, + T &data, + NullType &initial_value, + Sum &scan_op, + T &aggregate, + Int2Type test_mode, + Int2Type is_primitive) +{ + // Test with cumulative aggregate + warp_scan.InclusiveSum(data, data, aggregate); +} + + +/** + * WarpScan test kernel + */ +template < + int LOGICAL_WARP_THREADS, + TestMode TEST_MODE, + typename T, + typename ScanOpT, + typename InitialValueT> +__global__ void WarpScanKernel( + T *d_in, + T *d_out, + T *d_aggregate, + ScanOpT scan_op, + InitialValueT initial_value, + clock_t *d_elapsed) +{ + // Cooperative warp-scan utility type (1 warp) + typedef WarpScan WarpScanT; + + // Allocate temp storage in shared memory + __shared__ typename WarpScanT::TempStorage temp_storage[NUM_WARPS]; + + // Get warp index + int warp_id = threadIdx.x / LOGICAL_WARP_THREADS; + + // Per-thread tile data + T data = d_in[threadIdx.x]; + + // Start cycle timer + __threadfence_block(); // workaround to prevent clock hoisting + clock_t start = clock(); + __threadfence_block(); // workaround to prevent clock hoisting + + T aggregate; + + // Test scan + WarpScanT warp_scan(temp_storage[warp_id]); + DeviceTest( + warp_scan, + data, + initial_value, + scan_op, + aggregate, + Int2Type(), + Int2Type::PRIMITIVE>()); + + // Stop cycle timer + __threadfence_block(); // workaround to prevent clock hoisting + clock_t stop = clock(); + __threadfence_block(); // workaround to prevent clock hoisting + + // Store data + d_out[threadIdx.x] = data; + + if (TEST_MODE != BASIC) + { + // Store aggregate + d_aggregate[threadIdx.x] = aggregate; + } + + // Store time + if (threadIdx.x == 0) + { + *d_elapsed = (start > stop) ? 
start - stop : stop - start; + } +} + + +//--------------------------------------------------------------------- +// Host utility subroutines +//--------------------------------------------------------------------- + +/** + * Initialize exclusive-scan problem (and solution) + */ +template < + typename T, + typename ScanOpT> +void Initialize( + GenMode gen_mode, + T *h_in, + T *h_reference, + int logical_warp_items, + ScanOpT scan_op, + T initial_value, + T warp_aggregates[NUM_WARPS]) +{ + for (int w = 0; w < NUM_WARPS; ++w) + { + int base_idx = (w * logical_warp_items); + int i = base_idx; + + InitValue(gen_mode, h_in[i], i); + + T warp_aggregate = h_in[i]; + h_reference[i] = initial_value; + T inclusive = scan_op(initial_value, h_in[i]); + + for (i = i + 1; i < base_idx + logical_warp_items; ++i) + { + InitValue(gen_mode, h_in[i], i); + h_reference[i] = inclusive; + inclusive = scan_op(inclusive, h_in[i]); + warp_aggregate = scan_op(warp_aggregate, h_in[i]); + } + + warp_aggregates[w] = warp_aggregate; + } + +} + + +/** + * Initialize inclusive-scan problem (and solution) + */ +template < + typename T, + typename ScanOpT> +void Initialize( + GenMode gen_mode, + T *h_in, + T *h_reference, + int logical_warp_items, + ScanOpT scan_op, + NullType, + T warp_aggregates[NUM_WARPS]) +{ + for (int w = 0; w < NUM_WARPS; ++w) + { + int base_idx = (w * logical_warp_items); + int i = base_idx; + + InitValue(gen_mode, h_in[i], i); + + T warp_aggregate = h_in[i]; + T inclusive = h_in[i]; + h_reference[i] = inclusive; + + for (i = i + 1; i < base_idx + logical_warp_items; ++i) + { + InitValue(gen_mode, h_in[i], i); + inclusive = scan_op(inclusive, h_in[i]); + warp_aggregate = scan_op(warp_aggregate, h_in[i]); + h_reference[i] = inclusive; + } + + warp_aggregates[w] = warp_aggregate; + } +} + + +/** + * Test warp scan + */ +template < + int LOGICAL_WARP_THREADS, + TestMode TEST_MODE, + typename T, + typename ScanOpT, + typename InitialValueT> // NullType implies inclusive-scan, otherwise inclusive scan +void Test( + GenMode gen_mode, + ScanOpT scan_op, + InitialValueT initial_value) +{ + enum { + TOTAL_ITEMS = LOGICAL_WARP_THREADS * NUM_WARPS, + }; + + // Allocate host arrays + T *h_in = new T[TOTAL_ITEMS]; + T *h_reference = new T[TOTAL_ITEMS]; + T *h_aggregate = new T[TOTAL_ITEMS]; + + // Initialize problem + T aggregates[NUM_WARPS]; + + Initialize( + gen_mode, + h_in, + h_reference, + LOGICAL_WARP_THREADS, + scan_op, + initial_value, + aggregates); + + if (g_verbose) + { + printf("Input: \n"); + DisplayResults(h_in, TOTAL_ITEMS); + printf("\n"); + } + + for (int w = 0; w < NUM_WARPS; ++w) + { + for (int i = 0; i < LOGICAL_WARP_THREADS; ++i) + { + h_aggregate[(w * LOGICAL_WARP_THREADS) + i] = aggregates[w]; + } + } + + // Initialize/clear device arrays + T *d_in = NULL; + T *d_out = NULL; + T *d_aggregate = NULL; + clock_t *d_elapsed = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(T) * TOTAL_ITEMS)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(T) * (TOTAL_ITEMS + 1))); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_aggregate, sizeof(T) * TOTAL_ITEMS)); + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_elapsed, sizeof(clock_t))); + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(T) * TOTAL_ITEMS, cudaMemcpyHostToDevice)); + CubDebugExit(cudaMemset(d_out, 0, sizeof(T) * (TOTAL_ITEMS + 1))); + CubDebugExit(cudaMemset(d_aggregate, 0, sizeof(T) * TOTAL_ITEMS)); + + // Run kernel + printf("Test-mode %d (%s), gen-mode %d (%s), %s warpscan, %d warp threads, %s 
(%d bytes) elements:\n", + TEST_MODE, typeid(TEST_MODE).name(), + gen_mode, typeid(gen_mode).name(), + (Equals::VALUE) ? "Inclusive" : "Exclusive", + LOGICAL_WARP_THREADS, + typeid(T).name(), + (int) sizeof(T)); + fflush(stdout); + + // Run aggregate/prefix kernel + WarpScanKernel<<<1, TOTAL_ITEMS>>>( + d_in, + d_out, + d_aggregate, + scan_op, + initial_value, + d_elapsed); + + printf("\tElapsed clocks: "); + DisplayDeviceResults(d_elapsed, 1); + + CubDebugExit(cudaPeekAtLastError()); + CubDebugExit(cudaDeviceSynchronize()); + + // Copy out and display results + printf("\tScan results: "); + int compare = CompareDeviceResults(h_reference, d_out, TOTAL_ITEMS, g_verbose, g_verbose); + printf("%s\n", compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + + // Copy out and display aggregate + if (TEST_MODE == AGGREGATE) + { + printf("\tScan aggregate: "); + compare = CompareDeviceResults(h_aggregate, d_aggregate, TOTAL_ITEMS, g_verbose, g_verbose); + printf("%s\n", compare ? "FAIL" : "PASS"); + AssertEquals(0, compare); + } + + // Cleanup + if (h_in) delete[] h_in; + if (h_reference) delete[] h_reference; + if (h_aggregate) delete[] h_aggregate; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + if (d_aggregate) CubDebugExit(g_allocator.DeviceFree(d_aggregate)); + if (d_elapsed) CubDebugExit(g_allocator.DeviceFree(d_elapsed)); +} + + +/** + * Run battery of tests for different primitive variants + */ +template < + int LOGICAL_WARP_THREADS, + typename ScanOpT, + typename T> +void Test( + GenMode gen_mode, + ScanOpT scan_op, + T initial_value) +{ + // Exclusive + Test(gen_mode, scan_op, T()); + Test(gen_mode, scan_op, T()); + + // Exclusive (non-specialized, so we can use initial-value) + Test(gen_mode, WrapperFunctor(scan_op), initial_value); + Test(gen_mode, WrapperFunctor(scan_op), initial_value); + + // Inclusive + Test(gen_mode, scan_op, NullType()); + Test(gen_mode, scan_op, NullType()); +} + + +/** + * Run battery of tests for different data types and scan ops + */ +template +void Test(GenMode gen_mode) +{ + // Get device ordinal + int device_ordinal; + CubDebugExit(cudaGetDevice(&device_ordinal)); + + // Get ptx version + int ptx_version; + CubDebugExit(PtxVersion(ptx_version)); + + // primitive + Test(gen_mode, Sum(), (char) 99); + Test(gen_mode, Sum(), (short) 99); + Test(gen_mode, Sum(), (int) 99); + Test(gen_mode, Sum(), (long) 99); + Test(gen_mode, Sum(), (long long) 99); + if (gen_mode != RANDOM) { + // Only test numerically stable inputs + Test(gen_mode, Sum(), (float) 99); + if (ptx_version > 100) + Test(gen_mode, Sum(), (double) 99); + } + + // primitive (alternative scan op) + Test(gen_mode, Max(), (unsigned char) 99); + Test(gen_mode, Max(), (unsigned short) 99); + Test(gen_mode, Max(), (unsigned int) 99); + Test(gen_mode, Max(), (unsigned long long) 99); + + // vec-2 + Test(gen_mode, Sum(), make_uchar2(17, 21)); + Test(gen_mode, Sum(), make_ushort2(17, 21)); + Test(gen_mode, Sum(), make_uint2(17, 21)); + Test(gen_mode, Sum(), make_ulong2(17, 21)); + Test(gen_mode, Sum(), make_ulonglong2(17, 21)); + if (gen_mode != RANDOM) { + // Only test numerically stable inputs + Test(gen_mode, Sum(), make_float2(17, 21)); + if (ptx_version > 100) + Test(gen_mode, Sum(), make_double2(17, 21)); + } + + // vec-4 + Test(gen_mode, Sum(), make_char4(17, 21, 32, 85)); + Test(gen_mode, Sum(), make_short4(17, 21, 32, 85)); + Test(gen_mode, Sum(), make_int4(17, 21, 32, 85)); + Test(gen_mode, Sum(), make_long4(17, 21, 32, 85)); + 
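A minimal sketch of the cub::WarpScan calls that these type sweeps ultimately drive through WarpScanKernel, assuming a single 32-thread logical warp (kernel name and launch shape are illustrative only): specialize WarpScan for the element type, provide per-warp TempStorage, and use the scan overload that also returns the warp-wide aggregate.

#include <cub/warp/warp_scan.cuh>

// One block of 32 threads, i.e. one full logical warp.
__global__ void ExampleWarpScanKernel(int *d_data, int *d_aggregate)
{
    // Specialize WarpScan for a 32-thread logical warp of int
    typedef cub::WarpScan<int> WarpScan;

    // Shared-memory temporary storage for one logical warp
    __shared__ typename WarpScan::TempStorage temp_storage;

    int thread_data = d_data[threadIdx.x];

    // Inclusive prefix sum across the warp; every lane also receives
    // the warp-wide aggregate
    int warp_aggregate;
    WarpScan(temp_storage).InclusiveSum(thread_data, thread_data, warp_aggregate);

    d_data[threadIdx.x] = thread_data;
    if (threadIdx.x == 0)
        *d_aggregate = warp_aggregate;
}

// Example launch: ExampleWarpScanKernel<<<1, 32>>>(d_data, d_aggregate);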
Test(gen_mode, Sum(), make_longlong4(17, 21, 32, 85)); + if (gen_mode != RANDOM) { + // Only test numerically stable inputs + Test(gen_mode, Sum(), make_float4(17, 21, 32, 85)); + if (ptx_version > 100) + Test(gen_mode, Sum(), make_double4(17, 21, 32, 85)); + } + + // complex + Test(gen_mode, Sum(), TestFoo::MakeTestFoo(17, 21, 32, 85)); + Test(gen_mode, Sum(), TestBar(17, 21)); + +} + + +/** + * Run battery of tests for different problem generation options + */ +template +void Test() +{ + Test(UNIFORM); + Test(INTEGER_SEED); + Test(RANDOM); +} + + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + g_verbose = args.CheckCmdLineFlag("v"); + args.GetCmdLineArgument("repeat", g_repeat); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=] " + "[--repeat=]" + "[--v] " + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + +#ifdef QUICK_TEST + + // Compile/run quick tests + Test<32, AGGREGATE, int>(UNIFORM, Sum(), (int) 0); + Test<32, AGGREGATE, float>(UNIFORM, Sum(), (float) 0); + Test<32, AGGREGATE, long long>(UNIFORM, Sum(), (long long) 0); + Test<32, AGGREGATE, double>(UNIFORM, Sum(), (double) 0); + + typedef KeyValuePair T; + cub::Sum sum_op; + Test<32, AGGREGATE, T>(UNIFORM, ReduceBySegmentOp(sum_op), T()); + +#else + + // Compile/run thorough tests + for (int i = 0; i <= g_repeat; ++i) + { + // Test logical warp sizes + Test<32>(); + Test<16>(); + Test<9>(); + Test<2>(); + } + +#endif + + return 0; +} + + + + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/tune/.gitignore b/hash-graph-dehornetify/externals/cub-1.8.0/tune/.gitignore new file mode 100644 index 0000000..5e56e04 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/tune/.gitignore @@ -0,0 +1 @@ +/bin diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/tune/Makefile b/hash-graph-dehornetify/externals/cub-1.8.0/tune/Makefile new file mode 100644 index 0000000..926b340 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/tune/Makefile @@ -0,0 +1,192 @@ +#/****************************************************************************** +# * Copyright (c) 2011, Duane Merrill. All rights reserved. +# * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. +# * +# * Redistribution and use in source and binary forms, with or without +# * modification, are permitted provided that the following conditions are met: +# * * Redistributions of source code must retain the above copyright +# * notice, this list of conditions and the following disclaimer. +# * * Redistributions in binary form must reproduce the above copyright +# * notice, this list of conditions and the following disclaimer in the +# * documentation and/or other materials provided with the distribution. +# * * Neither the name of the NVIDIA CORPORATION nor the +# * names of its contributors may be used to endorse or promote products +# * derived from this software without specific prior written permission. +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +# * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY +# * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# * +#******************************************************************************/ + +#------------------------------------------------------------------------------- +# Build script for project +#------------------------------------------------------------------------------- + +NVCC = "$(shell which nvcc)" +NVCC_VERSION = $(strip $(shell nvcc --version | grep release | sed 's/.*release //' | sed 's/,.*//')) + +# detect OS +OSUPPER = $(shell uname -s 2>/dev/null | tr [:lower:] [:upper:]) + +#------------------------------------------------------------------------------- +# Libs +#------------------------------------------------------------------------------- + + +#------------------------------------------------------------------------------- +# Includes +#------------------------------------------------------------------------------- + +INC = -I. -I.. -I../test + +#------------------------------------------------------------------------------- +# Libs +#------------------------------------------------------------------------------- + +LIBS += -lcudart + +#------------------------------------------------------------------------------- +# Defines +#------------------------------------------------------------------------------- + +DEFINES = + +#------------------------------------------------------------------------------- +# SM Arch +#------------------------------------------------------------------------------- + +ifdef sm + SM_ARCH = $(sm) +else + SM_ARCH = 200 +endif + +# Only one arch per tuning binary +ifeq (350, $(findstring 350, $(SM_ARCH))) + SM_TARGETS = -arch=sm_35 + SM_ARCH = 350 +endif +ifeq (300, $(findstring 300, $(SM_ARCH))) + SM_TARGETS = -arch=sm_30 + SM_ARCH = 300 +endif +ifeq (200, $(findstring 200, $(SM_ARCH))) + SM_TARGETS = -arch=sm_20 + SM_ARCH = 200 +endif +ifeq (130, $(findstring 130, $(SM_ARCH))) + SM_TARGETS = -arch=sm_13 + SM_ARCH = 130 +endif +ifeq (110, $(findstring 110, $(SM_ARCH))) + SM_TARGETS = -arch=sm_11 + SM_ARCH = 110 +endif +ifeq (100, $(findstring 100, $(SM_ARCH))) + SM_TARGETS = -arch=sm_10 + SM_ARCH = 100 +endif + + +#------------------------------------------------------------------------------- +# Compiler Flags +#------------------------------------------------------------------------------- + +NVCCFLAGS = -Xptxas -v -Xcudafe -\# + +# Help the compiler/linker work with huge numbers of kernels on Windows +ifeq (WIN_NT, $(findstring WIN_NT, $(OSUPPER))) + NVCCFLAGS += -Xcompiler /bigobj -Xcompiler /Zm500 +endif + +# 32/64-bit (32-bit device pointers by default) +ifeq ($(force32), 1) + CPU_ARCH = -m32 + CPU_ARCH_SUFFIX = i386 +else + CPU_ARCH = -m64 + CPU_ARCH_SUFFIX = x86_64 +endif + +# CUDA ABI enable/disable (enabled by default) +ifneq ($(abi), 0) + ABI_SUFFIX = abi +else + NVCCFLAGS += -Xptxas -abi=no + ABI_SUFFIX = noabi +endif + +# NVVM/Open64 middle-end compiler (nvvm by default) +ifeq ($(open64), 1) + NVCCFLAGS += -open64 + PTX_SUFFIX = open64 +else + PTX_SUFFIX = nvvm +endif + +# Verbose toolchain output from nvcc 
+ifeq ($(verbose), 1) + NVCCFLAGS += -v +endif + +# Keep intermediate compilation artifacts +ifeq ($(keep), 1) + NVCCFLAGS += -keep +endif + +# Data type size to compile a schmoo binary for +ifdef tunesize + TUNE_SIZE = $(tunesize) +else + TUNE_SIZE = 4 +endif + + +SUFFIX = $(TUNE_SIZE)B_sm$(SM_ARCH)_$(PTX_SUFFIX)_$(NVCC_VERSION)_$(ABI_SUFFIX)_$(CPU_ARCH_SUFFIX) + +#------------------------------------------------------------------------------- +# Dependency Lists +#------------------------------------------------------------------------------- + +rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d)) + +DEPS = ./Makefile \ + ../test/test_util.h \ + $(call rwildcard,../cub/,*.cuh) + + +#------------------------------------------------------------------------------- +# make default +#------------------------------------------------------------------------------- + +default: + + +#------------------------------------------------------------------------------- +# make clean +#------------------------------------------------------------------------------- + +clean : + rm -f bin/*$(CPU_ARCH_SUFFIX)* + rm -f *.i* *.cubin *.cu.c *.cudafe* *.fatbin.c *.ptx *.hash *.cu.cpp *.o + + + +#------------------------------------------------------------------------------- +# make tune_device_reduce +#------------------------------------------------------------------------------- + +tune_device_reduce: bin/tune_device_reduce_$(SUFFIX) + +bin/tune_device_reduce_$(SUFFIX) : tune_device_reduce.cu $(DEPS) + mkdir -p bin + $(NVCC) $(DEFINES) $(SM_TARGETS) -o bin/tune_device_reduce_$(SUFFIX) tune_device_reduce.cu $(NVCCFLAGS) $(CPU_ARCH) $(INC) $(LIBS) -O3 -DTUNE_ARCH=$(SM_ARCH) -DTUNE_SIZE=$(TUNE_SIZE) + diff --git a/hash-graph-dehornetify/externals/cub-1.8.0/tune/tune_device_reduce.cu b/hash-graph-dehornetify/externals/cub-1.8.0/tune/tune_device_reduce.cu new file mode 100644 index 0000000..ec0cf57 --- /dev/null +++ b/hash-graph-dehornetify/externals/cub-1.8.0/tune/tune_device_reduce.cu @@ -0,0 +1,763 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + +/****************************************************************************** + * Evaluates different tuning configurations of DeviceReduce. + * + * The best way to use this program: + * (1) Find the best all-around single-block tune for a given arch. + * For example, 100 samples [1 ..512], 100 timing iterations per config per sample: + * ./bin/tune_device_reduce_sm200_nvvm_5.0_abi_i386 --i=100 --s=100 --n=512 --single --device=0 + * (2) Update the single tune in device_reduce.cuh + * (3) Find the best all-around multi-block tune for a given arch. + * For example, 100 samples [single-block tile-size .. 50,331,648], 100 timing iterations per config per sample: + * ./bin/tune_device_reduce_sm200_nvvm_5.0_abi_i386 --i=100 --s=100 --device=0 + * (4) Update the multi-block tune in device_reduce.cuh + * + ******************************************************************************/ + +// Ensure printing of CUDA runtime errors to console +#define CUB_STDERR + +#include +#include +#include +#include +#include "../test/test_util.h" + +using namespace cub; +using namespace std; + + +//--------------------------------------------------------------------- +// Globals, constants and typedefs +//--------------------------------------------------------------------- + +#ifndef TUNE_ARCH +#define TUNE_ARCH 100 +#endif + +int g_max_items = 48 * 1024 * 1024; +int g_samples = 100; +int g_timing_iterations = 2; +bool g_verbose = false; +bool g_single = false; +bool g_verify = true; +CachingDeviceAllocator g_allocator; + + +//--------------------------------------------------------------------- +// Host utility subroutines +//--------------------------------------------------------------------- + +/** + * Initialize problem + */ +template +void Initialize( + GenMode gen_mode, + T *h_in, + int num_items) +{ + for (int i = 0; i < num_items; ++i) + { + InitValue(gen_mode, h_in[i], i); + } +} + +/** + * Sequential reduction + */ +template +T Reduce( + T *h_in, + ReductionOp reduction_op, + int num_items) +{ + T retval = h_in[0]; + for (int i = 1; i < num_items; ++i) + retval = reduction_op(retval, h_in[i]); + + return retval; +} + + + +//--------------------------------------------------------------------- +// Full tile test generation +//--------------------------------------------------------------------- + + + +/** + * Wrapper structure for generating and running different tuning configurations + */ +template < + typename T, + typename OffsetT, + typename ReductionOp> +struct Schmoo +{ + //--------------------------------------------------------------------- + // Types + //--------------------------------------------------------------------- + + /// Pairing of kernel function pointer and corresponding dispatch params + template + struct DispatchTuple + { + KernelPtr kernel_ptr; + DeviceReduce::KernelDispachParams params; + + float avg_throughput; + float best_avg_throughput; + OffsetT best_size; + float 
hmean_speedup; + + + DispatchTuple() : + kernel_ptr(0), + params(DeviceReduce::KernelDispachParams()), + avg_throughput(0.0), + best_avg_throughput(0.0), + hmean_speedup(0.0), + best_size(0) + {} + }; + + /** + * Comparison operator for DispatchTuple.avg_throughput + */ + template + static bool MinSpeedup(const Tuple &a, const Tuple &b) + { + float delta = a.hmean_speedup - b.hmean_speedup; + + return ((delta < 0.02) && (delta > -0.02)) ? + (a.best_avg_throughput < b.best_avg_throughput) : // Negligible average performance differences: defer to best performance + (a.hmean_speedup < b.hmean_speedup); + } + + + + /// Multi-block reduction kernel type and dispatch tuple type + typedef void (*MultiBlockDeviceReduceKernelPtr)(T*, T*, OffsetT, GridEvenShare, GridQueue, ReductionOp); + typedef DispatchTuple MultiDispatchTuple; + + /// Single-block reduction kernel type and dispatch tuple type + typedef void (*SingleBlockDeviceReduceKernelPtr)(T*, T*, OffsetT, ReductionOp); + typedef DispatchTuple SingleDispatchTuple; + + + //--------------------------------------------------------------------- + // Fields + //--------------------------------------------------------------------- + + vector multi_kernels; // List of generated multi-block kernels + vector single_kernels; // List of generated single-block kernels + + + //--------------------------------------------------------------------- + // Kernel enumeration methods + //--------------------------------------------------------------------- + + /** + * Must have smem that fits in the SM + * Must have vector load length that divides items per thread + */ + template + struct SmemSize + { + enum + { + BYTES = sizeof(typename BlockReduceTiles::TempStorage), + IS_OK = ((BYTES < ArchProps::SMEM_BYTES) && + (TilesReducePolicy::ITEMS_PER_THREAD % TilesReducePolicy::VECTOR_LOAD_LENGTH == 0)) + }; + }; + + + /** + * Specialization that allows kernel generation with the specified TilesReducePolicy + */ + template < + typename TilesReducePolicy, + bool IsOk = SmemSize::IS_OK> + struct Ok + { + /// Enumerate multi-block kernel and add to the list + template + static void GenerateMulti( + KernelsVector &multi_kernels, + int subscription_factor) + { + MultiDispatchTuple tuple; + tuple.params.template Init(subscription_factor); + tuple.kernel_ptr = ReducePrivatizedKernel; + multi_kernels.push_back(tuple); + } + + + /// Enumerate single-block kernel and add to the list + template + static void GenerateSingle(KernelsVector &single_kernels) + { + SingleDispatchTuple tuple; + tuple.params.template Init(); + tuple.kernel_ptr = ReduceSingleKernel; + single_kernels.push_back(tuple); + } + }; + + /** + * Specialization that rejects kernel generation with the specified TilesReducePolicy + */ + template + struct Ok + { + template + static void GenerateMulti(KernelsVector &multi_kernels, int subscription_factor) {} + + template + static void GenerateSingle(KernelsVector &single_kernels) {} + }; + + + /// Enumerate block-scheduling variations + template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + int VECTOR_LOAD_LENGTH, + BlockReduceAlgorithm BLOCK_ALGORITHM, + CacheLoadModifier LOAD_MODIFIER> + void Enumerate() + { + // Multi-block kernels + Ok >::GenerateMulti(multi_kernels, 1); + Ok >::GenerateMulti(multi_kernels, 2); + Ok >::GenerateMulti(multi_kernels, 4); + Ok >::GenerateMulti(multi_kernels, 8); +#if TUNE_ARCH >= 200 + Ok >::GenerateMulti(multi_kernels, 1); +#endif + + // Single-block kernels + Ok >::GenerateSingle(single_kernels); + } + + + /// Enumerate load 
modifier variations + template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + int VECTOR_LOAD_LENGTH, + BlockReduceAlgorithm BLOCK_ALGORITHM> + void Enumerate() + { + Enumerate(); +#if TUNE_ARCH >= 350 + Enumerate(); +#endif + } + + + /// Enumerate block algorithms + template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD, + int VECTOR_LOAD_LENGTH> + void Enumerate() + { + Enumerate(); + Enumerate(); + } + + + /// Enumerate vectorization variations + template < + int BLOCK_THREADS, + int ITEMS_PER_THREAD> + void Enumerate() + { + Enumerate(); + Enumerate(); + Enumerate(); + } + + + /// Enumerate thread-granularity variations + template + void Enumerate() + { + Enumerate(); + Enumerate(); + Enumerate(); + + Enumerate(); + Enumerate(); + Enumerate(); + + Enumerate(); + Enumerate(); + Enumerate(); + + Enumerate(); + Enumerate(); + Enumerate(); + + Enumerate(); + Enumerate(); + Enumerate(); + } + + + /// Enumerate block size variations + void Enumerate() + { + printf("\nEnumerating kernels\n"); fflush(stdout); + + Enumerate<32>(); + Enumerate<64>(); + Enumerate<96>(); + Enumerate<128>(); + Enumerate<160>(); + Enumerate<192>(); + Enumerate<256>(); + Enumerate<512>(); + } + + + //--------------------------------------------------------------------- + // Test methods + //--------------------------------------------------------------------- + + /** + * Test a configuration + */ + void TestConfiguration( + MultiDispatchTuple &multi_dispatch, + SingleDispatchTuple &single_dispatch, + T* d_in, + T* d_out, + T* h_reference, + OffsetT num_items, + ReductionOp reduction_op) + { + // Clear output + if (g_verify) CubDebugExit(cudaMemset(d_out, 0, sizeof(T))); + + // Allocate temporary storage + void *d_temp_storage = NULL; + size_t temp_storage_bytes = 0; + CubDebugExit(DeviceReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + multi_dispatch.kernel_ptr, + single_dispatch.kernel_ptr, + FillAndResetDrainKernel, + multi_dispatch.params, + single_dispatch.params, + d_in, + d_out, + num_items, + reduction_op)); + CubDebugExit(g_allocator.DeviceAllocate(&d_temp_storage, temp_storage_bytes)); + + // Warmup/correctness iteration + CubDebugExit(DeviceReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + multi_dispatch.kernel_ptr, + single_dispatch.kernel_ptr, + FillAndResetDrainKernel, + multi_dispatch.params, + single_dispatch.params, + d_in, + d_out, + num_items, + reduction_op)); + + if (g_verify) CubDebugExit(cudaDeviceSynchronize()); + + // Copy out and display results + int compare = (g_verify) ? 
+ CompareDeviceResults(h_reference, d_out, 1, true, false) : + 0; + + // Performance + GpuTimer gpu_timer; + float elapsed_millis = 0.0; + for (int i = 0; i < g_timing_iterations; i++) + { + gpu_timer.Start(); + + CubDebugExit(DeviceReduce::Dispatch( + d_temp_storage, + temp_storage_bytes, + multi_dispatch.kernel_ptr, + single_dispatch.kernel_ptr, + FillAndResetDrainKernel, + multi_dispatch.params, + single_dispatch.params, + d_in, + d_out, + num_items, + reduction_op)); + + gpu_timer.Stop(); + elapsed_millis += gpu_timer.ElapsedMillis(); + } + + // Mooch + CubDebugExit(cudaDeviceSynchronize()); + + float avg_elapsed = elapsed_millis / g_timing_iterations; + float avg_throughput = float(num_items) / avg_elapsed / 1000.0 / 1000.0; + float avg_bandwidth = avg_throughput * sizeof(T); + + multi_dispatch.avg_throughput = CUB_MAX(avg_throughput, multi_dispatch.avg_throughput); + if (avg_throughput > multi_dispatch.best_avg_throughput) + { + multi_dispatch.best_avg_throughput = avg_throughput; + multi_dispatch.best_size = num_items; + } + + single_dispatch.avg_throughput = CUB_MAX(avg_throughput, single_dispatch.avg_throughput); + if (avg_throughput > single_dispatch.best_avg_throughput) + { + single_dispatch.best_avg_throughput = avg_throughput; + single_dispatch.best_size = num_items; + } + + if (g_verbose) + { + printf("\t%.2f GB/s, multi_dispatch( ", avg_bandwidth); + multi_dispatch.params.Print(); + printf(" ), single_dispatch( "); + single_dispatch.params.Print(); + printf(" )\n"); + fflush(stdout); + } + + AssertEquals(0, compare); + + // Cleanup temporaries + if (d_temp_storage) CubDebugExit(g_allocator.DeviceFree(d_temp_storage)); + } + + + /** + * Evaluate multi-block configurations + */ + void TestMulti( + T* h_in, + T* d_in, + T* d_out, + ReductionOp reduction_op) + { + // Simple single kernel tuple for use with multi kernel sweep + typedef typename DeviceReduce::TunedPolicies::SinglePolicy SimpleSinglePolicy; + SingleDispatchTuple simple_single_tuple; + simple_single_tuple.params.template Init(); + simple_single_tuple.kernel_ptr = ReduceSingleKernel; + + double max_exponent = log2(double(g_max_items)); + double min_exponent = log2(double(simple_single_tuple.params.tile_size)); + unsigned int max_int = (unsigned int) -1; + + for (int sample = 0; sample < g_samples; ++sample) + { + printf("\nMulti-block sample %d, ", sample); + + int num_items; + if (sample == 0) + { + // First sample: use max items + num_items = g_max_items; + printf("num_items: %d", num_items); fflush(stdout); + } + else + { + // Sample a problem size from [2^g_min_exponent, g_max_items]. First 2/3 of the samples are log-distributed, the other 1/3 are uniformly-distributed. 
+ unsigned int bits; + RandomBits(bits); + double scale = double(bits) / max_int; + + if (sample < g_samples / 2) + { + // log bias + double exponent = ((max_exponent - min_exponent) * scale) + min_exponent; + num_items = pow(2.0, exponent); + num_items = CUB_MIN(num_items, g_max_items); + printf("num_items: %d (2^%.2f)", num_items, exponent); fflush(stdout); + } + else + { + // uniform bias + num_items = CUB_MAX(pow(2.0, min_exponent), scale * g_max_items); + num_items = CUB_MIN(num_items, g_max_items); + printf("num_items: %d (%.2f * %d)", num_items, scale, g_max_items); fflush(stdout); + } + } + if (g_verbose) + printf("\n"); + else + printf(", "); + + // Compute reference + T h_reference = Reduce(h_in, reduction_op, num_items); + + // Run test on each multi-kernel configuration + float best_avg_throughput = 0.0; + for (int j = 0; j < multi_kernels.size(); ++j) + { + multi_kernels[j].avg_throughput = 0.0; + + TestConfiguration(multi_kernels[j], simple_single_tuple, d_in, d_out, &h_reference, num_items, reduction_op); + + best_avg_throughput = CUB_MAX(best_avg_throughput, multi_kernels[j].avg_throughput); + } + + // Print best throughput for this problem size + printf("Best: %.2fe9 items/s (%.2f GB/s)\n", best_avg_throughput, best_avg_throughput * sizeof(T)); + + // Accumulate speedup (inverse for harmonic mean) + for (int j = 0; j < multi_kernels.size(); ++j) + multi_kernels[j].hmean_speedup += best_avg_throughput / multi_kernels[j].avg_throughput; + } + + // Find max overall throughput and compute hmean speedups + float overall_max_throughput = 0.0; + for (int j = 0; j < multi_kernels.size(); ++j) + { + overall_max_throughput = CUB_MAX(overall_max_throughput, multi_kernels[j].best_avg_throughput); + multi_kernels[j].hmean_speedup = float(g_samples) / multi_kernels[j].hmean_speedup; + } + + // Sort by cumulative speedup + sort(multi_kernels.begin(), multi_kernels.end(), MinSpeedup); + + // Print ranked multi configurations + printf("\nRanked multi_kernels:\n"); + for (int j = 0; j < multi_kernels.size(); ++j) + { + printf("\t (%d) params( ", multi_kernels.size() - j); + multi_kernels[j].params.Print(); + printf(" ) hmean speedup: %.3f, best throughput %.2f @ %d elements (%.2f GB/s, %.2f%%)\n", + multi_kernels[j].hmean_speedup, + multi_kernels[j].best_avg_throughput, + (int) multi_kernels[j].best_size, + multi_kernels[j].best_avg_throughput * sizeof(T), + multi_kernels[j].best_avg_throughput / overall_max_throughput); + } + + printf("\nMax multi-block throughput %.2f (%.2f GB/s)\n", overall_max_throughput, overall_max_throughput * sizeof(T)); + } + + + /** + * Evaluate single-block configurations + */ + void TestSingle( + T* h_in, + T* d_in, + T* d_out, + ReductionOp reduction_op) + { + // Construct a NULL-ptr multi-kernel tuple that forces a single-kernel pass + MultiDispatchTuple multi_tuple; + + double max_exponent = log2(double(g_max_items)); + unsigned int max_int = (unsigned int) -1; + + for (int sample = 0; sample < g_samples; ++sample) + { + printf("\nSingle-block sample %d, ", sample); + + int num_items; + if (sample == 0) + { + // First sample: use max items + num_items = g_max_items; + printf("num_items: %d", num_items); fflush(stdout); + } + else + { + // Sample a problem size from [2, g_max_items], log-distributed + unsigned int bits; + RandomBits(bits); + double scale = double(bits) / max_int; + double exponent = ((max_exponent - 1) * scale) + 1; + num_items = pow(2.0, exponent); + printf("num_items: %d (2^%.2f)", num_items, exponent); fflush(stdout); + } + + if (g_verbose) 
+ printf("\n"); + else + printf(", "); + + // Compute reference + T h_reference = Reduce(h_in, reduction_op, num_items); + + // Run test on each single-kernel configuration (pick first multi-config to use, which shouldn't be + float best_avg_throughput = 0.0; + for (int j = 0; j < single_kernels.size(); ++j) + { + single_kernels[j].avg_throughput = 0.0; + + TestConfiguration(multi_tuple, single_kernels[j], d_in, d_out, &h_reference, num_items, reduction_op); + + best_avg_throughput = CUB_MAX(best_avg_throughput, single_kernels[j].avg_throughput); + } + + // Print best throughput for this problem size + printf("Best: %.2fe9 items/s (%.2f GB/s)\n", best_avg_throughput, best_avg_throughput * sizeof(T)); + + // Accumulate speedup (inverse for harmonic mean) + for (int j = 0; j < single_kernels.size(); ++j) + single_kernels[j].hmean_speedup += best_avg_throughput / single_kernels[j].avg_throughput; + } + + // Find max overall throughput and compute hmean speedups + float overall_max_throughput = 0.0; + for (int j = 0; j < single_kernels.size(); ++j) + { + overall_max_throughput = CUB_MAX(overall_max_throughput, single_kernels[j].best_avg_throughput); + single_kernels[j].hmean_speedup = float(g_samples) / single_kernels[j].hmean_speedup; + } + + // Sort by cumulative speedup + sort(single_kernels.begin(), single_kernels.end(), MinSpeedup); + + // Print ranked single configurations + printf("\nRanked single_kernels:\n"); + for (int j = 0; j < single_kernels.size(); ++j) + { + printf("\t (%d) params( ", single_kernels.size() - j); + single_kernels[j].params.Print(); + printf(" ) hmean speedup: %.3f, best throughput %.2f @ %d elements (%.2f GB/s, %.2f%%)\n", + single_kernels[j].hmean_speedup, + single_kernels[j].best_avg_throughput, + (int) single_kernels[j].best_size, + single_kernels[j].best_avg_throughput * sizeof(T), + single_kernels[j].best_avg_throughput / overall_max_throughput); + } + + printf("\nMax single-block throughput %.2f (%.2f GB/s)\n", overall_max_throughput, overall_max_throughput * sizeof(T)); + } + +}; + + + +//--------------------------------------------------------------------- +// Main +//--------------------------------------------------------------------- + +/** + * Main + */ +int main(int argc, char** argv) +{ + // Initialize command line + CommandLineArgs args(argc, argv); + args.GetCmdLineArgument("n", g_max_items); + args.GetCmdLineArgument("s", g_samples); + args.GetCmdLineArgument("i", g_timing_iterations); + g_verbose = args.CheckCmdLineFlag("v"); + g_single = args.CheckCmdLineFlag("single"); + g_verify = !args.CheckCmdLineFlag("noverify"); + + // Print usage + if (args.CheckCmdLineFlag("help")) + { + printf("%s " + "[--device=] " + "[--n=]" + "[--s=]" + "[--i=]" + "[--single]" + "[--v]" + "[--noverify]" + "\n", argv[0]); + exit(0); + } + + // Initialize device + CubDebugExit(args.DeviceInit()); + +#if (TUNE_SIZE == 1) + typedef unsigned char T; +#elif (TUNE_SIZE == 2) + typedef unsigned short T; +#elif (TUNE_SIZE == 4) + typedef unsigned int T; +#elif (TUNE_SIZE == 8) + typedef unsigned long long T; +#else + // Default + typedef unsigned int T; +#endif + + typedef unsigned int OffsetT; + Sum reduction_op; + + // Enumerate kernels + Schmoo schmoo; + schmoo.Enumerate(); + + // Allocate host arrays + T *h_in = new T[g_max_items]; + + // Initialize problem + Initialize(UNIFORM, h_in, g_max_items); + + // Initialize device arrays + T *d_in = NULL; + T *d_out = NULL; + CubDebugExit(g_allocator.DeviceAllocate((void**)&d_in, sizeof(T) * g_max_items)); + 
CubDebugExit(g_allocator.DeviceAllocate((void**)&d_out, sizeof(T) * 1)); + CubDebugExit(cudaMemcpy(d_in, h_in, sizeof(T) * g_max_items, cudaMemcpyHostToDevice)); + + // Test kernels + if (g_single) + schmoo.TestSingle(h_in, d_in, d_out, reduction_op); + else + schmoo.TestMulti(h_in, d_in, d_out, reduction_op); + + // Cleanup + if (h_in) delete[] h_in; + if (d_in) CubDebugExit(g_allocator.DeviceFree(d_in)); + if (d_out) CubDebugExit(g_allocator.DeviceFree(d_out)); + + return 0; +} + + + diff --git a/hash-graph-dehornetify/externals/rmm/librmm.so b/hash-graph-dehornetify/externals/rmm/librmm.so new file mode 100644 index 0000000..0e92521 Binary files /dev/null and b/hash-graph-dehornetify/externals/rmm/librmm.so differ diff --git a/hash-graph-dehornetify/include/MultiHashGraph.cuh b/hash-graph-dehornetify/include/MultiHashGraph.cuh new file mode 100644 index 0000000..a7c6758 --- /dev/null +++ b/hash-graph-dehornetify/include/MultiHashGraph.cuh @@ -0,0 +1,293 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include //--profile-from-start off + +// #include +// #include +// #include +// #include +// #include +// #include + +#include +#include +#include +#include + +#include + +// #include "rmm.h" +// #include "rmm.hpp" +// #include "rmm/rmm_api.h" +// #include "rmm/detail/memory_manager.hpp" + + +// #include + +#include + +// #include "rmm/rmm.h" +// #include "rmm.h" + +#include + +// using namespace mgpu; + +#define CHECK_ERROR(str) \ + {cudaDeviceSynchronize(); cudaError_t err; err = cudaGetLastError(); if(err!=0) {printf("ERROR %s: %d %s\n", str, err, cudaGetErrorString(err)); fflush(stdout); exit(0);}} + +// #define CUDA_PROFILE +// #define HOST_PROFILE + +//#define INDEX_TRACK +//#define MANAGED_MEM +// #define B32 + +#ifdef B32 +using hkey_t = uint32_t; +using index_t = int64_t; +using HashKey = uint32_t; +#else +using hkey_t = int64_t; +using index_t = int64_t; +using HashKey = int64_t; +#endif + + +struct keyval_key +{ + hkey_t key; +}; + +struct keyval_ind +{ + hkey_t key; + // uint64_t gpuId; + index_t ind; +}; + +struct keypair +{ + // index_t left; + index_t right; +}; + +struct inputData +{ + hkey_t *d_keys; + HashKey *d_hash; + index_t len; +}; + +#ifdef INDEX_TRACK +typedef keyval_ind keyval; +#else +typedef keyval_key keyval; +#endif + +inline bool operator==(const keyval &kv1, const keyval &kv2) { + // return kv1.key == kv2.key && kv1.ind == kv2.ind; + return kv1.key == kv2.key; +} + +// Overload Modern GPU memory allocation and free to use RMM +// class rmm_mgpu_context_t : public mgpu::standard_context_t +// { +// public: +// rmm_mgpu_context_t(bool print_prop = true, cudaStream_t stream_ = 0) : +// mgpu::standard_context_t(print_prop, stream_) {} +// ~rmm_mgpu_context_t() {} +// +// virtual void* alloc(size_t size, memory_space_t space) { +// void *p = nullptr; +// if(size) { +// if (memory_space_device == space) { +// if 
(RMM_SUCCESS != RMM_ALLOC(&p, size, stream())) +// throw cuda_exception_t(cudaPeekAtLastError()); +// } +// else { +// cudaError_t result = cudaMallocHost(&p, size); +// if (cudaSuccess != result) throw cuda_exception_t(result); +// } +// } +// return p; +// } +// +// virtual void free(void* p, memory_space_t space) { +// if (p) { +// if (memory_space_device == space) { +// if (RMM_SUCCESS != RMM_FREE(p, stream())) +// throw cuda_exception_t(cudaPeekAtLastError()); +// } +// else { +// cudaError_t result = cudaFreeHost(&p); +// if (cudaSuccess != result) throw cuda_exception_t(result); +// } +// } +// } +// }; + +class MultiHashGraph { +public: + MultiHashGraph(inputData *h_dVals, index_t countSize, index_t maxkey, + // context_t &context, index_t tableSize, + index_t tableSize, + index_t binCount, index_t lrbBins, index_t gpuCount); ~MultiHashGraph(); + + void build(bool buildSplits, index_t tid); + void buildSingle(); + + static void intersect(MultiHashGraph &mgA, MultiHashGraph &mgB, index_t *h_Common, + keypair **h_dOutput, index_t tid); + + void destroyMulti(); + + char **h_dFinalKeys; + index_t *h_hashOff; + index_t *h_counterOff; + index_t *h_offsetOff; + index_t *h_edgesOff; + index_t *h_lrbOff; + + // Structures for allocating bins to GPUs. + // Public so that another HG can use the same splits. + index_t *h_binSplits; + index_t **h_dBinSplits; + + // int64_t *d_Common; + // int64_t *d_GlobalCounter; + index_t **h_dCommon; + index_t **h_dGlobalCounter; + + index_t countSize; + // int64_t tableSize; + index_t tableSize; + index_t gpuCount; + + // Public for correctness check + hkey_t *h_vals; + + char **h_dCountCommon; + char *uvmPtr; + char *uvmPtrIntersect; + index_t *prefixArray; + index_t *prefixArrayIntersect; + index_t totalSize; + index_t totalSizeIntersect; + + size_t **h_dExSumTemp; + size_t exSumTempBytes; + + +private: + + // mem_t d_vals; + // hkey_t **h_dVals; + inputData *h_dVals; + hkey_t *d_vals; + // mem_t d_hash; + // mem_t d_counter; + // mem_t d_offset; + // mem_t d_edges; + + + // Structures for initial binning + index_t *h_binSizes; + // index_t *d_binSizes; + index_t **h_dBinSizes; + index_t **h_hBinSizes; + + index_t *h_psBinSizes; + index_t *d_psBinSizes; + + // Allocating physical bins for binning keys + hkey_t **h_keyBins; + index_t *d_binCounter; + index_t *h_binCounter; + + // Structures for keeping keys on each GPU. + hkey_t **h_dKeys; + + // Structures for storing and binning hashes on each GPU. + HashKey **h_dHashes; + index_t **h_dBinCounter; + index_t **h_hBinCounter; + + // Structures for prefix summing hash bins across GPUs (on host). + index_t *h_hashBinSize; + index_t *h_psHashBinSize; + + // Structure for allocating hash bins for each GPU. + index_t *h_hashSplits; + + // Structure for sending hash bin allocations to each GPU. + // Keeps track of which bins go to which GPU. + index_t **h_dHashSplits; + + // Structures for counting the key/hash buffer sizes on each GPU. + index_t **h_dBufferCounter; + + index_t **h_bufferCounter; + + // Used for initial key binning + // hkey_t **h_dKeyBinBuff; + keyval **h_dKeyBinBuff; + HashKey **h_dHashBinBuff; + index_t **h_dKeyBinOff; + index_t **h_hKeyBinOff; + + // Actual key/hash buffers per GPU on each GPU for hash values. + hkey_t **h_dKeyBuff; + + HashKey **h_dHashBuff; + index_t **h_dOffset; + index_t **h_hOffset; + + // size_t *h_keyPitches; + // size_t *h_hashPitches; + + // Final, consolidated list of key/hashes on each GPU. 
+ // hkey_t **h_dFinalKeys; + HashKey **h_dFinalHash; + index_t **h_dFinalCounter; + index_t **h_hFinalCounter; + + index_t **h_dFinalOffset; + index_t **h_hFinalOffset; + + // HashGraph construction structures. + index_t **h_dOffsets; + index_t **h_dCounter; + // keyval **h_dEdges; + hkey_t **h_dEdges; + + // LRB constructs + index_t **h_dLrbCounter; + index_t **h_dLrbCountersPrefix; + + index_t maxkey; + index_t binCount; + index_t lrbBins; + + bool multiDestroyed = false; +}; diff --git a/hash-graph-dehornetify/include/MultiHashGraphDeviceOperators.cuh b/hash-graph-dehornetify/include/MultiHashGraphDeviceOperators.cuh new file mode 100644 index 0000000..f41f9b3 --- /dev/null +++ b/hash-graph-dehornetify/include/MultiHashGraphDeviceOperators.cuh @@ -0,0 +1,593 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +// #define ID_HASH + +__forceinline__ __host__ __device__ uint32_t rotl32( uint32_t x, int8_t r ) { + return (x << r) | (x >> (32 - r)); +} +__forceinline__ __host__ __device__ uint32_t fmix32( uint32_t h ) { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; +} +__forceinline__ __host__ __device__ uint32_t hash_murmur(const HashKey& key) { + +#ifdef ID_HASH + return (uint32_t) key; +#endif + + constexpr int len = sizeof(int); + const uint8_t * const data = (const uint8_t*)&key; + constexpr int nblocks = len / 4; + uint32_t h1 = 0; + constexpr uint32_t c1 = 0xcc9e2d51; + constexpr uint32_t c2 = 0x1b873593; + //---------- + // body + const uint32_t * const blocks = (const uint32_t *)(data + nblocks*4); + for(int i = -nblocks; i; i++) + { + uint32_t k1 = blocks[i];//getblock32(blocks,i); + k1 *= c1; + k1 = rotl32(k1,15); + k1 *= c2; + h1 ^= k1; + h1 = rotl32(h1,13); + h1 = h1*5+0xe6546b64; + } + //---------- + // tail + const uint8_t * tail = (const uint8_t*)(data + nblocks*4); + uint32_t k1 = 0; + switch(len & 3) + { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; + k1 *= c1; k1 = rotl32(k1,15); k1 *= c2; h1 ^= k1; + }; + //---------- + // finalization + h1 ^= len; + h1 = fmix32(h1); + return h1; +} + +// __global__ void basicHashD(uint64_t valCount, hkey_t *valsArr, HashKey *hashArr, int64_t tableSize) { +__global__ void basicHashD(index_t valCount, hkey_t *valsArr, HashKey *hashArr, index_t tableSize) { + int64_t id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t stride = blockDim.x * gridDim.x; + + for (auto i = id; i < valCount; i += stride) { + hashArr[i] = (HashKey)(hash_murmur(valsArr[i]) % tableSize); + } +} + +// __global__ void hashValuesD(uint64_t valCount, keyval *valsArr, keyval *hashArr, int64_t tableSize) { +// __global__ void hashValuesD(uint64_t valCount, keyval *valsArr, HashKey *hashArr, int64_t tableSize, +__global__ void hashValuesD(index_t valCount, keyval *valsArr, HashKey *hashArr, index_t tableSize, + index_t devNum) { + int64_t id = blockIdx.x * blockDim.x + threadIdx.x; 
+ int64_t stride = blockDim.x * gridDim.x; + + for (auto i = id; i < valCount; i += stride) { + // hashArr[i].key = (HashKey)(hash_murmur(valsArr[i].key) % tableSize); + hashArr[i] = (HashKey)(hash_murmur(valsArr[i].key) % tableSize); + } +} + +// __global__ void countHashD(uint64_t valCount, HashKey *hashArr, index_t *countArr) { +__global__ void countHashD(index_t valCount, HashKey *hashArr, index_t *countArr) { + int64_t id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t stride = blockDim.x * gridDim.x; + for (auto i = id; i < valCount; i += stride) { + atomicAdd((unsigned long long int*)(countArr + hashArr[i]), 1); + // atomicAdd((countArr + hashArr[i]), 1); + } +} + +__global__ void countHashD32(index_t valCount, HashKey *hashArr, int32_t *countArr) { + int64_t id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t stride = blockDim.x * gridDim.x; + for (auto i = id; i < valCount; i += stride) { + atomicAdd(countArr + hashArr[i], 1); + } +} + +// __global__ void copyToGraphD(uint64_t valCount, hkey_t *valsArr, HashKey *hashArr, index_t *countArr, +__global__ void copyToGraphD(index_t valCount, hkey_t *valsArr, HashKey *hashArr, index_t *countArr, + index_t *offsetArr, keyval *edges, HashKey tableSize) { + + int id = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (auto i = id; i < valCount; i += stride) { + HashKey hashVal=hashArr[i]; + int pos = atomicAdd((unsigned long long int*)(countArr + hashVal),1)+offsetArr[hashVal]; + // int pos = atomicAdd((countArr + hashVal),1)+offsetArr[hashVal]; +#ifdef INDEX_TRACK + edges[pos]={valsArr[i],i}; +#else + edges[pos] = { valsArr[i] }; +#endif + } +} + +__global__ void copyToGraphD32(index_t valCount, hkey_t *valsArr, HashKey *hashArr, int32_t *countArr, + index_t *offsetArr, keyval *edges, index_t tableSize) { + + int id = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (auto i = id; i < valCount; i += stride) { + HashKey hashVal=hashArr[i]; + int pos = atomicAdd(countArr + hashVal,1)+offsetArr[hashVal]; + // edges[pos] = valsArr[i]; + // edges[pos]={valsArr[i],i}; +#ifdef INDEX_TRACK + edges[pos]={valsArr[i],i}; +#else + edges[pos] = { valsArr[i] }; +#endif + // edges[pos]={valsArr[i], 0, i}; + } +} + +__global__ void countBinSizes(HashKey *d_vals, index_t size, index_t *d_binSizes, + index_t binRange) { + + int64_t id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t stride = blockDim.x * gridDim.x; + + for (index_t i = id; i < size; i += stride) { + // index_t bin = d_vals[i] / binRange; + index_t bin = d_vals[i] / binRange; + atomicAdd((unsigned long long int*)(&d_binSizes[bin]), 1); + } +} + +__global__ void fillSequence(hkey_t *d_vals, int64_t size) { + + int id = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (auto i = id; i < size; i += stride) { + d_vals[i] = i; + } +} + +__global__ void countHashBinSizes(HashKey *d_vals, index_t size, index_t *d_binSizes, + index_t binRange) { + + int64_t id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t stride = blockDim.x * gridDim.x; + for (auto i = id; i < size; i += stride) { + index_t bin = d_vals[i] / binRange; + atomicAdd((unsigned long long int*)(&d_binSizes[bin]), 1); + } +} + +__global__ void countBufferSizes(index_t *hashSplits, index_t size, index_t *bufferCounter, + index_t gpuCount, HashKey *hashVals) { + + int64_t id = blockIdx.x * blockDim.x + threadIdx.x; + int64_t stride = blockDim.x * gridDim.x; + for (auto i = id; i < size; i += stride) { + HashKey hash = 
hashVals[i]; + // TODO: This might make things slow. + for (index_t j = 0; j < gpuCount; j++) { + if (hashSplits[j] <= hash && hash < hashSplits[j + 1]) { + atomicAdd((unsigned long long int*)(&bufferCounter[j]), 1); + break; + } + } + } +} + +// __global__ void countKeyBuffSizes(HashKey *hashVals, index_t size, index_t *counter, +// index_t *splits, index_t gpuCount) { +// +// int64_t id = blockIdx.x * blockDim.x + threadIdx.x; +// int64_t stride = blockDim.x * gridDim.x; +// for (auto i = id; i < size; i += stride) { +// HashKey hash = hashVals[i]; +// // TODO: This might make things slow. +// for (index_t j = 0; j < gpuCount; j++) { +// if (hash < splits[j + 1]) { +// atomicAdd((unsigned long long int*)(&counter[j]), 1); +// break; +// } +// } +// } +// } + +__global__ void countKeyBuffSizes(HashKey *hashVals, index_t size_, index_t *counter, + index_t *splits, index_t gpuCount_) { + int id = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + uint32_t gpuCount = gpuCount_; + int size=size_; + __shared__ index_t internalCounters[16]; + if(threadIdx.x= l) { + size_t mid = l + (r - l) / 2; + + // If the element is present at the middle itself + if (bins[mid] == x) + return mid; + + // If element is smaller than mid, then it can only be present in left subarray + if (bins[mid] > x) + return binarySearch(bins, l, mid - 1, x); + + // Else the element can only be present in right subarray + return binarySearch(bins, mid + 1, r, x); + } + + // We reach here when element is not present in array and return the bin id + // of the smallest value greater than x + return l; +} + +void countBinSizes(inputData *h_dVals, index_t **h_hBinSizes, index_t **h_dBinSizes, + index_t *h_binSizes, index_t *h_psBinSizes, index_t *h_binSplits, + index_t **h_dBinSplits, index_t countSize, index_t tableSize, + index_t binRange, index_t binCount, index_t gpuCount, index_t tid) { + + // Count bin sizes for the keys stored on each GPU. + // Bins are of hash values. + + countBinSizes<<>>(h_dVals[tid].d_hash, h_dVals[tid].len, + // countBinSizes<<>>(h_dVals[tid].d_keys + h_dVals[tid].len, h_dVals[tid].len, + h_dBinSizes[tid], binRange); + + // Consolidate bin sizes across GPUs. + cudaMemcpyAsync(h_hBinSizes[tid], h_dBinSizes[tid], binCount * sizeof(index_t), + cudaMemcpyDeviceToHost); + + #pragma omp barrier + + #pragma omp master + { + for (index_t i = 0; i < gpuCount; i++) { + for (index_t j = 0; j < binCount; j++) { + h_binSizes[j] += h_hBinSizes[i][j]; + } + } + + h_psBinSizes[0] = 0; + for (index_t i = 1; i < binCount; i++) { + h_psBinSizes[i] = h_psBinSizes[i - 1] + h_binSizes[i - 1]; + } + h_psBinSizes[binCount] = countSize; + } // master + + #pragma omp barrier + + +#ifdef DEBUG + #pragma omp barrier + #pragma omp master + { + std::cout << "h_binSizes:" << std::endl; + for (index_t i = 0; i < binCount; i++) { + if (h_binSizes[i] > 0) { + std::cout << "i: " << i << " " << h_binSizes[i] << " "; + } + } + std::cout << std::endl; + } // debug master + #pragma omp barrier +#endif + + // Find split points in prefix sum to determine what bins should go to each GPU. + // TODO: This can probably be parallelized (maybe on device?) 
+ // might not be worth though, not a lot of work + index_t avgKeyCount = std::ceil(countSize / ((float)gpuCount)); + index_t upperVal = avgKeyCount * (tid + 1); + index_t upperIdx = binarySearch(h_psBinSizes, 0, binCount, upperVal); + + index_t minRange = upperIdx * binRange; + h_binSplits[tid + 1] = std::min(minRange, tableSize); + + #pragma omp barrier + + cudaMemcpyAsync(h_dBinSplits[tid], h_binSplits, (gpuCount + 1) * sizeof(index_t), + cudaMemcpyHostToDevice); + +#ifdef DEBUG + #pragma omp barrier + #pragma omp master + { + std::cout << "h_binSplits" << std::endl; + for (index_t i = 0; i < gpuCount + 1; i++) { + std::cout << h_binSplits[i] << " "; + } + std::cout << std::endl; + } // debug master + #pragma omp barrier +#endif + +} + +void countKeyBuffSizes(inputData *h_dVals, index_t **h_dBinSplits, + index_t **h_bufferCounter, index_t **h_dBufferCounter, + index_t gpuCount, index_t tid) { + + // Clear counters + cudaMemset(h_dBufferCounter[tid], 0, gpuCount * sizeof(index_t)); + + // TODO: replace this with partitionRelabel from cuSort. + countKeyBuffSizes<<>>(h_dVals[tid].d_hash, h_dVals[tid].len, + // countKeyBuffSizes<<>>(h_dVals[tid].d_keys + h_dVals[tid].len, h_dVals[tid].len, + h_dBufferCounter[tid], + h_dBinSplits[tid], + gpuCount); + + // cudaMemcpyAsync(h_bufferCounter[tid], h_dBufferCounter[tid], gpuCount * sizeof(index_t), + cudaMemcpy(h_bufferCounter[tid], h_dBufferCounter[tid], gpuCount * sizeof(index_t), + cudaMemcpyDeviceToHost); + + +#ifdef DEBUG + std::cout << "h_keyBins" << std::endl; + for (index_t i = 0; i < binCount; i++) { + std::cout << "bin" << i << ": "; + for (index_t j = 0; j < h_binCounter[i]; j++) { + std::cout << h_keyBins[i][j] << " "; + } + std::cout << std::endl; + } + std::cout << std::endl; + + delete[] h_binCounter; +#endif + +} + +// void populateKeyBuffs(inputData *h_dVals, hkey_t **h_dKeyBinBuff, +void populateKeyBuffs(inputData *h_dVals, keyval **h_dKeyBinBuff, + index_t **h_dKeyBinOff, + index_t **h_hKeyBinOff, index_t **h_dBufferCounter, + index_t **h_bufferCounter, index_t **h_dBinSplits, + size_t **h_dExSumTemp, size_t exSumTempBytes, + index_t gpuCount, index_t tid) { + + // Compute offset into original key array based on binning + void* _d_temp_storage { nullptr }; + size_t _temp_storage_bytes { 0 }; + + cub::DeviceScan::ExclusiveSum(NULL, _temp_storage_bytes, h_dBufferCounter[tid], + h_dKeyBinOff[tid], gpuCount); + + if (_temp_storage_bytes > exSumTempBytes) { + std::cerr << "ERROR: NOT ENOUGH TEMP SPACE ALLOCATED" << std::endl; + } + + cub::DeviceScan::ExclusiveSum(h_dExSumTemp[tid], _temp_storage_bytes, h_dBufferCounter[tid], + h_dKeyBinOff[tid], gpuCount); + + // cudaMemcpyAsync(h_hKeyBinOff[tid], h_dKeyBinOff[tid], (gpuCount + 1) * sizeof(index_t), + cudaMemcpy(h_hKeyBinOff[tid], h_dKeyBinOff[tid], (gpuCount + 1) * sizeof(index_t), + cudaMemcpyDeviceToHost); + + // Reset counters to 0 to actually fill buffers. + cudaMemset(h_dBufferCounter[tid], 0, gpuCount * sizeof(index_t)); + + + // Buffer keys values according to which GPU they should be sent to based on hash range. 
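The split-point computation above can be exercised on the host in isolation: each GPU's upper hash bound is the first bin at which the prefix-summed key histogram reaches that GPU's share of the keys, scaled back to a hash value by binRange and clamped to tableSize. A small self-contained sketch with made-up sizes (std::lower_bound plays the role of the recursive binarySearch above and behaves the same on these unique, sorted prefix sums):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
    const long countSize = 1000;       // total keys across all GPUs (made up)
    const long gpuCount  = 4;
    const long binRange  = 16;         // hash values covered by each bin (made up)

    // Prefix sum over per-bin key counts: ps[i] = keys hashing into bins 0..i-1.
    std::vector<long> ps = {0, 150, 320, 470, 640, 810, 1000};
    const long binCount  = (long)ps.size() - 1;
    const long tableSize = binCount * binRange;

    std::vector<long> splits(gpuCount + 1, 0);
    const long avgKeyCount = (countSize + gpuCount - 1) / gpuCount;   // ceil
    for (long tid = 0; tid < gpuCount; ++tid) {
        const long upperVal = avgKeyCount * (tid + 1);
        // First bin whose running count reaches this GPU's share of the keys.
        const long upperIdx =
            std::lower_bound(ps.begin(), ps.end(), upperVal) - ps.begin();
        splits[tid + 1] = std::min(upperIdx * binRange, tableSize);
    }

    for (long s : splits) std::printf("%ld ", s);   // per-GPU hash-range bounds
    std::printf("\n");
    return 0;
}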
+ binHashValues<<>>(h_dVals[tid].len, h_dVals[tid].d_keys, + h_dVals[tid].d_hash, + // h_dVals[tid].d_keys + h_dVals[tid].len, + h_dKeyBinBuff[tid], + h_dKeyBinOff[tid], h_dBinSplits[tid], + h_dBufferCounter[tid], gpuCount, tid); + + // Copy counters and offsets to host + // cudaMemcpyAsync(h_bufferCounter[tid], h_dBufferCounter[tid], gpuCount * sizeof(index_t), + cudaMemcpy(h_bufferCounter[tid], h_dBufferCounter[tid], gpuCount * sizeof(index_t), + cudaMemcpyDeviceToHost); + +#ifdef DEBUG + #pragma omp barrier + #pragma omp master + { + std::cout << "buffSizes:" << "\n"; + for (index_t i = 0; i < gpuCount; i++) { + std::cout << "gpu: " << i << " count: " << h_bufferCounter[i] << "\n"; + } + std::cout << std::endl; + } + #pragma omp barrier +#endif +} + +// void countFinalKeys(index_t **h_bufferCounter, hkey_t **h_dFinalKeys, +#ifdef MANAGED_MEM +void countFinalKeys(index_t **h_bufferCounter, char **h_dFinalKeys, + index_t **h_hFinalCounters, + index_t **h_hFinalOffset, index_t **h_dFinalOffset, + index_t *h_binSplits, index_t gpuCount, index_t tid, + char *uvmPtr, index_t *prefixArray, index_t totalSize) { +#else +void countFinalKeys(index_t **h_bufferCounter, char **h_dFinalKeys, + index_t **h_hFinalCounters, + index_t **h_hFinalOffset, index_t **h_dFinalOffset, + index_t *h_binSplits, index_t gpuCount, index_t tid) { +#endif + + // h_hFinalCounters is the transpose of h_bufferCounter + // h_hFinalCounters[i][j] is the number of keys GPU i receives from GPU j. + #pragma omp barrier + for (index_t j = 0; j < gpuCount; j++) { + h_hFinalCounters[tid][j] = h_bufferCounter[j][tid]; + } + + // Prefix sum over all final counters. + h_hFinalOffset[tid][0] = 0; + for (index_t j = 1; j < gpuCount + 1; j++) { + h_hFinalOffset[tid][j] = h_hFinalOffset[tid][j - 1] + h_hFinalCounters[tid][j - 1]; + } + + // cudaMemcpyAsync(h_dFinalOffset[tid], h_hFinalOffset[tid], (gpuCount + 1) * sizeof(index_t), + // cudaMemcpyHostToDevice); + + index_t keyCount = h_hFinalOffset[tid][gpuCount]; + index_t hashRange = h_binSplits[tid + 1] - h_binSplits[tid]; + +#ifdef DEBUG + #pragma omp barrier + #pragma omp master + { + std::cout << "buffSizes:" << "\n"; + for (index_t i = 0; i < gpuCount; i++) { + std::cout << "gpu: " << i << " count: " << h_hFinalOffset[i][gpuCount] << "\n"; + } + std::cout << std::endl; + } + #pragma omp barrier +#endif + + // cudaMalloc(&h_dFinalKeys[tid], (4 * keyCount * sizeof(hkey_t)) + + // [ keys | hash | edges | lrbArray | offset | counter ] + // (len = keyCount, type = keyval) (len = hashRange + 1, type = index_t) + // except hash, hash is type HashKey + // cudaMalloc(&h_dFinalKeys[tid], (4 * keyCount * sizeof(keyval)) + + // (2 * (hashRange + 1) * sizeof(index_t))); + // RMM_ALLOC(&h_dFinalKeys[tid], keyCount * sizeof(keyval) + +#ifdef MANAGED_MEM + #pragma omp barrier + #pragma omp master + { + prefixArray[0] = 0; + for (index_t i = 1; i < gpuCount; i++) { + index_t tidKeyCount = h_hFinalOffset[i - 1][gpuCount]; + index_t tidHashRange = h_binSplits[i] - h_binSplits[i - 1]; + index_t size = tidKeyCount * sizeof(keyval) + + tidKeyCount * sizeof(HashKey) + + (2 * tidKeyCount * sizeof(keyval)) + + (2 * (tidHashRange + 1) * sizeof(index_t)); + + prefixArray[i] = prefixArray[i - 1] + size; + } + prefixArray[gpuCount] = totalSize; + + h_dFinalKeys[0] = uvmPtr; + for (index_t i = 1; i < gpuCount; i++) { + h_dFinalKeys[i] = uvmPtr + prefixArray[i]; + } + } + #pragma omp barrier + + cudaMemPrefetchAsync(h_dFinalKeys[tid], prefixArray[tid + 1] - prefixArray[tid], tid); + +#else + 
cudaMalloc(&h_dFinalKeys[tid], keyCount * sizeof(keyval) + + keyCount * sizeof(HashKey) + + (2 * keyCount * sizeof(keyval)) + + (2 * (hashRange + 1) * sizeof(index_t))); + index_t size = keyCount * sizeof(keyval) + + keyCount * sizeof(HashKey) + + (2 * keyCount * sizeof(keyval)) + + (2 * (hashRange + 1) * sizeof(index_t)); +#endif +} + +// void allToAll(inputData *h_dVals, hkey_t **h_dFinalKeys, +void allToAll(inputData *h_dVals, char **h_dFinalKeys, + // index_t **h_hFinalOffset, hkey_t **h_dKeyBinBuff, + index_t **h_hFinalOffset, keyval **h_dKeyBinBuff, + index_t **h_hKeyBinOff, index_t **h_hFinalCounters, index_t gpuCount, + index_t tid) { + + for (index_t j = 0; j < gpuCount; j++) { + // Ship keys + hashes from GPU i to GPU j + index_t keyCount = h_hFinalCounters[tid][j]; + // index_t keyCount = h_hFinalCounters[j][tid]; + // cudaMemcpyAsync(h_dFinalKeys[j] + h_hFinalOffset[j][tid], h_dKeyBinBuff[tid] + h_hKeyBinOff[tid][j], + // keyCount * sizeof(keyval), cudaMemcpyDeviceToDevice); + // cudaMemcpyAsync(h_dFinalKeys[j] + (h_hFinalOffset[j][tid] * sizeof(keyval)), + // h_dKeyBinBuff[tid] + h_hKeyBinOff[tid][j], + // keyCount * sizeof(keyval), cudaMemcpyDeviceToDevice); + cudaMemcpyAsync(h_dFinalKeys[tid] + (h_hFinalOffset[tid][j] * sizeof(keyval)), + h_dKeyBinBuff[j] + h_hKeyBinOff[j][tid], + keyCount * sizeof(keyval), cudaMemcpyDeviceToDevice); + + } +} + +template +void buildTable(hkey_t *d_vals, HashKey *d_hash, int32_t *d_counter, + index_t *d_offSet, keyval *d_edges, index_t valCount, + index_t tableSize, index_t valsOffset=0) { + + void* _d_temp_storage { nullptr }; + size_t _temp_storage_bytes { 0 }; + + // hashValuesD<<>>(valCount, d_vals + valsOffset, + // d_hash.data(), (HashKey) tableSize); + basicHashD<<>>(valCount, d_vals + valsOffset, + d_hash, (HashKey) tableSize); + + countHashD32<<>>(valCount, d_hash, d_counter); + cub::DeviceScan::ExclusiveSum(_d_temp_storage, _temp_storage_bytes,d_counter, + d_offSet, tableSize); + cudaMalloc(&_d_temp_storage, _temp_storage_bytes); + cub::DeviceScan::ExclusiveSum(_d_temp_storage, _temp_storage_bytes,d_counter, + d_offSet, tableSize); + // d_counter = fill(0, (size_t)tableSize, context); + cudaMemset(d_counter, 0, tableSize * sizeof(int32_t)); + cudaMemcpy(d_offSet + tableSize, &valCount, sizeof(index_t), cudaMemcpyHostToDevice); + cudaFree(_d_temp_storage); + + copyToGraphD32<<>>(valCount, d_vals + valsOffset, + d_hash, d_counter, d_offSet, + d_edges, tableSize); +} + diff --git a/hash-graph-dehornetify/include/SingleHashGraph.cuh b/hash-graph-dehornetify/include/SingleHashGraph.cuh new file mode 100644 index 0000000..68b4cab --- /dev/null +++ b/hash-graph-dehornetify/include/SingleHashGraph.cuh @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
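buildTable above relies on CUB's two-phase temporary-storage idiom: calling cub::DeviceScan::ExclusiveSum with a null scratch pointer only reports the required byte count, and the identical call is issued again once that scratch has been allocated. A minimal sketch of the idiom on its own (the input values are made up):

#include <cub/cub.cuh>
#include <cstdio>

int main() {
    const int n = 8;
    int h_in[n] = {3, 1, 4, 1, 5, 9, 2, 6};
    int *d_in = nullptr, *d_out = nullptr;
    cudaMalloc(&d_in, n * sizeof(int));
    cudaMalloc(&d_out, n * sizeof(int));
    cudaMemcpy(d_in, h_in, n * sizeof(int), cudaMemcpyHostToDevice);

    void  *d_temp     = nullptr;
    size_t temp_bytes = 0;
    // First call: d_temp is null, so only temp_bytes is written.
    cub::DeviceScan::ExclusiveSum(d_temp, temp_bytes, d_in, d_out, n);
    cudaMalloc(&d_temp, temp_bytes);
    // Second call: same arguments, now performs the scan.
    cub::DeviceScan::ExclusiveSum(d_temp, temp_bytes, d_in, d_out, n);

    int h_out[n];
    cudaMemcpy(h_out, d_out, n * sizeof(int), cudaMemcpyDeviceToHost);
    for (int v : h_out) std::printf("%d ", v);      // 0 3 4 8 9 14 23 25
    std::printf("\n");

    cudaFree(d_temp); cudaFree(d_in); cudaFree(d_out);
    return 0;
}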
+ */ +#pragma once + +#include +#include +#include +#include + +#include //--profile-from-start off + +// #include +// #include +// #include +// #include +// #include +// #include +// #include + +#include + +// #include "rmm/rmm.h" +// #include "rmm.h" + +#include + +using hkey_t = int64_t; +using index_t = int32_t; +using HashKey = uint32_t; + +// using namespace mgpu; + +struct keyval +{ + hkey_t key; + // index_t ind; +}; + +// Overload Modern GPU memory allocation and free to use RMM +// class rmm_mgpu_context_t : public mgpu::standard_context_t +// { +// public: +// rmm_mgpu_context_t(bool print_prop = true, cudaStream_t stream_ = 0) : +// mgpu::standard_context_t(print_prop, stream_) {} +// ~rmm_mgpu_context_t() {} +// +// virtual void* alloc(size_t size, memory_space_t space) { +// void *p = nullptr; +// if(size) { +// if (memory_space_device == space) { +// if (RMM_SUCCESS != RMM_ALLOC(&p, size, stream())) +// throw cuda_exception_t(cudaPeekAtLastError()); +// } +// else { +// cudaError_t result = cudaMallocHost(&p, size); +// if (cudaSuccess != result) throw cuda_exception_t(result); +// } +// } +// return p; +// } +// +// virtual void free(void* p, memory_space_t space) { +// if (p) { +// if (memory_space_device == space) { +// if (RMM_SUCCESS != RMM_FREE(p, stream())) +// throw cuda_exception_t(cudaPeekAtLastError()); +// } +// else { +// cudaError_t result = cudaFreeHost(&p); +// if (cudaSuccess != result) throw cuda_exception_t(result); +// } +// } +// } +// }; + +class SingleHashGraph { +public: + // SingleHashGraph(int64_t countSize, int64_t maxkey, context_t &context, int64_t tableSize); + SingleHashGraph(int64_t countSize, int64_t maxkey, int64_t tableSize, int64_t lrbBins); + ~SingleHashGraph(); + + // void build(int64_t countSize, context_t &context, int64_t tableSize); + void build(int64_t countSize, int64_t tableSize); + +private: + + // mem_t d_vals; + // mem_t d_hash; + // mem_t d_counter; + // mem_t d_offset; + // mem_t d_edges; + + hkey_t* d_vals; + HashKey* d_hash; + index_t* d_counter; + index_t* d_offset; + keyval* d_edges; + + int64_t lrbBins; + index_t *d_lrbCounter; + index_t *d_lrbCounterPrefix; + keyval *d_lrbArray; +}; diff --git a/hash-graph-dehornetify/include/SingleHashGraphOperators.cuh b/hash-graph-dehornetify/include/SingleHashGraphOperators.cuh new file mode 100644 index 0000000..77bf2a6 --- /dev/null +++ b/hash-graph-dehornetify/include/SingleHashGraphOperators.cuh @@ -0,0 +1,154 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +__forceinline__ __host__ __device__ uint32_t rotl32( uint32_t x, int8_t r ) { + return (x << r) | (x >> (32 - r)); +} + +__forceinline__ __host__ __device__ uint32_t fmix32( uint32_t h ) { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + return h; +} + +__forceinline__ __host__ __device__ uint32_t hash_murmur(const HashKey& key) { + + constexpr int len = sizeof(int); + const uint8_t * const data = (const uint8_t*)&key; + constexpr int nblocks = len / 4; + uint32_t h1 = 0; + constexpr uint32_t c1 = 0xcc9e2d51; + constexpr uint32_t c2 = 0x1b873593; + //---------- + + // body + const uint32_t * const blocks = (const uint32_t *)(data + nblocks*4); + for(int i = -nblocks; i; i++) + { + uint32_t k1 = blocks[i];//getblock32(blocks,i); + k1 *= c1; + k1 = rotl32(k1,15); + k1 *= c2; + h1 ^= k1; + h1 = rotl32(h1,13); + h1 = h1*5+0xe6546b64; + } + //---------- + // tail + const uint8_t * tail = (const uint8_t*)(data + nblocks*4); + uint32_t k1 = 0; + switch(len & 3) + { + case 3: k1 ^= tail[2] << 16; + case 2: k1 ^= tail[1] << 8; + case 1: k1 ^= tail[0]; + k1 *= c1; k1 = rotl32(k1,15); k1 *= c2; h1 ^= k1; + }; + //---------- + // finalization + h1 ^= len; + h1 = fmix32(h1); + return h1; +} + +__global__ void hashValuesD(index_t valCount, hkey_t *valsArr, HashKey *hashArr, HashKey tableSize) { + int id = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (auto i = id; i < valCount; i += stride) { + // hashArr[i] = hash_murmur(valsArr[i]) % tableSize; + hashArr[i] = hash_murmur(valsArr[i]) % tableSize; + // hashArr[i] = (HashKey)(hash_murmur(valsArr[i].key) % tableSize); + } +} + +__global__ void countHashD(index_t valCount, HashKey *hashArr, index_t *countArr) { + int id = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (auto i = id; i < valCount; i += stride) { + atomicAdd(countArr + hashArr[i], 1); + } +} + +__global__ void copyToGraphD(index_t valCount, hkey_t *valsArr, HashKey *hashArr, index_t *countArr, + index_t *offsetArr, keyval *edges, index_t tableSize) { + + int id = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (auto i = id; i < valCount; i += stride) { + HashKey hashVal=hashArr[i]; + int pos = atomicAdd(countArr + hashVal,1)+offsetArr[hashVal]; + // edges[pos]={valsArr[i],i}; + edges[pos]={valsArr[i]}; + } +} + +__global__ void lrbCountHashD(index_t valCount, HashKey *hashArr, int32_t *d_lrbCounters, + index_t lrbBinSize) { + int id = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + + for (auto i = id; i < valCount; i += stride) { + index_t ha = (index_t)(hashArr[i]/lrbBinSize); + atomicAdd(d_lrbCounters + ha,1); + } +} + +__global__ void lrbRehashD(index_t valCount, hkey_t *valsArr, HashKey *hashArr, +// __global__ void lrbRehashD(index_t valCount, keyval *valsArr, HashKey *hashArr, + int32_t *d_lrbCounters, keyval *d_lrbHashReordered, + int32_t *d_lrbCountersPrefix, index_t lrbBinSize) { + + int id = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (auto i = id; i < valCount; i += stride) { + HashKey ha = hashArr[i]/lrbBinSize; + // if(blockIdx.x==0) + // printf("%d ", ha); + index_t pos = atomicAdd(d_lrbCounters + ha,1)+ d_lrbCountersPrefix[ha]; + // d_lrbHashReordered[pos]={valsArr[i],i}; + d_lrbHashReordered[pos] = { valsArr[i] }; + } +} + +__global__ void lrbCountHashGlobalD(index_t valCount, int32_t *countArr, + keyval *d_lrbHashReordered, index_t tableSize) { + + 
int id = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (auto i = id; i < valCount; i += stride) { + HashKey ha = hash_murmur(d_lrbHashReordered[i].key)%tableSize; + // HashKey ha = d_lrbHashReordered[i].key; + atomicAdd(countArr + ha,1); + } +} + +__global__ void lrbCopyToGraphD(index_t valCount, int32_t *countArr, index_t *offsetArr, + keyval *edges, keyval *d_lrbHashReordered, + index_t tableSize) { + + int id = blockIdx.x * blockDim.x + threadIdx.x; + int stride = blockDim.x * gridDim.x; + for (auto i = id; i < valCount; i += stride) { + HashKey hashVal = hash_murmur(d_lrbHashReordered[i].key)%tableSize; + // HashKey hashVal=d_lrbHashReordered[i].key; + int pos = atomicAdd(countArr + hashVal,1)+offsetArr[hashVal]; + edges[pos]=d_lrbHashReordered[i]; + } +} diff --git a/hash-graph-dehornetify/mem_old/memory.cpp b/hash-graph-dehornetify/mem_old/memory.cpp new file mode 100644 index 0000000..0213dc2 --- /dev/null +++ b/hash-graph-dehornetify/mem_old/memory.cpp @@ -0,0 +1,298 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * @brief Device Memory Manager implementation. + * + * Efficient allocation, deallocation and tracking of GPU memory. + * + */ + +#include "rmm.h" +// #include "rmm/rmm.h" +#include "memory_manager.h" +#include +#include +#include +#include + +// Set true to enable free/total memory logging at each RMM call (expensive) +#define RMM_USAGE_LOGGING false + +/** ---------------------------------------------------------------------------* + * @brief Macro wrapper to check for error in RMM API calls. + * ---------------------------------------------------------------------------**/ +#define RMM_CHECK(call) do { \ + rmmError_t error = (call); \ + if( error != RMM_SUCCESS ) return error; \ +} while(0) + +/** ---------------------------------------------------------------------------* + * @brief Macro wrapper for RMM API calls to return appropriate RMM errors. 
+ * ---------------------------------------------------------------------------**/ +#define RMM_CHECK_CUDA(call) do { \ + cudaError_t cudaError = (call); \ + if( cudaError == cudaErrorMemoryAllocation ) \ + return RMM_ERROR_OUT_OF_MEMORY; \ + else if( cudaError != cudaSuccess ) \ + return RMM_ERROR_CUDA_ERROR; \ +} while(0) + +namespace rmm +{ + // RAII logger class + class LogIt + { + public: + LogIt(Logger::MemEvent_t event, + void* ptr, + size_t size, + cudaStream_t stream, + const char* filename, + unsigned int line, + bool usageLogging=RMM_USAGE_LOGGING) + : event(event), device(0), ptr(ptr), size(size), stream(stream), + usageLogging(usageLogging), line(line) + { + if (filename) file = filename; + if (Manager::getOptions().enable_logging) + { + cudaGetDevice(&device); + start = std::chrono::system_clock::now(); + } + } + + /// Sometimes you need to start logging before the pointer address is + /// known + inline void setPointer(void* p) { + if (Manager::getOptions().enable_logging) ptr = p; + } + + ~LogIt() + { + if (Manager::getOptions().enable_logging) + { + Logger::TimePt end = std::chrono::system_clock::now(); + size_t freeMem = 0, totalMem = 0; + if (usageLogging) rmmGetInfo(&freeMem, &totalMem, stream); + Manager::getLogger().record(event, device, ptr, start, end, + freeMem, totalMem, size, stream, + file, line); + } + } + + private: + rmm::Logger::MemEvent_t event; + int device; + void* ptr; + size_t size; + cudaStream_t stream; + rmm::Logger::TimePt start; + std::string file; + unsigned int line; + bool usageLogging; + }; + + inline bool usePoolAllocator() + { + return Manager::getOptions().allocation_mode == PoolAllocation; + } +}; + +#ifndef GETNAME +#define GETNAME(x) case x: return #x; +#endif + +// Stringify RMM error code. +const char * rmmGetErrorString(rmmError_t errcode) { + switch (errcode) { + // There must be one entry per enum values in gdf_error. + GETNAME(RMM_SUCCESS) + GETNAME(RMM_ERROR_CUDA_ERROR) + GETNAME(RMM_ERROR_INVALID_ARGUMENT) + GETNAME(RMM_ERROR_NOT_INITIALIZED) + GETNAME(RMM_ERROR_OUT_OF_MEMORY) + GETNAME(RMM_ERROR_UNKNOWN) + GETNAME(RMM_ERROR_IO) + default: + // This means we are missing an entry above for a rmmError_t value. + return "Internal error. Unknown error code."; + } +} + +// Initialize memory manager state and storage. +rmmError_t rmmInitialize(rmmOptions_t *options) +{ + if (0 != options) + { + rmm::Manager::setOptions(*options); + } + + if (rmm::usePoolAllocator()) + { + cnmemDevice_t dev; + RMM_CHECK_CUDA( cudaGetDevice(&(dev.device)) ); + // Note: cnmem defaults to half GPU memory + dev.size = rmm::Manager::getOptions().initial_pool_size; + dev.numStreams = 1; + cudaStream_t streams[1]; streams[0] = 0; + dev.streams = streams; + dev.streamSizes = 0; + RMM_CHECK_CNMEM( cnmemInit(1, &dev, 0) ); + } + return RMM_SUCCESS; +} + +// Shutdown memory manager. +rmmError_t rmmFinalize() +{ + if (rmm::usePoolAllocator()) + RMM_CHECK_CNMEM( cnmemFinalize() ); + + rmm::Manager::getInstance().finalize(); + + return RMM_SUCCESS; +} + +// Allocate memory and return a pointer to device memory. 
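When the pool allocator is enabled, rmmInitialize above hands cnmem an initial pool and later calls sub-allocate from it. A short sketch of configuring that pool through the mem_old/memory.h interface in this patch (the 1 GiB pool size and the include path are illustrative):

#include <cstdio>
#include "memory.h"   // mem_old/memory.h from this patch

int main() {
    rmmOptions_t options{};                      // zero-init: cudaMalloc mode, logging off
    options.allocation_mode   = PoolAllocation;  // use the cnmem sub-allocator
    options.initial_pool_size = size_t{1} << 30; // 1 GiB starting pool
    options.enable_logging    = false;

    rmmError_t err = rmmInitialize(&options);
    if (err != RMM_SUCCESS) {
        std::printf("rmmInitialize failed: %s\n", rmmGetErrorString(err));
        return 1;
    }
    // ... device allocations via rmmAlloc / rmmFree go here ...
    rmmFinalize();
    return 0;
}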
+rmmError_t rmmAlloc(void **ptr, size_t size, cudaStream_t stream, const char* file, unsigned int line) +{ + rmm::LogIt log(rmm::Logger::Alloc, 0, size, stream, file, line); + + if (!ptr && !size) { + return RMM_SUCCESS; + } + + if (!ptr) + return RMM_ERROR_INVALID_ARGUMENT; + + if (rmm::usePoolAllocator()) + { + RMM_CHECK( rmm::Manager::getInstance().registerStream(stream) ); + RMM_CHECK_CNMEM( cnmemMalloc(ptr, size, stream) ); + } + else + RMM_CHECK_CUDA(cudaMalloc(ptr, size)); + + + log.setPointer(*ptr); + return RMM_SUCCESS; +} + +// Reallocate device memory block to new size and recycle any remaining memory. +rmmError_t rmmRealloc(void **ptr, size_t new_size, cudaStream_t stream, const char* file, unsigned int line) +{ + rmm::LogIt log(rmm::Logger::Realloc, ptr, new_size, stream, file, line); + + if (!ptr && !new_size) { + return RMM_SUCCESS; + } + + if (!ptr) + return RMM_ERROR_INVALID_ARGUMENT; + + if (rmm::usePoolAllocator()) + { + RMM_CHECK( rmm::Manager::getInstance().registerStream(stream) ); + RMM_CHECK_CNMEM( cnmemFree(*ptr, stream) ); + RMM_CHECK_CNMEM( cnmemMalloc(ptr, new_size, stream) ); + } + else + { + RMM_CHECK_CUDA(cudaFree(*ptr)); + RMM_CHECK_CUDA(cudaMalloc(ptr, new_size)); + } + log.setPointer(*ptr); + return RMM_SUCCESS; +} + +// Release device memory and recycle the associated memory. +rmmError_t rmmFree(void *ptr, cudaStream_t stream, const char* file, unsigned int line) +{ + rmm::LogIt log(rmm::Logger::Free, ptr, 0, stream, file, line); + if (rmm::usePoolAllocator()) + RMM_CHECK_CNMEM( cnmemFree(ptr, stream) ); + else + RMM_CHECK_CUDA(cudaFree(ptr)); + return RMM_SUCCESS; +} + +// Get the offset of ptr from its base allocation +rmmError_t rmmGetAllocationOffset(ptrdiff_t *offset, + void *ptr, + cudaStream_t stream) +{ + void *base = (void*)0xffffffff; + CUresult res = cuMemGetAddressRange((CUdeviceptr*)&base, nullptr, + (CUdeviceptr)ptr); + if (res != CUDA_SUCCESS) + return RMM_ERROR_INVALID_ARGUMENT; + *offset = reinterpret_cast(ptr) - + reinterpret_cast(base); + return RMM_SUCCESS; +} + +// Get amounts of free and total memory managed by a manager associated +// with the stream. 
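rmmAlloc above takes the caller's file and line so the logger can attribute each event; rmmGetInfo (defined next) reports free and total device memory through either cnmem or cudaMemGetInfo. A small usage sketch against the same mem_old/memory.h interface (stream 0 and the 256 MiB size are arbitrary):

#include <cstdio>
#include "memory.h"   // mem_old/memory.h from this patch

int main() {
    if (rmmInitialize(nullptr) != RMM_SUCCESS)   // null options: library defaults
        return 1;

    void *ptr = nullptr;
    // __FILE__ / __LINE__ are only recorded when event logging is enabled.
    if (rmmAlloc(&ptr, size_t{256} << 20, 0, __FILE__, __LINE__) == RMM_SUCCESS) {
        size_t freeB = 0, totalB = 0;
        rmmGetInfo(&freeB, &totalB, 0);
        std::printf("free %zu / total %zu bytes after alloc\n", freeB, totalB);
        rmmFree(ptr, 0, __FILE__, __LINE__);
    }

    rmmFinalize();
    return 0;
}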
+rmmError_t rmmGetInfo(size_t *freeSize, size_t *totalSize, cudaStream_t stream) +{ + if (rmm::usePoolAllocator()) + { + RMM_CHECK( rmm::Manager::getInstance().registerStream(stream) ); + RMM_CHECK_CNMEM( cnmemMemGetInfo(freeSize, totalSize, stream) ); + } + else + RMM_CHECK_CUDA(cudaMemGetInfo(freeSize, totalSize)); + return RMM_SUCCESS; +} + +// Write the memory event stats log to specified path/filename +rmmError_t rmmWriteLog(const char* filename) +{ + try + { + std::ofstream csv; + csv.open(filename); + rmm::Manager::getLogger().to_csv(csv); + } + catch (const std::ofstream::failure& e) { + return RMM_ERROR_IO; + } + return RMM_SUCCESS; +} + +// Get the size of the CSV log +size_t rmmLogSize() +{ + std::ostringstream csv; + rmm::Manager::getLogger().to_csv(csv); + return csv.str().size(); +} + +// Get the CSV log as a string +rmmError_t rmmGetLog(char *buffer, size_t buffer_size) +{ + try + { + std::ostringstream csv; + rmm::Manager::getLogger().to_csv(csv); + csv.str().copy(buffer, std::min(buffer_size, csv.str().size())); + } + catch (const std::ofstream::failure& e) { + return RMM_ERROR_IO; + } + return RMM_SUCCESS; +} diff --git a/hash-graph-dehornetify/mem_old/memory.h b/hash-graph-dehornetify/mem_old/memory.h new file mode 100644 index 0000000..a8cd9a9 --- /dev/null +++ b/hash-graph-dehornetify/mem_old/memory.h @@ -0,0 +1,193 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** ---------------------------------------------------------------------------* + * @brief Device Memory Manager public interface. + * + * Efficient allocation, deallocation and tracking of GPU memory.
+ * --------------------------------------------------------------------------**/ + +#pragma once + +typedef struct CUstream_st *cudaStream_t; +typedef long int offset_t; // would prefer ptrdiff_t but can't #include + // due to CFFI limitations + +/** ---------------------------------------------------------------------------* + * @brief RMM error codes + * --------------------------------------------------------------------------**/ +typedef enum +{ + RMM_SUCCESS = 0, //< Success result + RMM_ERROR_CUDA_ERROR, //< A CUDA error occurred + RMM_ERROR_INVALID_ARGUMENT, //< An invalid argument was passed (e.g.null pointer) + RMM_ERROR_NOT_INITIALIZED, //< RMM API called before rmmInitialize() + RMM_ERROR_OUT_OF_MEMORY, //< The memory manager was unable to allocate more memory + RMM_ERROR_UNKNOWN, //< An unknown error occurred + RMM_ERROR_IO, //< Stats output error + N_RMM_ERROR //< Count of error types +} rmmError_t; + +typedef enum +{ + CudaDefaultAllocation = 0, //< Use cudaMalloc for allocation + PoolAllocation, //< Use pool suballocation strategy +} rmmAllocationMode_t; + +typedef struct +{ + rmmAllocationMode_t allocation_mode; //< Allocation strategy to use + size_t initial_pool_size; //< When pool suballocation is enabled, + //< this is the initial pool size in bytes + bool enable_logging; //< Enable logging memory manager events +} rmmOptions_t; + +/** ---------------------------------------------------------------------------* + * @brief Initialize memory manager state and storage. + * + * @param[in] options Structure of options for the memory manager. Defaults are + * used if it is null. + * @return rmmError_t RMM_SUCCESS or RMM_ERROR_CUDA_ERROR on any CUDA error. + * --------------------------------------------------------------------------**/ +rmmError_t rmmInitialize(rmmOptions_t *options); + +/** ---------------------------------------------------------------------------* + * @brief Shutdown memory manager. + * + * @return rmmError_t RMM_SUCCESS, or RMM_NOT_INITIALIZED if rmmInitialize() has + * not been called, or RMM_ERROR_CUDA_ERROR on any CUDA error. + * ---------------------------------------------------------------------------**/ +rmmError_t rmmFinalize(); + +/** ---------------------------------------------------------------------------* + * @brief Stringify RMM error code. + * + * @param errcode The error returned by an RMM function + * @return const char* The input error code in string form + * --------------------------------------------------------------------------**/ +const char * rmmGetErrorString(rmmError_t errcode); + +/** ---------------------------------------------------------------------------* + * @brief Allocate memory and return a pointer to device memory. + * + * @param[out] ptr Returned pointer + * @param[in] size The size in bytes of the allocated memory region + * @param[in] stream The stream in which to synchronize this command + * @param[in] file The filename location of the call to this function, for tracking + * @param[in] line The line number of the call to this function, for tracking + * @return rmmError_t RMM_SUCCESS, or RMM_ERROR_NOT_INITIALIZED if rmmInitialize + * has not been called, RMM_ERROR_INVALID_ARGUMENT if ptr is + * null, RMM_ERROR_OUT_OF_MEMORY if unable to allocate the + * requested size, or RMM_CUDA_ERROR on any other CUDA error. 
+ * --------------------------------------------------------------------------**/ +rmmError_t rmmAlloc(void **ptr, size_t size, cudaStream_t stream, + const char* file, unsigned int line); + +/** ---------------------------------------------------------------------------* + * @brief Reallocate device memory block to new size and recycle any remaining + * memory. + * + * @param[out] ptr Returned pointer + * @param[in] new_size The size in bytes of the allocated memory region + * @param[in] stream The stream in which to synchronize this command + * @param[in] file The filename location of the call to this function, for tracking + * @param[in] line The line number of the call to this function, for tracking + * @return rmmError_t RMM_SUCCESS, or RMM_ERROR_NOT_INITIALIZED if rmmInitialize + * has not been called, RMM_ERROR_INVALID_ARGUMENT if ptr is + * null, RMM_ERROR_OUT_OF_MEMORY if unable to allocate the + * requested size, or RMM_ERROR_CUDA_ERROR on any other CUDA + * error. + * --------------------------------------------------------------------------**/ +rmmError_t rmmRealloc(void **ptr, size_t new_size, cudaStream_t stream, + const char* file, unsigned int line); + +/** ---------------------------------------------------------------------------* + * @brief Release device memory and recycle the associated memory. + * + * @param[in] ptr The pointer to free + * @param[in] stream The stream in which to synchronize this command + * @param[in] file The filename location of the call to this function, for tracking + * @param[in] line The line number of the call to this function, for tracking + * @return rmmError_t RMM_SUCCESS, or RMM_ERROR_NOT_INITIALIZED if rmmInitialize + * has not been called,or RMM_ERROR_CUDA_ERROR on any CUDA + * error. + * --------------------------------------------------------------------------**/ +rmmError_t rmmFree(void *ptr, cudaStream_t stream, + const char* file, unsigned int line); + +/** ---------------------------------------------------------------------------* + * @brief Get the offset of ptr from its base allocation. + * + * This offset is the difference between the address stored in ptr and the + * base device memory allocation that it is a sub-allocation of within the pool. + * This is useful for, e.g. IPC, where cudaIpcOpenMemHandle() returns a pointer + * to the base * allocation, so the user needs the offset of the sub-allocation + * in order to correctly access its data. + * + * @param[out] offset The difference between ptr and the base allocation that ptr + * is sub-allocated from. + * @param[in] ptr The ptr to find the base allocation of. + * @param[in] stream The stream originally passed to rmmAlloc or rmmRealloc for + * ptr. + * @return rmmError_t RMM_SUCCESS if all goes well + * --------------------------------------------------------------------------**/ +rmmError_t rmmGetAllocationOffset(offset_t *offset, + void *ptr, + cudaStream_t stream); + +/** ---------------------------------------------------------------------------* + * @brief Get amounts of free and total memory managed by a manager associated + * with the stream. + * + * Returns in *free and *total, respectively, the free and total amount of + * memory available for allocation by the device in bytes. 
+ * + * @param[out] freeSize The free memory in bytes available to the manager + * associated with stream + * @param[out] totalSize The total memory managed by the manager associated with + * stream + * @param[in] stream + * @return rmmError_t RMM_SUCCESS, or RMM_ERROR_NOT_INITIALIZED if rmmInitialize + * has not been called, or RMM_ERROR_CUDA_ERROR on any CUDA + * error + * --------------------------------------------------------------------------**/ +rmmError_t rmmGetInfo(size_t *freeSize, size_t *totalSize, cudaStream_t stream); + +/** ---------------------------------------------------------------------------* + * @brief Write the memory event stats log to specified path/filename + * + * Note: will overwrite the specified file. + * + * @param filename The full path and filename to write. + * @return rmmError_t RMM_SUCCESS or RMM_ERROR_IO on output failure. + * --------------------------------------------------------------------------**/ +rmmError_t rmmWriteLog(const char* filename); + +/** ---------------------------------------------------------------------------* + * @brief Get the size of the CSV log string in memory. + * + * @return size_t The size of the log (as a C string) in memory. + * --------------------------------------------------------------------------**/ +size_t rmmLogSize(); + +/** ---------------------------------------------------------------------------* + * @brief Get the RMM log as CSV in a C string. + * + * @param[out] buffer The buffer into which to store the CSV log string. + * @param[in] buffer_size The size allocated for buffer. + * @return rmmError_t RMM_SUCCESS, or RMM_IO_ERROR on any failure. + * --------------------------------------------------------------------------**/ +rmmError_t rmmGetLog(char* buffer, size_t buffer_size); \ No newline at end of file diff --git a/hash-graph-dehornetify/mem_old/memory_manager.cpp b/hash-graph-dehornetify/mem_old/memory_manager.cpp new file mode 100644 index 0000000..c5c08d5 --- /dev/null +++ b/hash-graph-dehornetify/mem_old/memory_manager.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "memory_manager.h" + +namespace rmm +{ + /** -----------------------------------------------------------------------* + * Record a memory manager event in the log. + * + * @param[in] event The type of event (Alloc, Realloc, or Free) + * @param[in] DeviceId The device to which this event applies. + * @param[in] ptr The device pointer being allocated or freed. + * @param[in] t The timestamp to record. + * @param[in] size The size of allocation (only needed for Alloc/Realloc). + * @param[in] stream The stream on which the allocation is happening + * (only needed for Alloc/Realloc). 
+ * ----------------------------------------------------------------------**/ + void Logger::record(MemEvent_t event, int deviceId, void* ptr, + TimePt start, TimePt end, + size_t freeMem, size_t totalMem, + size_t size, cudaStream_t stream, + std::string filename, + unsigned int line) + + { + std::lock_guard guard(log_mutex); + if (Alloc == event) + current_allocations.insert(ptr); + else if (Free == event) + current_allocations.erase(ptr); + events.push_back({event, deviceId, ptr, size, stream, + freeMem, totalMem, current_allocations.size(), + start, end, filename, line}); + } + + /** -----------------------------------------------------------------------* + * @brief Output a comma-separated value string of the current log to the + * provided ostream + * + * @param[in] csv The output stream to put the CSV log string into. + * ----------------------------------------------------------------------**/ + void Logger::to_csv(std::ostream &csv) + { + csv << "Event Type,Device ID,Address,Stream,Size (bytes),Free Memory," + << "Total Memory,Current Allocs,Start,End,Elapsed,Location\n"; + + for (auto& e : events) + { + auto event_str = "Alloc"; + if (e.event == Realloc) event_str = "Realloc"; + if (e.event == Free) event_str = "Free"; + + std::chrono::duration elapsed = e.end-e.start; + + csv << event_str << "," << e.deviceId << "," << e.ptr << "," + << e.stream << "," << e.size << "," << e.freeMem << "," + << e.totalMem << "," << e.currentAllocations << "," + << std::chrono::duration(e.start-base_time).count() << "," + << std::chrono::duration(e.end-base_time).count() << "," + << elapsed.count() << "," << e.filename << ":" << e.line + << std::endl; + } + } + + void Logger::clear() + { + std::lock_guard guard(log_mutex); + events.clear(); + } +} \ No newline at end of file diff --git a/hash-graph-dehornetify/mem_old/memory_manager.h b/hash-graph-dehornetify/mem_old/memory_manager.h new file mode 100644 index 0000000..c8c419b --- /dev/null +++ b/hash-graph-dehornetify/mem_old/memory_manager.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2018, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** ---------------------------------------------------------------------------* + * @brief Memory Manager class + * + * Note: assumes at least C++11 + * ---------------------------------------------------------------------------**/ + +#ifndef MEMORY_MANAGER_H +#define MEMORY_MANAGER_H + +#include +#include +#include +#include +#include +#include +#include + +#include "memory.h" +#include "cnmem.h" + +/** ---------------------------------------------------------------------------* + * @brief Macro wrapper for CNMEM API calls to return appropriate RMM errors. + * ---------------------------------------------------------------------------**/ +#define RMM_CHECK_CNMEM(call) do { \ + cnmemStatus_t error = (call); \ + switch (error) { \ + case CNMEM_STATUS_SUCCESS: \ + break; /* don't return on success! 
*/ \ + case CNMEM_STATUS_CUDA_ERROR: \ + return RMM_ERROR_CUDA_ERROR; \ + case CNMEM_STATUS_INVALID_ARGUMENT: \ + return RMM_ERROR_INVALID_ARGUMENT; \ + case CNMEM_STATUS_NOT_INITIALIZED: \ + return RMM_ERROR_NOT_INITIALIZED; \ + case CNMEM_STATUS_OUT_OF_MEMORY: \ + return RMM_ERROR_OUT_OF_MEMORY; \ + case CNMEM_STATUS_UNKNOWN_ERROR: \ + default: \ + return RMM_ERROR_UNKNOWN; \ + } \ +} while(0) + +typedef struct CUstream_st *cudaStream_t; + +namespace rmm +{ + class Logger + { + public: + Logger() { base_time = std::chrono::system_clock::now(); } + + typedef enum { + Alloc = 0, + Realloc, + Free + } MemEvent_t; + + using TimePt = std::chrono::system_clock::time_point; + + /// Record a memory manager event in the log. + void record(MemEvent_t event, int deviceId, void* ptr, + TimePt start, TimePt end, + size_t freeMem, size_t totalMem, + size_t size, cudaStream_t stream, + std::string filename, + unsigned int line); + + void clear(); + + /// Write the log to comma-separated value file + void to_csv(std::ostream &csv); + private: + std::set current_allocations; + + struct MemoryEvent { + MemEvent_t event; + int deviceId; + void* ptr; + size_t size; + cudaStream_t stream; + size_t freeMem; + size_t totalMem; + size_t currentAllocations; + TimePt start; + TimePt end; + std::string filename; + unsigned int line; + }; + + TimePt base_time; + std::vector events; + std::mutex log_mutex; + }; + + class Manager + { + public: + static Manager& getInstance(){ + // Myers' singleton. Thread safe and unique. Note: C++11 required. + static Manager instance; + return instance; + } + + static Logger& getLogger() { return getInstance().logger; } + + static void setOptions(const rmmOptions_t &options) { + getInstance().options = options; + } + static rmmOptions_t getOptions() { return getInstance().options; } + + void finalize() { + std::lock_guard guard(streams_mutex); + registered_streams.clear(); + logger.clear(); + } + + /** ---------------------------------------------------------------------------* + * @brief Register a new stream into the device memory manager. + * + * Also returns success if the stream is already registered. + * + * @param stream The stream to register + * @return rmmError_t RMM_SUCCESS if all goes well, RMM_ERROR_INVALID_ARGUMENT + * if the stream is invalid. + * ---------------------------------------------------------------------------**/ + rmmError_t registerStream(cudaStream_t stream) { + std::lock_guard guard(streams_mutex); + if (registered_streams.empty() || 0 == registered_streams.count(stream)) { + registered_streams.insert(stream); + if (stream && PoolAllocation == options.allocation_mode) // don't register the null stream with CNMem + RMM_CHECK_CNMEM( cnmemRegisterStream(stream) ); + } + return RMM_SUCCESS; + } + + private: + Manager() : options({ CudaDefaultAllocation, false, 0 }) {} + ~Manager() = default; + Manager(const Manager&) = delete; + Manager& operator=(const Manager&) = delete; + + std::mutex streams_mutex; + std::set registered_streams; + Logger logger; + + rmmOptions_t options; + }; +} + +#endif // MEMORY_MANAGER_H \ No newline at end of file diff --git a/hash-graph-dehornetify/src/MultiHashGraph.cu b/hash-graph-dehornetify/src/MultiHashGraph.cu new file mode 100644 index 0000000..b4247c7 --- /dev/null +++ b/hash-graph-dehornetify/src/MultiHashGraph.cu @@ -0,0 +1,862 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "MultiHashGraph.cuh" +#include "MultiHashGraphDeviceOperators.cuh" +#include "MultiHashGraphHostOperators.cuh" + +#include +#include +#include +#include +#include + +#include + +#include //--profile-from-start off + +// #include +// #include +// #include +// #include +// #include +// #include + +#include +#include + +#include + +// #include "rmm/rmm.h" +// #include "rmm.h" +// using namespace mgpu; +using namespace std::chrono; + + +// Uncomment once we remove "using namespace hornets_nest" +// const int BLOCK_SIZE_OP2 = 256; + +// #define ERROR_CHECK +// #define PRINT_KEYS +#define LRB_BUILD + +#ifdef HOST_PROFILE +uint64_t tidFocused = 2; +#endif + +// #define DEBUG + +MultiHashGraph::MultiHashGraph(inputData *h_dVals, index_t countSize, index_t maxkey, + // context_t &context, index_t tableSize, + index_t tableSize, + index_t binCount, index_t lrbBins, + index_t gpuCount) { + + + index_t binRange = std::ceil(maxkey / ((float)binCount)); + BLOCK_COUNT = std::ceil(countSize / ((float) BLOCK_SIZE_OP2)); + BLOCK_COUNT = std::min(BLOCK_COUNT, 65535); + + std::cout << "bin_count: " << binCount << std::endl; + std::cout << "bin_range: " << binRange << std::endl; + std::cout << "table_size: " << tableSize << std::endl; + + std::cout << "BLOCK_COUNT: " << BLOCK_COUNT << " BLOCKS_SIZE: " << BLOCK_SIZE_OP2 << "\n"; + + h_vals = new hkey_t[countSize](); + + // Input is arrays of key on different devices. + index_t avgKeyCount = std::ceil(countSize / ((double) gpuCount)); + index_t h_valIdx = 0; + + // h_dKeyBinBuff = new hkey_t*[gpuCount](); + h_dKeyBinBuff = new keyval*[gpuCount](); + + index_t seed = 0; + for (index_t i = 0; i < gpuCount; i++) { + cudaSetDevice(i); + + index_t lo = avgKeyCount * i; + index_t hi = avgKeyCount * (i + 1); + hi = std::min(hi, countSize); + + index_t keyCount = hi - lo; + + cudaMemcpy(h_vals + h_valIdx, h_dVals[i].d_keys, keyCount * sizeof(hkey_t), + cudaMemcpyDeviceToHost); + + h_valIdx += keyCount; + + cudaMalloc(&h_dKeyBinBuff[i], keyCount * sizeof(keyval)); + } + +#ifdef PRINT_KEYS + std::sort(h_vals, h_vals + countSize); + std::cout << "keys: " << std::endl; + for (uint32_t i = 0; i < countSize; i++) { + std::cout << h_vals[i] << " "; + } + std::cout << std::endl; +#endif + + // Structures for initial binning + h_binSizes = new index_t[binCount](); // Consolidated bin sizes across devices + h_hBinSizes = new index_t*[gpuCount](); // Bin sizes per device + h_dBinSizes = new index_t*[gpuCount](); + for (index_t i = 0; i < gpuCount; i++) { + cudaSetDevice(i); + cudaMalloc(&h_dBinSizes[i], binCount * sizeof(index_t)); + h_hBinSizes[i] = new index_t[binCount](); + } + + h_psBinSizes = new index_t[binCount + 1](); + + // Structures for allocating bins to GPUs (i.e. hash ranges). 
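// --- Illustrative sketch (not part of the committed code) ---------------------
// The structures allocated just below hold the per-GPU hash ranges that
// countBinSizes() computes on the device: hashes are grouped into coarse bins of
// width binRange, the per-bin counts are prefix-summed, and that prefix sum is
// cut into gpuCount pieces of roughly equal key count. The committed code then
// stores the cut points in hash-value units in h_binSplits; the host-side sketch
// below returns bin boundaries instead and uses stand-in std:: types rather than
// the repo's index_t, so treat the names and details as assumptions.
//
// #include <cstdint>
// #include <vector>
//
// static std::vector<int64_t> sketchBinSplits(const std::vector<int64_t> &binSizes,
//                                             int64_t totalKeys, int64_t gpuCount) {
//   int64_t binCount = (int64_t)binSizes.size();
//
//   // Exclusive prefix sum: ps[b] = number of keys hashed below bin b.
//   std::vector<int64_t> ps(binCount + 1, 0);
//   for (int64_t b = 0; b < binCount; b++) ps[b + 1] = ps[b] + binSizes[b];
//
//   // Cut the prefix sum into gpuCount pieces of ~totalKeys/gpuCount keys each.
//   std::vector<int64_t> splits(gpuCount + 1, 0);
//   int64_t target = (totalKeys + gpuCount - 1) / gpuCount;
//   int64_t g = 1;
//   for (int64_t b = 0; b < binCount && g < gpuCount; b++)
//     while (g < gpuCount && ps[b + 1] >= g * target)
//       splits[g++] = b + 1;                   // always cut at a bin boundary
//   splits[gpuCount] = binCount;               // last GPU takes the tail
//   return splits;                             // GPU g owns bins [splits[g], splits[g+1])
// }
// ------------------------------------------------------------------------------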
+ h_binSplits = new index_t[gpuCount + 1](); + h_dBinSplits = new index_t*[gpuCount](); + for (index_t i = 0; i < gpuCount; i++) { + cudaSetDevice(i); + cudaMalloc(&h_dBinSplits[i], (gpuCount + 1) * sizeof(index_t)); + } + cudaSetDevice(0); + + // Structures for counting the key/hash buffer sizes on each GPU. + h_bufferCounter = new index_t*[gpuCount](); + for (index_t i = 0; i < gpuCount; i++) { + h_bufferCounter[i] = new index_t[gpuCount](); + } + + h_dBufferCounter = new index_t*[gpuCount](); + for (index_t i = 0; i < gpuCount; i++) { + cudaSetDevice(i); + cudaMalloc(&h_dBufferCounter[i], gpuCount * sizeof(index_t)); + cudaMemset(h_dBufferCounter[i], 0, gpuCount * sizeof(index_t)); + } + cudaSetDevice(0); + + h_hKeyBinOff = new index_t*[gpuCount](); + h_dKeyBinOff = new index_t*[gpuCount](); + for (index_t i = 0; i < gpuCount; i++) { + cudaSetDevice(i); + cudaMalloc(&h_dKeyBinOff[i], (gpuCount + 1) * sizeof(index_t)); + h_hKeyBinOff[i] = new index_t[gpuCount + 1](); + } + + // h_dFinalKeys = new hkey_t*[gpuCount](); + h_dFinalKeys = new char*[gpuCount](); + + h_hFinalCounter = new index_t*[gpuCount](); + h_dFinalOffset = new index_t*[gpuCount](); + h_hFinalOffset = new index_t*[gpuCount](); + for (index_t i = 0; i < gpuCount; i++) { + cudaSetDevice(i); + h_hFinalCounter[i] = new index_t[gpuCount](); + h_hFinalOffset[i] = new index_t[gpuCount](); + cudaMalloc(&h_dFinalOffset[i], (gpuCount + 1) * sizeof(index_t)); + } + + h_dOffsets = new index_t*[gpuCount](); + h_dCounter = new index_t*[gpuCount](); + h_dEdges = new hkey_t*[gpuCount](); + + h_dExSumTemp = new size_t*[gpuCount](); + // exSumTempBytes = 1279; + // exSumTempBytes = 2000; + // exSumTempBytes = 3000000; + exSumTempBytes = std::max(2048L, (long)(tableSize / 10)); + // exSumTempBytes = tableSize / 10; + for (index_t i = 0; i < gpuCount; i++) { + cudaSetDevice(i); + cudaMalloc(&h_dExSumTemp[i], exSumTempBytes); + } + + this->h_dVals = h_dVals; + this->countSize = countSize; + this->maxkey = maxkey; + this->tableSize = tableSize; + this->binCount = binCount; + this->lrbBins = lrbBins; + this->gpuCount = gpuCount; + + h_hashOff = new index_t[gpuCount](); + h_counterOff = new index_t[gpuCount](); + h_offsetOff = new index_t[gpuCount](); + h_edgesOff = new index_t[gpuCount](); + h_lrbOff = new index_t[gpuCount](); + + // cudaMalloc(&d_Common, 1 * sizeof(index_t)); + // cudaMemset(d_Common, 0, 1 * sizeof(index_t)); + + // cudaMalloc(&d_GlobalCounter, 1 * sizeof(index_t)); + // cudaMemset(d_GlobalCounter, 0, 1 * sizeof(index_t)); + h_dCommon = new index_t*[gpuCount](); + h_dGlobalCounter = new index_t*[gpuCount](); + for (index_t i = 0; i < gpuCount; i++) { + cudaSetDevice(i); + cudaMalloc(&h_dCommon[i], 1 * sizeof(index_t)); + cudaMemset(h_dCommon[i], 0, 1 * sizeof(index_t)); + + cudaMalloc(&h_dGlobalCounter[i], 1 * sizeof(index_t)); + cudaMemset(h_dGlobalCounter[i], 0, 1 * sizeof(index_t)); + } + +#ifdef LRB_BUILD + h_dLrbCounter = new index_t*[gpuCount](); + h_dLrbCountersPrefix = new index_t*[gpuCount](); + + for (index_t i = 0; i < gpuCount; i++) { + cudaSetDevice(i); + cudaMalloc(&h_dLrbCounter[i], (lrbBins + 2) * sizeof(index_t)); + cudaMemset(h_dLrbCounter[i], 0, (lrbBins + 2) * sizeof(index_t)); + + cudaMalloc(&h_dLrbCountersPrefix[i], (lrbBins + 2) * sizeof(index_t)); + cudaMemset(h_dLrbCountersPrefix[i], 0,(lrbBins + 2) * sizeof(index_t)); + } +#endif + +#ifdef MANAGED_MEM + index_t size = countSize * sizeof(keyval) + + countSize * sizeof(HashKey) + + (2 * countSize * sizeof(keyval)) + + (2 * (tableSize + gpuCount) * 
sizeof(index_t)); + std::cout << "managed alloc size: " << size << std::endl; + this->totalSize = size; + uvmPtr = nullptr; + + cudaMallocManaged(&uvmPtr, size); + index_t equalChunk = size / gpuCount; + for (index_t i = 0; i < gpuCount; i++) { + cudaSetDevice(i); + cudaMemPrefetchAsync(uvmPtr + equalChunk * i, equalChunk, i); + } + prefixArray = new index_t[gpuCount + 1](); + + h_dCountCommon = new char*[gpuCount](); +#endif + + CHECK_ERROR("constructor"); + +} + +MultiHashGraph::~MultiHashGraph() { +#if 0 + if (!multiDestroyed) { + destroyMulti(); + } + delete[] h_vals; +#endif +} + +void MultiHashGraph::destroyMulti() { + // cudaFree(d_binSizes); +#if 0 + cudaFree(d_psBinSizes); + + for (uindex_t i = 0; i < gpuCount; i++) { + cudaSetDevice(i); + cudaFree(h_dKeys[i]); + cudaFree(h_dBinCounter[i]); + cudaFree(h_dHashes[i]); + cudaFree(h_dBufferCounter[i]); + cudaFree(h_dHashSplits[i]); + delete[] h_hBinCounter[i]; + } + cudaSetDevice(0); + delete[] h_dKeys; + delete[] h_dHashSplits; + delete[] h_dBinCounter; + delete[] h_dHashes; + delete[] h_hBinCounter; + delete[] h_dBufferCounter; + + for (uindex_t i = 0; i < binCount; i++) { + delete[] h_keyBins[i]; + } + + delete[] h_keyBins; + delete[] h_binSizes; + cudaFree(d_binCounter); + delete[] h_binCounter; + delete[] h_binSplits; + + multiDestroyed = true; +#endif +} + +bool compareByKey(const keyval &kv1, const keyval &kv2) { + return kv1.key < kv2.key; +} + +// void lrbBuildMultiTable(hkey_t *d_vals, HashKey *d_hash, index_t *d_counter, +void lrbBuildMultiTable(keyval *d_vals, HashKey *d_hash, index_t *d_counter, +// void lrbBuildMultiTable(keyval *d_vals, keyval *d_hash, index_t *d_counter, + index_t *d_offSet, keyval *d_edges, index_t *d_splits, + index_t valCount, index_t tableSize, index_t ogTableSize, + keyval *d_lrbArray, index_t *d_lrbCounters, + index_t *d_lrbCountersPrefix, size_t *d_exSumTemp, + size_t exSumTempBytes, index_t lrbBins, index_t lrbBinSize, + index_t devNum) { + + cudaMemset(d_counter, 0, (tableSize + 1) * sizeof(index_t)); + void* _d_temp_storage { nullptr }; + size_t _temp_storage_bytes { 0 }; + + + + hashValuesD<<>>(valCount, d_vals, d_hash, + (HashKey) ogTableSize, devNum); + decrHash<<>>(d_hash, valCount, d_splits, + devNum); + + lrbCountHashD<<>>(valCount, d_hash, d_lrbCounters, + lrbBinSize); + _d_temp_storage = nullptr; _temp_storage_bytes = 0; + + cub::DeviceScan::ExclusiveSum(NULL, _temp_storage_bytes, d_lrbCounters, + d_lrbCountersPrefix, lrbBins + 1); + + if (_temp_storage_bytes > exSumTempBytes) { + std::cerr << "ERROR: NOT ENOUGH TEMP SPACE ALLOCATED" << std::endl; + } + + + cub::DeviceScan::ExclusiveSum(d_exSumTemp, _temp_storage_bytes, d_lrbCounters, + d_lrbCountersPrefix, lrbBins + 1); + + cudaMemcpy(d_lrbCountersPrefix + lrbBins, &lrbBins, sizeof(index_t), + cudaMemcpyHostToDevice); + + cudaMemset(d_lrbCounters, 0, (lrbBins + 1) * sizeof(index_t)); + + lrbRehashD<<>>(valCount, d_vals, d_hash, + d_lrbCounters, d_lrbArray, + d_lrbCountersPrefix, lrbBinSize, + devNum); + + lrbCountHashGlobalD<<>>(valCount, d_counter, + d_lrbArray, d_splits, + ogTableSize, devNum); + + _d_temp_storage = nullptr; _temp_storage_bytes = 0; + cub::DeviceScan::ExclusiveSum(_d_temp_storage, _temp_storage_bytes,d_counter, + d_offSet, tableSize); + // cudaMalloc(&_d_temp_storage, _temp_storage_bytes); + // // RMM_ALLOC(&_d_temp_storage, _temp_storage_bytes, 0); + // cub::DeviceScan::ExclusiveSum(_d_temp_storage, _temp_storage_bytes,d_counter, + // d_offSet, tableSize); + + if (_temp_storage_bytes > exSumTempBytes) { + 
std::cerr << "ERROR: NOT ENOUGH TEMP SPACE ALLOCATED" << std::endl; + std::cerr << _temp_storage_bytes << " " << exSumTempBytes << std::endl; + } + cub::DeviceScan::ExclusiveSum(d_exSumTemp, _temp_storage_bytes,d_counter, + d_offSet, tableSize); + cudaMemcpy(d_offSet + tableSize, &valCount, sizeof(index_t), cudaMemcpyHostToDevice); + // cudaFree(_d_temp_storage); + // RMM_FREE(_d_temp_storage, 0); + + cudaMemset(d_counter, 0, tableSize * sizeof(index_t)); + + lrbCopyToGraphD<<>>(valCount, d_counter, d_offSet, + d_edges, d_lrbArray, d_splits, + ogTableSize, devNum); +} + +#ifndef LRB_BUILD +void buildMultiTable(hkey_t *d_vals, HashKey *d_hash, index_t *d_counter, + index_t *d_offSet, keyval *d_edges, index_t *d_splits, index_t valCount, + index_t tableSize, index_t devNum) { + + void* _d_temp_storage { nullptr }; + size_t _temp_storage_bytes { 0 }; + + decrHash<<>>(d_vals, d_hash, valCount, d_splits, devNum); + + countHashD<<>>(valCount, d_hash, d_counter); + + cub::DeviceScan::ExclusiveSum(_d_temp_storage, _temp_storage_bytes, d_counter, + d_offSet, tableSize); + + cudaMalloc(&_d_temp_storage, _temp_storage_bytes); + cub::DeviceScan::ExclusiveSum(_d_temp_storage, _temp_storage_bytes,d_counter, + d_offSet, tableSize); + // d_counter = fill(0, (size_t)tableSize, context); + cudaMemset(d_counter, 0, tableSize * sizeof(index_t)); + cudaMemcpy(d_offSet + tableSize, &valCount, sizeof(index_t), cudaMemcpyHostToDevice); + + if (_d_temp_storage > 0) { + cudaFree(_d_temp_storage); + } + + copyToGraphD<<>>(valCount, d_vals, d_hash, d_counter, d_offSet, + d_edges, tableSize); +} +#endif + +void MultiHashGraph::build(bool findSplits, index_t tid) { + // index_t binRange = std::ceil(maxkey / ((float)binCount)); + index_t binRange = std::ceil(tableSize / ((float)binCount)); + + cudaSetDevice(0); + cudaEvent_t start, stop; + + float buildTime = 0.0f; // milliseoncds + high_resolution_clock::time_point t1; + high_resolution_clock::time_point t2; + + // Hash all keys on each device. + cudaSetDevice(tid); + + basicHashD<<>>(h_dVals[tid].len, h_dVals[tid].d_keys, + // h_dVals[tid].d_keys + h_dVals[tid].len, tableSize); + h_dVals[tid].d_hash, tableSize); +#ifdef ERROR_CHECK + cudaDeviceSynchronize(); + CHECK_ERROR("hashValues"); +#endif + + if (findSplits) { +#ifdef HOST_PROFILE + t1 = high_resolution_clock::now(); +#endif + // Count the number of keys in each key bin and determine the hash range per device. + countBinSizes(h_dVals, h_hBinSizes, h_dBinSizes, h_binSizes, h_psBinSizes, h_binSplits, + h_dBinSplits, countSize, tableSize, binRange, binCount, gpuCount, tid); +#ifdef HOST_PROFILE + if (tid == tidFocused) { + cudaDeviceSynchronize(); + t2 = high_resolution_clock::now(); + buildTime = duration_cast( t2 - t1 ).count(); + std::cout << "countBinSizes time: " << (buildTime / 1000.0) << std::endl; + } +#endif + +#ifdef ERROR_CHECK + cudaDeviceSynchronize(); + CHECK_ERROR("countBinSizes"); +#endif + } + +#ifdef HOST_PROFILE + if (tid == tidFocused) { + t1 = high_resolution_clock::now(); + } +#endif + // Count the number of keys that each GPU needs to ship to each other GPU based + // on ranges. 
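// --- Illustrative sketch (not part of the committed code) ---------------------
// Conceptually, the call below does the following per GPU: each locally held
// hash is looked up against the hash-range boundaries in h_dBinSplits, and a
// counter per destination GPU records how many keys must be shipped there. The
// host-side sketch uses std:: types and a sequential upper_bound in place of the
// repo's device kernel (which would use one atomicAdd per key), so the names and
// exact mechanics are assumptions.
//
// #include <algorithm>
// #include <cstdint>
// #include <vector>
//
// static std::vector<int64_t> sketchCountPerDestination(
//     const std::vector<uint32_t> &hashes, const std::vector<int64_t> &binSplits) {
//   int64_t gpuCount = (int64_t)binSplits.size() - 1;
//   std::vector<int64_t> counts(gpuCount, 0);
//   for (uint32_t h : hashes) {
//     // Owner is the partition whose [binSplits[g], binSplits[g+1]) contains h.
//     auto it = std::upper_bound(binSplits.begin(), binSplits.end(), (int64_t)h);
//     int64_t dest = (int64_t)(it - binSplits.begin()) - 1;
//     counts[dest]++;
//   }
//   return counts;   // becomes h_bufferCounter[tid][*] in the real build
// }
// ------------------------------------------------------------------------------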
+ countKeyBuffSizes(h_dVals, h_dBinSplits, h_bufferCounter, h_dBufferCounter, gpuCount, tid); +#ifdef HOST_PROFILE + if (tid == tidFocused) { + cudaDeviceSynchronize(); + t2 = high_resolution_clock::now(); + buildTime = duration_cast( t2 - t1 ).count(); + std::cout << "countKeyBuff time: " << (buildTime / 1000.0) << std::endl; + } +#endif + + +#ifdef ERROR_CHECK + cudaDeviceSynchronize(); + CHECK_ERROR("countKeyBuffSizes"); +#endif + +#ifdef HOST_PROFILE + if (tid == tidFocused) { + t1 = high_resolution_clock::now(); + } +#endif + // On each GPU, buffer all the keys going to each other GPU. + populateKeyBuffs(h_dVals, h_dKeyBinBuff, h_dKeyBinOff, h_hKeyBinOff, + h_dBufferCounter, h_bufferCounter, h_dBinSplits, h_dExSumTemp, + exSumTempBytes, gpuCount, tid); +#ifdef HOST_PROFILE + if (tid == tidFocused) { + cudaDeviceSynchronize(); + t2 = high_resolution_clock::now(); + buildTime = duration_cast( t2 - t1 ).count(); + std::cout << "populateKeyBuffs time: " << (buildTime / 1000.0) << std::endl; + } +#endif +#ifdef ERROR_CHECK + cudaDeviceSynchronize(); + CHECK_ERROR("populateKeyBuffs"); +#endif + + +#ifdef INDEX_TRACK + cudaFree(h_dVals[tid].d_keys); + cudaFree(h_dVals[tid].d_hash); + // RMM_FREE(h_dVals[tid].d_keys, 0); + // RMM_FREE(h_dVals[tid].d_hash, 0); +#endif + +#ifdef HOST_PROFILE + if (tid == tidFocused) { + t1 = high_resolution_clock::now(); + } +#endif + + // On each GPU, count the number of keys that will get shipped to it. +#ifdef MANAGED_MEM + countFinalKeys(h_bufferCounter, h_dFinalKeys, h_hFinalCounter, + h_hFinalOffset, h_dFinalOffset, h_binSplits, gpuCount, tid, + uvmPtr, prefixArray, totalSize); +#else + countFinalKeys(h_bufferCounter, h_dFinalKeys, h_hFinalCounter, + h_hFinalOffset, h_dFinalOffset, h_binSplits, gpuCount, tid); +#endif + +#ifdef HOST_PROFILE + if (tid == tidFocused) { + cudaDeviceSynchronize(); + t2 = high_resolution_clock::now(); + buildTime = duration_cast( t2 - t1 ).count(); + std::cout << "countFinalKeys time: " << (buildTime / 1000.0) << std::endl; + } +#endif +#ifdef ERROR_CHECK + cudaDeviceSynchronize(); + CHECK_ERROR("countFinalKeys"); +#endif + +#ifdef HOST_PROFILE + if (tid == tidFocused) { + t1 = high_resolution_clock::now(); + } +#endif + + #pragma omp barrier + + // Ship all the keys to their respective GPUs. + allToAll(h_dVals, h_dFinalKeys, h_hFinalOffset, h_dKeyBinBuff, + h_hKeyBinOff, h_hFinalCounter, gpuCount, tid); + +#ifdef HOST_PROFILE + if (tid == tidFocused) { + cudaDeviceSynchronize(); + t2 = high_resolution_clock::now(); + buildTime = duration_cast( t2 - t1 ).count(); + std::cout << "allToAll time: " << (buildTime / 1000.0) << std::endl; + } +#endif + + // #pragma omp barrier + // cudaDeviceSynchronize(); + +#ifdef ERROR_CHECK + cudaDeviceSynchronize(); + CHECK_ERROR("allToAll"); +#endif + +#ifdef HOST_PROFILE + if (tid == tidFocused) { + t1 = high_resolution_clock::now(); + } +#endif + + // Build hashgraph on each GPU. 
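// --- Illustrative sketch (not part of the committed code) ---------------------
// The hash graph built below is a CSR-like structure over hash buckets: a
// counting pass records the size of every bucket, an exclusive prefix sum turns
// the counts into offsets, and a scatter pass places each key into its bucket's
// slice of the edge array. On the device the scan is cub::DeviceScan::ExclusiveSum
// and the scatter increments d_counter with atomicAdd; the sequential host
// sketch below uses stand-in types (KeyHash instead of the repo's keyval/HashKey).
//
// #include <algorithm>
// #include <cstdint>
// #include <vector>
//
// struct KeyHash { int64_t key; uint32_t hash; };
//
// static void sketchBuildHashGraph(const std::vector<KeyHash> &keys, int64_t tableSize,
//                                  std::vector<int64_t> &offsets, std::vector<int64_t> &edges) {
//   std::vector<int64_t> counter(tableSize, 0);
//   for (const KeyHash &k : keys) counter[k.hash]++;            // pass 1: bucket sizes
//
//   offsets.assign(tableSize + 1, 0);                           // exclusive prefix sum
//   for (int64_t b = 0; b < tableSize; b++) offsets[b + 1] = offsets[b] + counter[b];
//
//   std::fill(counter.begin(), counter.end(), 0);               // reuse as per-bucket cursor
//   edges.assign(keys.size(), 0);
//   for (const KeyHash &k : keys)                               // pass 2: scatter into buckets
//     edges[offsets[k.hash] + counter[k.hash]++] = k.key;
// }
// ------------------------------------------------------------------------------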
+ index_t hashRange = h_binSplits[tid + 1] - h_binSplits[tid]; + // cudaMalloc(&h_dOffsets[tid], 2 * (hashRange + 1) * sizeof(index_t)); + + index_t keyCount = h_hFinalOffset[tid][gpuCount]; + +#ifdef LRB_BUILD + index_t lrbBinSize = std::ceil(hashRange / (float)(lrbBins)); + + if (lrbBinSize == 0) { + // std::cout << "ERROR: TOO MANY LRB BINS" << std::endl; + printf("ERROR tid: %ld hashRange: %ld\n", tid, hashRange); + exit(0); + } + + h_hashOff[tid] = keyCount * sizeof(keyval); + h_counterOff[tid] = (keyCount * sizeof(keyval)) + + (keyCount * sizeof(HashKey)) + + (2 * keyCount * sizeof(keyval)) + + ((hashRange + 1) * sizeof(index_t)); + h_offsetOff[tid] = (keyCount * sizeof(keyval)) + + (keyCount * sizeof(HashKey)) + + (2 * keyCount * sizeof(keyval)); + h_edgesOff[tid] = (keyCount * sizeof(keyval)) + + (keyCount * sizeof(HashKey)); + h_lrbOff[tid] = (keyCount * sizeof(keyval)) + + (keyCount * sizeof(HashKey)) + + (keyCount * sizeof(keyval)); + + lrbBuildMultiTable((keyval *) h_dFinalKeys[tid], + (HashKey *) (h_dFinalKeys[tid] + h_hashOff[tid]), // d_hash + (index_t *)(h_dFinalKeys[tid] + h_counterOff[tid]), // d_counter + (index_t *)(h_dFinalKeys[tid] + h_offsetOff[tid]), // d_offSet + (keyval *)(h_dFinalKeys[tid] + h_edgesOff[tid]), // d_edges + h_dBinSplits[tid], keyCount, hashRange, tableSize, + (keyval *)(h_dFinalKeys[tid] + h_lrbOff[tid]), // d_lrbArray + h_dLrbCounter[tid], + h_dLrbCountersPrefix[tid], h_dExSumTemp[tid], exSumTempBytes, + lrbBins, lrbBinSize, tid); +#ifdef HOST_PROFILE + if (tid == tidFocused) { + cudaDeviceSynchronize(); + t2 = high_resolution_clock::now(); + buildTime = duration_cast( t2 - t1 ).count(); + std::cout << "building time: " << (buildTime / 1000.0) << std::endl; + } +#endif + +#else + + buildMultiTable(h_dFinalKeys[tid], h_dFinalHash[tid], h_dCounter[tid], h_dOffsets[tid], + h_dEdges[tid], h_dBinSplits[tid], keyCount, hashRange, tid); + +#endif + + cudaSetDevice(0); +#ifdef ERROR_CHECK + cudaDeviceSynchronize(); + CHECK_ERROR("build error"); +#endif +} + +void MultiHashGraph::intersect(MultiHashGraph &mhgA, MultiHashGraph &mhgB, index_t *h_Common, + keypair **h_dOutput, index_t tid) { + + index_t gpuCount = mhgA.gpuCount; + + cudaSetDevice(tid); + + index_t *d_offsetA = (index_t *)(mhgA.h_dFinalKeys[tid] + mhgA.h_offsetOff[tid]); + keyval *d_edgesA = (keyval *)(mhgA.h_dFinalKeys[tid] + mhgA.h_edgesOff[tid]); + index_t *d_counterA = (index_t *)(mhgA.h_dFinalKeys[tid] + mhgA.h_counterOff[tid]); + + index_t *d_offsetB = (index_t *)(mhgB.h_dFinalKeys[tid] + mhgB.h_offsetOff[tid]); + keyval *d_edgesB = (keyval *)(mhgB.h_dFinalKeys[tid] + mhgB.h_edgesOff[tid]); + index_t *d_counterB = (index_t *)(mhgB.h_dFinalKeys[tid] + mhgB.h_counterOff[tid]); + + index_t *d_Common = mhgA.h_dCommon[tid]; + index_t *d_GlobalCounter = mhgA.h_dGlobalCounter[tid]; + + size_t *d_exSumTemp = mhgA.h_dExSumTemp[tid]; + size_t exSumTempBytes = mhgA.exSumTempBytes; + + index_t tableSize = mhgA.h_binSplits[tid + 1] - mhgA.h_binSplits[tid]; + if (tableSize != mhgB.h_binSplits[tid + 1] - mhgB.h_binSplits[tid]) { + std::cerr << "ERROR: TABLE SIZE NOT SAME BETWEN TWO HG'S" << std::endl; + exit(0); + } + + // TODO: might be able to reuse stuff from cudaMalloc() from building. 
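// --- Illustrative sketch (not part of the committed code) ---------------------
// The intersection below is bucket-local: because both hash graphs were built
// with the same splits, bucket b on this GPU covers the same hash range in A and
// B, so matching keys can only meet inside the same bucket. The device version
// runs simpleIntersect twice (a counting pass, a cub::DeviceScan::ExclusiveSum to
// get output positions, then a writing pass); the host sketch below collapses
// count-and-write into one loop for brevity and uses stand-in std:: types.
//
// #include <cstdint>
// #include <utility>
// #include <vector>
//
// static int64_t sketchBucketIntersect(
//     const std::vector<int64_t> &offA, const std::vector<int64_t> &edgA,
//     const std::vector<int64_t> &offB, const std::vector<int64_t> &edgB,
//     std::vector<std::pair<int64_t, int64_t>> &out) {
//   int64_t tableSize = (int64_t)offA.size() - 1;
//   int64_t total = 0;
//   for (int64_t b = 0; b < tableSize; b++)                     // one bucket per GPU thread
//     for (int64_t i = offA[b]; i < offA[b + 1]; i++)
//       for (int64_t j = offB[b]; j < offB[b + 1]; j++)
//         if (edgA[i] == edgB[j]) {                             // same key, same bucket
//           out.push_back({edgA[i], edgB[j]});
//           total++;
//         }
//   return total;   // the device version reduces per-bucket counts with cub::DeviceReduce::Sum
// }
// ------------------------------------------------------------------------------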
+ index_t *d_countCommon = nullptr; + index_t *d_outputPositions = nullptr; + + // cudaMalloc(&d_countCommon, (size_t)(tableSize + 1) * sizeof(index_t)); + // cudaMalloc(&d_outputPositions, (size_t)(tableSize + 1) * sizeof(index_t)); +#ifdef MANAGED_MEM + d_countCommon = (index_t *) mhgA.h_dCountCommon[tid]; + cudaMemPrefetchAsync(d_countCommon, + mhgA.prefixArrayIntersect[tid + 1] - mhgA.prefixArrayIntersect[tid], + tid); +#else + cudaMalloc(&d_countCommon, (size_t)(2 * ((tableSize + 1) * sizeof(index_t)))); +#endif + d_outputPositions = d_countCommon + tableSize + 1; + + // RMM_ALLOC(&d_countCommon, (size_t)(tableSize + 1) * sizeof(index_t), 0); + // RMM_ALLOC(&d_outputPositions, (size_t)(tableSize + 1) * sizeof(index_t), 0); + + // cudaMemsetAsync(d_countCommon, 0, (size_t)(tableSize + 1) * sizeof(index_t)); + // cudaMemsetAsync(d_outputPositions, 0, (size_t)(tableSize + 1) * sizeof(index_t)); + cudaMemsetAsync(d_countCommon, 0, (size_t)(2 * ((tableSize + 1) * sizeof(index_t)))); + + simpleIntersect<<>>(tableSize, d_offsetA, d_edgesA, d_offsetB, + d_edgesB, d_countCommon, NULL, true); + // forAll (vertices,simpleIntersect{d_offSetA.data(),d_edgesA.data(), d_offSetB.data(), + // d_edgesB.data(),d_countCommon.data(),NULL}); + + + // index_t h_Common; + void *_d_temp_storage=nullptr; size_t _temp_storage_bytes=0; + + _d_temp_storage=nullptr; _temp_storage_bytes=0; + cub::DeviceReduce::Sum(_d_temp_storage, _temp_storage_bytes, d_countCommon, + d_Common, tableSize); + + if (_temp_storage_bytes > exSumTempBytes) { + std::cerr << "ERROR: NOT ENOUGH TEMP SPACE ALLOCATED" << std::endl; + } + // RMM_ALLOC(&_d_temp_storage, _temp_storage_bytes, 0); + cub::DeviceReduce::Sum(d_exSumTemp, _temp_storage_bytes, d_countCommon, + d_Common, tableSize); + cudaMemcpy(&h_Common[tid], d_Common, 1 * sizeof(index_t), cudaMemcpyDeviceToHost); + // gpu::copyToHost(d_Common.data(), 1, &h_Common); + // RMM_FREE(_d_temp_storage, 0); + // gpu::free(_d_temp_storage); + + _d_temp_storage=nullptr; _temp_storage_bytes=0; + cub::DeviceScan::ExclusiveSum(_d_temp_storage, _temp_storage_bytes, d_countCommon, + d_outputPositions, tableSize); + + if (_temp_storage_bytes > exSumTempBytes) { + std::cerr << "ERROR: NOT ENOUGH TEMP SPACE ALLOCATED" << std::endl; + } + + // RMM_ALLOC(&_d_temp_storage, _temp_storage_bytes, 0); + cub::DeviceScan::ExclusiveSum(d_exSumTemp, _temp_storage_bytes, d_countCommon, + d_outputPositions, tableSize); + // RMM_FREE(_d_temp_storage, 0); + // gpu::free(_d_temp_storage); + + + // printf("Size of the ouput is : %ld\n", h_Common[tid]); fflush(stdout); + + if (h_Common[tid] > 0) { + // d_output = mem_t(h_Common,context,memory_space_device); + cudaMalloc(&h_dOutput[tid], h_Common[tid] * sizeof(keypair)); + // RMM_ALLOC(&h_dOutput[tid], h_Common[tid] * sizeof(keypair), 0); + } + + + simpleIntersect<<>>(tableSize, d_offsetA, d_edgesA, d_offsetB, + d_edgesB, d_outputPositions, + h_dOutput[tid], false); + // forAll (tableSize,simpleIntersect{d_offSetA.data(),d_edgesA.data(), + // d_offSetB.data(),d_edgesB.data(),d_outputPositions.data(), + // d_output.data()}); + +#ifdef ERROR_CHECK + cudaDeviceSynchronize(); + CHECK_ERROR("intersect error"); +#endif +} + +void MultiHashGraph::buildSingle() { + + cudaSetDevice(0); + std::cout << "single countSize: " << countSize << std::endl; + std::cout << "single tableSize: " << tableSize << std::endl; + + // mem_t d_hashA(countSize, context, memory_space_device); + // mem_t d_counterA = fill((int32_t)0, (size_t)(tableSize+1), context); + // mem_t d_offsetA = 
fill((index_t)0, (size_t)(tableSize+1), context); + // mem_t d_edgesA(countSize,context,memory_space_device); + HashKey *d_hashA; + int32_t *d_counterA; + index_t * d_offsetA; + keyval *d_edgesA; + + cudaMalloc(&d_hashA, countSize * sizeof(HashKey)); + cudaMalloc(&d_counterA, (tableSize + 1) * sizeof(int32_t)); + cudaMalloc(&d_offsetA, (tableSize + 1) * sizeof(index_t)); + cudaMalloc(&d_edgesA, countSize * sizeof(keyval)); + + cudaMemset(d_counterA, 0, (tableSize + 1) * sizeof(int32_t)); + cudaMemset(d_offsetA, 0, (tableSize + 1) * sizeof(index_t)); + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + float buildTime = 0.0f; // milliseoncds + + cudaMalloc(&d_vals, countSize * sizeof(hkey_t)); + cudaMemcpy(d_vals, h_vals, countSize * sizeof(hkey_t), cudaMemcpyHostToDevice); + + cudaEventRecord(start); + + buildTable(d_vals, d_hashA, d_counterA, d_offsetA, d_edgesA, (index_t)countSize, + (index_t)(tableSize)); + + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&buildTime, start, stop); + + std::cout << "single buildTable() time: " << (buildTime / 1000.0) << "\n"; // seconds + + index_t *h_offset = new index_t[tableSize + 1](); + HashKey *h_hash = new HashKey[countSize](); + keyval *h_edges = new keyval[countSize](); + + cudaMemcpy(h_offset, d_offsetA, (tableSize + 1) * sizeof(index_t), + cudaMemcpyDeviceToHost); + + cudaMemcpy(h_hash, d_hashA, countSize * sizeof(HashKey), + cudaMemcpyDeviceToHost); + cudaMemcpy(h_edges, d_edgesA, countSize * sizeof(keyval), + cudaMemcpyDeviceToHost); + + // Everything in multi-GPU HG is in single-GPU HG + for (index_t i = 0; i < gpuCount; i++) { + cudaSetDevice(i); + + index_t hashRange = h_binSplits[i + 1] - h_binSplits[i]; + index_t keyCount = h_hFinalOffset[i][gpuCount]; + + index_t *h_hOffsets = new index_t[hashRange + 1](); + keyval *h_hEdges = new keyval[keyCount](); + + cudaMemcpy(h_hOffsets, h_dFinalKeys[i] + h_offsetOff[i], + (hashRange + 1) * sizeof(index_t), + cudaMemcpyDeviceToHost); + + cudaMemcpy(h_hEdges, h_dFinalKeys[i] + h_edgesOff[i], + keyCount * sizeof(keyval), + cudaMemcpyDeviceToHost); + + for (index_t j = 0; j < hashRange; j++) { + + index_t hash = j + h_binSplits[i]; + + index_t multiDegree = h_hOffsets[j + 1] - h_hOffsets[j]; + index_t singleDegree = h_offset[hash + 1] - h_offset[hash]; + + if (multiDegree != singleDegree) { + std::cerr << "Degree error hash: " << hash << " multi: " << + multiDegree << " single: " << singleDegree << "\n"; + } + + std::vector multiGPU; + for (index_t k = h_hOffsets[j]; k < h_hOffsets[j + 1]; k++) { + keyval edge = h_hEdges[k]; + multiGPU.push_back(edge.key); + } + + std::vector singleGPU; + for (index_t k = h_offset[hash]; k < h_offset[hash + 1]; k++) { + keyval edge = h_edges[k]; + singleGPU.push_back(edge.key); + } + + std::sort(multiGPU.begin(), multiGPU.end()); + std::sort(singleGPU.begin(), singleGPU.end()); + + if (multiGPU != singleGPU) { + std::cerr << "List error\n"; + + std::cerr << "multiGPU:\n"; + for (hkey_t kv : multiGPU) { + std::cerr << kv << " "; + } + std::cerr << std::endl; + + std::cerr << "singleGPU:\n"; + for (hkey_t kv : singleGPU) { + std::cerr << kv << " "; + } + std::cerr << std::endl; + } + } + } + cudaSetDevice(0); + + cudaFree(d_vals); + CHECK_ERROR("buildSingle"); +} diff --git a/hash-graph-dehornetify/src/SingleHashGraph.cu b/hash-graph-dehornetify/src/SingleHashGraph.cu new file mode 100644 index 0000000..7f42f0c --- /dev/null +++ b/hash-graph-dehornetify/src/SingleHashGraph.cu @@ -0,0 +1,232 @@ +/* + * 
Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "SingleHashGraph.cuh" +#include "SingleHashGraphOperators.cuh" +#include +#include +#include +#include + +#include +#include +#include +#include + +#include //--profile-from-start off + +// #include +// #include +// #include +// #include +// #include +// #include + +#include +#include + +// #include "rmm/rmm.h" +// #include "rmm.h" + +// using namespace mgpu; + + +using hkey_t = int64_t; +using index_t = int32_t; +using HashKey = uint32_t; + +// Uncomment once we remove "using namespace hornets_nest" +// const int BLOCK_SIZE_OP2 = 256; + +#define DISABLE_A +#define MULTI_GPU + +// #define DEBUG +#define CORRECTNESS + +int BLOCK_COUNT = -1; +int BLOCK_SIZE_OP2 = 256; // TODO: Double check this. + +// #define SEQ_KEYS + +#define LRB_BUILD + +struct prg { + hkey_t lo, hi; + + __host__ __device__ prg(hkey_t _lo=0, hkey_t _hi=0) : lo(_lo), hi(_hi) {}; + + __host__ __device__ hkey_t operator()(unsigned long long index) const { + thrust::default_random_engine rng(index); + thrust::uniform_int_distribution dist(lo, hi); + rng.discard(index); + return dist(rng); + } +}; + +SingleHashGraph::SingleHashGraph(int64_t countSize, int64_t maxkey, // context_t &context, + int64_t tableSize, int64_t lrbBins) + // : + // d_hash(countSize, context, memory_space_device), + // d_edges(countSize, context, memory_space_device) + { + + // d_vals = fill_random_64((hkey_t)0, maxkey, countSize, false, context); + // d_vals = fill_sequence((hkey_t)0, maxkey, context); + // d_counter = fill((index_t)0, (size_t)(tableSize + 1), context); + // d_offset = fill((index_t)0, (size_t)(tableSize + 1), context); + + cudaMalloc(&d_vals, countSize * sizeof(hkey_t)); + cudaMalloc(&d_hash, countSize * sizeof(HashKey)); + cudaMalloc(&d_counter, (tableSize + 1) * sizeof(index_t)); + cudaMalloc(&d_offset, (tableSize + 1) * sizeof(index_t)); + cudaMalloc(&d_edges, countSize * sizeof(keyval)); + +#ifdef LRB_BUILD + cudaMalloc(&d_lrbCounter, (lrbBins + 2) * sizeof(index_t)); + cudaMalloc(&d_lrbCounterPrefix, (lrbBins + 2) * sizeof(index_t)); + cudaMalloc(&d_lrbArray, countSize * sizeof(keyval)); + + cudaMemset(d_lrbCounter, 0, (lrbBins + 2) * sizeof(index_t)); + this->lrbBins = lrbBins; +#endif + + cudaMemset(d_counter, 0, (tableSize + 1) * sizeof(index_t)); + cudaMemset(d_offset, 0, (tableSize + 1) * sizeof(index_t)); + +#ifdef SEQ_KEYS + HashKey *h_vals = new HashKey[countSize](); + for (HashKey i = 0; i < countSize; i++) { + h_vals[i] = i; + } + cudaMemcpy(d_vals, h_vals, countSize * sizeof(HashKey), cudaMemcpyHostToDevice); +#else + hkey_t seed = 0; + thrust::counting_iterator index_sequence_begin(seed); + thrust::transform(thrust::device, index_sequence_begin, index_sequence_begin + countSize, + d_vals, prg(0, maxkey)); +#endif + + BLOCK_COUNT = std::ceil(countSize / ((float) BLOCK_SIZE_OP2)); + +} + +SingleHashGraph::~SingleHashGraph() { +} + +void lrbBuildTable(hkey_t *d_vals, HashKey *d_hash, 
index_t *d_counter, + index_t *d_offSet, keyval *d_edges, + index_t valCount, index_t tableSize, + keyval *d_lrbArray, index_t *d_lrbCounters, + index_t *d_lrbCountersPrefix, index_t lrbBins, index_t lrbBinSize) { + + cudaMemset(d_counter, 0, (tableSize + 1) * sizeof(index_t)); + void* _d_temp_storage { nullptr }; + size_t _temp_storage_bytes { 0 }; + + + hashValuesD<<>>(valCount, d_vals, d_hash, + (HashKey) tableSize); + + lrbCountHashD<<>>(valCount, d_hash, d_lrbCounters, + lrbBinSize); + _d_temp_storage = nullptr; _temp_storage_bytes = 0; + + cub::DeviceScan::ExclusiveSum(NULL, _temp_storage_bytes, d_lrbCounters, + d_lrbCountersPrefix, lrbBins + 1); + + cudaMalloc(&_d_temp_storage, _temp_storage_bytes); + cub::DeviceScan::ExclusiveSum(_d_temp_storage, _temp_storage_bytes, d_lrbCounters, + d_lrbCountersPrefix, lrbBins + 1); + + cudaMemcpy(d_lrbCountersPrefix + lrbBins, &lrbBins, sizeof(index_t), + cudaMemcpyHostToDevice); + + cudaMemset(d_lrbCounters, 0, (lrbBins + 1) * sizeof(index_t)); + + lrbRehashD<<>>(valCount, d_vals, d_hash, + d_lrbCounters, d_lrbArray, + d_lrbCountersPrefix, lrbBinSize); + + lrbCountHashGlobalD<<>>(valCount, d_counter, + d_lrbArray, tableSize); + + _d_temp_storage = nullptr; _temp_storage_bytes = 0; + cub::DeviceScan::ExclusiveSum(_d_temp_storage, _temp_storage_bytes,d_counter, + d_offSet, tableSize); + cudaMalloc(&_d_temp_storage, _temp_storage_bytes); + // RMM_ALLOC(&_d_temp_storage, _temp_storage_bytes, 0); + cub::DeviceScan::ExclusiveSum(_d_temp_storage, _temp_storage_bytes,d_counter, + d_offSet, tableSize); + cudaMemcpy(d_offSet + tableSize, &valCount, sizeof(index_t), cudaMemcpyHostToDevice); + cudaFree(_d_temp_storage); + // RMM_FREE(_d_temp_storage, 0); + + cudaMemset(d_counter, 0, tableSize * sizeof(index_t)); + + lrbCopyToGraphD<<>>(valCount, d_counter, d_offSet, + d_edges, d_lrbArray, tableSize); +} + +// template +// void buildTable(mem_t &d_vals, mem_t &d_hash, mem_t &d_counter, +// mem_t &d_offSet, mem_t &d_edges, index_t valCount, +// index_t tableSize, context_t& context, int32_t valsOffset=0) { +void buildTable(hkey_t *d_vals, HashKey *d_hash, index_t *d_counter, + index_t *d_offSet, keyval *d_edges, index_t valCount, + index_t tableSize, int32_t valsOffset=0) { + + void* _d_temp_storage { nullptr }; + size_t _temp_storage_bytes { 0 }; + + hashValuesD<<>>(valCount, d_vals + valsOffset, + d_hash, (HashKey) tableSize); + + countHashD<<>>(valCount, d_hash, d_counter); + + cub::DeviceScan::ExclusiveSum(_d_temp_storage, _temp_storage_bytes, d_counter, + d_offSet, tableSize); + cudaMalloc(&_d_temp_storage, _temp_storage_bytes); + cub::DeviceScan::ExclusiveSum(_d_temp_storage, _temp_storage_bytes, d_counter, + d_offSet, tableSize); + + // d_counter = fill(0, (size_t)tableSize, context); + cudaMemset(d_counter, 0, tableSize * sizeof(index_t)); + cudaMemcpy(d_offSet + tableSize, &valCount, sizeof(index_t), cudaMemcpyHostToDevice); + cudaFree(_d_temp_storage); + + copyToGraphD<<>>(valCount, d_vals + valsOffset, + d_hash, d_counter, d_offSet, + d_edges, tableSize); +} + +// void SingleHashGraph::build(int64_t countSize, context_t &context, int64_t tableSize) { +void SingleHashGraph::build(int64_t countSize, int64_t tableSize) { + +#ifdef LRB_BUILD + index_t lrbBinSize = std::ceil(tableSize / (float)(lrbBins)); + + lrbBuildTable(d_vals, d_hash, d_counter, + d_offset, d_edges, + (index_t)countSize, (index_t)tableSize, + d_lrbArray, d_lrbCounter, + d_lrbCounterPrefix, lrbBins, lrbBinSize); + +#else + buildTable(d_vals, d_hash, d_counter, d_offset, d_edges, 
(index_t)countSize, + (index_t) tableSize); + // (index_t) tableSize, context); +#endif +} diff --git a/hash-graph-dehornetify/test/MultiHashGraphTest.cu b/hash-graph-dehornetify/test/MultiHashGraphTest.cu new file mode 100644 index 0000000..2fe95b0 --- /dev/null +++ b/hash-graph-dehornetify/test/MultiHashGraphTest.cu @@ -0,0 +1,427 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "MultiHashGraph.cuh" + +#include +#include + +// #define RAND_KEYS +// #define PRINT_KEYS + +// #define BUILD_TEST + +struct prg { + hkey_t lo, hi; + + __host__ __device__ prg(hkey_t _lo=0, hkey_t _hi=0) : lo(_lo), hi(_hi) {}; + + __host__ __device__ hkey_t operator()(index_t index) const { + thrust::default_random_engine rng(index); + thrust::uniform_int_distribution dist(lo, hi); + rng.discard(index); + return dist(rng); + } +}; + +// A recursive binary search function. It returns location of x in given array arr[l..r] is present, +// otherwise it returns the bin id with the smallest value larger than x +int64_t binarySearch(hkey_t *bins, int32_t l, int64_t r, int32_t x) { + if (r >= l) { + int64_t mid = l + (r - l) / 2; + + // If the element is present at the middle itself + if (bins[mid] == x) + return mid; + + // If element is smaller than mid, then it can only be present in left subarray + if (bins[mid] > x) + return binarySearch(bins, l, mid - 1, x); + + // Else the element can only be present in right subarray + return binarySearch(bins, mid + 1, r, x); + } + + // We reach here when element is not present in array and return the bin id + // of the smallest value greater than x + return l; +} + +void enablePeerAccess(uint32_t gpuCount) { + // Enable P2P access between each pair of GPUs. + for (index_t j = 0; j < gpuCount; j++) { + cudaSetDevice(j); + for (index_t i = 0; i < gpuCount; i++) { + if (j != i) { + int isCapable; + cudaDeviceCanAccessPeer(&isCapable, j, i); + if (isCapable == 1) { + cudaError_t err = cudaDeviceEnablePeerAccess(i, 0); + if (err == cudaErrorPeerAccessAlreadyEnabled) { + cudaGetLastError(); + } + } + } + } + } +} + +void generateInput(inputData *h_dVals, index_t countSize, index_t maxkey, uint32_t gpuCount, + index_t seed) { + std::cout << "generating input" << std::endl; + + index_t avgKeyCount = std::ceil(countSize / ((double) gpuCount)); + for (index_t i = 0; i < gpuCount; i++) { + cudaSetDevice(i); + + index_t lo = avgKeyCount * i; + index_t hi = avgKeyCount * (i + 1); + hi = std::min(hi, countSize); + + index_t keyCount = hi - lo; + + cudaMalloc(&h_dVals[i].d_keys, keyCount * sizeof(hkey_t)); + cudaMalloc(&h_dVals[i].d_hash, keyCount * sizeof(HashKey)); + // RMM_ALLOC(&h_dVals[i].d_keys, keyCount * sizeof(hkey_t), 0); + // RMM_ALLOC(&h_dVals[i].d_hash, keyCount * sizeof(HashKey), 0); + +#ifdef RAND_KEYS + // Randomly generate input keys on each device. 
+ thrust::counting_iterator index_sequence_begin(seed); + thrust::transform(thrust::device, index_sequence_begin, index_sequence_begin + keyCount, + h_dVals[i].d_keys, prg(0, maxkey - 1)); +#else + hkey_t *h_tmpKeys = new hkey_t[keyCount](); + for (index_t j = lo; j < hi; j++) { + h_tmpKeys[j - lo] = j; + } + cudaMemcpy(h_dVals[i].d_keys, h_tmpKeys, keyCount * sizeof(hkey_t), cudaMemcpyHostToDevice); +#endif + + h_dVals[i].len = keyCount; + +#ifdef PRINT_KEYS + std::cout << "keys gpu " << i << std::endl; + thrust::device_ptr td_keys = thrust::device_pointer_cast(h_dVals[i].d_keys); + for (uint32_t j = 0; j < keyCount; j++) { + std::cout << *(td_keys + j) << " "; + } + std::cout << std::endl; +#endif + + seed += keyCount; + + } + std::cout << "done generating input" << std::endl; +} + +int main(int argc, char **argv) { + + int deviceCount = 0; + cudaGetDeviceCount(&deviceCount); + + std::cout << "deviceCount: " << deviceCount << std::endl; + + char hostname[HOST_NAME_MAX]; + gethostname(hostname, HOST_NAME_MAX); + + std::cout << "hostname: " << hostname << std::endl; + + index_t countSizeA = 1L << 24; + index_t maxkey = 1L << 26; + + uint32_t binCount = 16000; + uint32_t gpuCount = 4; + + index_t lrbBins = -1; + + bool checkCorrectness = false; + bool buildTest = false; + + index_t countSizeB = 1L << 22; + + if (argc >= 2 && argc < 9) { + std::cerr << "Please specify all arguments.\n"; + return 1; + } + + if (argc >= 3) { + index_t size = strtoull(argv[1], NULL, 0); + countSizeA = size; + + index_t key = strtoull(argv[2], NULL, 0); + maxkey = key; + + binCount = atoi(argv[3]); + gpuCount = atoi(argv[4]); + + lrbBins = strtoull(argv[5], NULL, 0); + + // char *correctnessFlag = atoi(argv[5]); + // if (correctnessFlag > 0) { + if (!strcmp(argv[6], "check")) { + checkCorrectness = true; + } + + countSizeB = strtoull(argv[7], NULL, 0); + + if (!strcmp(argv[8], "build")) { + buildTest = true; + } + } + + index_t tableSize = maxkey; + + std::cout << "countSizeA: " << countSizeA << std::endl; + std::cout << "maxkey: " << maxkey << std::endl; + + // rmm_mgpu_context_t contextA; + // rmm_mgpu_context_t contextB; + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + float buildTime = 0.0f; // milliseoncds + + // enablePeerAccess(gpuCount); + + // rmmOptions_t rmmO; + + // rmmO.initial_pool_size = 1L << 60; + // rmmO.allocation_mode = PoolAllocation; + // rmmO.enable_logging = false; + // rmmO.num_devices = 16; + + // int *devices = (int *)malloc(gpuCount * sizeof(int)); + // for (index_t i = 0; i < gpuCount; i++) { + // devices[i] = i; + // } + // + // rmmO.devices = devices; + + // rmmInitialize(&rmmO); + + + if (buildTest) { + inputData *h_dVals = new inputData[gpuCount](); + generateInput(h_dVals, countSizeA, maxkey, gpuCount, 0); + + // MultiHashGraph mhg(h_dVals, countSizeA, maxkey, contextA, tableSize, binCount, lrbBins, gpuCount); + MultiHashGraph mhg(h_dVals, countSizeA, maxkey, tableSize, binCount, lrbBins, gpuCount); + + omp_set_num_threads(gpuCount); + +#ifdef CUDA_PROFILE + cudaProfilerStart(); +#endif + + cudaSetDevice(0); + cudaEventRecord(start); + + #pragma omp parallel + { + index_t tid = omp_get_thread_num(); + mhg.build(true, tid); + } // pragma + + cudaSetDevice(0); + cudaEventRecord(stop); + + cudaEventSynchronize(stop); + cudaEventElapsedTime(&buildTime, start, stop); + +#ifdef CUDA_PROFILE + cudaProfilerStop(); + CHECK_ERROR("end of build"); +#endif + + std::cout << "multi buildTable() time: " << (buildTime / 1000.0) << "\n"; // seconds + + if 
(checkCorrectness) { + mhg.destroyMulti(); + mhg.buildSingle(); + } + } else { + inputData *h_dValsA = new inputData[gpuCount](); + inputData *h_dValsB = new inputData[gpuCount](); + + generateInput(h_dValsA, countSizeA, maxkey, gpuCount, 0); + generateInput(h_dValsB, countSizeB, maxkey, gpuCount, countSizeA); + + std::cout << "hashgraph constructors" << std::endl; + // MultiHashGraph mhgA(h_dValsA, countSizeA, maxkey, contextA, tableSize, binCount, lrbBins, gpuCount); + // MultiHashGraph mhgB(h_dValsB, countSizeB, maxkey, contextB, tableSize, binCount, lrbBins, gpuCount); + MultiHashGraph mhgA(h_dValsA, countSizeA, maxkey, tableSize, binCount, lrbBins, gpuCount); + MultiHashGraph mhgB(h_dValsB, countSizeB, maxkey, tableSize, binCount, lrbBins, gpuCount); + std::cout << "done hashgraph constructors" << std::endl; + +#ifdef MANAGED_MEM + std::cout << "managed mem constructors" << std::endl; + index_t size = 2 * (tableSize + gpuCount) * sizeof(index_t); + cudaMallocManaged(&mhgA.uvmPtrIntersect, size); + mhgA.prefixArrayIntersect = new index_t[gpuCount + 1](); + mhgA.totalSizeIntersect = size; + std::cout << "done managed mem constructors" << std::endl; +#endif + + keypair **h_dOutput = new keypair*[gpuCount](); + index_t *h_Common = new index_t[gpuCount](); + + omp_set_num_threads(gpuCount); + +#ifdef CUDA_PROFILE + cudaProfilerStart(); +#endif + + cudaSetDevice(0); + cudaEventRecord(start); + + #pragma omp parallel + { + index_t tid = omp_get_thread_num(); + mhgA.build(true, tid); + + #pragma omp master + { + mhgB.h_binSplits = mhgA.h_binSplits; // small memory leak. + mhgB.h_dBinSplits = mhgA.h_dBinSplits; + +#ifdef MANAGED_MEM + mhgA.prefixArrayIntersect[0] = 0; + for (index_t i = 1; i < gpuCount; i++) { + index_t tidHashRange = mhgA.h_binSplits[i] - mhgA.h_binSplits[i - 1]; + index_t size = 2 * (tidHashRange + 1) * sizeof(index_t); + mhgA.prefixArrayIntersect[i] = mhgA.prefixArrayIntersect[i - 1] + size; + } + mhgA.prefixArrayIntersect[gpuCount] = mhgA.totalSizeIntersect; + + mhgA.h_dCountCommon[0] = mhgA.uvmPtrIntersect; + for (index_t i = 1; i < gpuCount; i++) { + mhgA.h_dCountCommon[i] = mhgA.uvmPtrIntersect + + mhgA.prefixArrayIntersect[i]; + } +#endif + } // master + + #pragma omp barrier + + mhgB.build(false, tid); // Build second HG but use same splits as first HG. 
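// Note on the shared splits (illustrative, not committed code): mhgB is built
// with findSplits == false and its h_binSplits/h_dBinSplits alias mhgA's, so for
// every GPU g both graphs cover the identical hash range
// [h_binSplits[g], h_binSplits[g+1]). That invariant is what makes the
// bucket-by-bucket intersect below communication-free. It holds trivially here
// because the pointers are shared; if B's splits were ever computed
// independently, a sanity check along these lines would be needed:
//
//   for (index_t g = 0; g <= (index_t)gpuCount; g++)
//     assert(mhgA.h_binSplits[g] == mhgB.h_binSplits[g]);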
+ + #pragma omp barrier + + MultiHashGraph::intersect(mhgA, mhgB, h_Common, h_dOutput, tid); + } // pragma + + cudaSetDevice(0); + cudaEventRecord(stop); + +#ifdef CUDA_PROFILE + cudaProfilerStop(); + CHECK_ERROR("end of intersect"); +#endif + + cudaEventSynchronize(stop); + cudaEventElapsedTime(&buildTime, start, stop); + + std::cout << "multi intersect() time: " << (buildTime / 1000.0) << "\n"; // seconds + + if (checkCorrectness) { + mhgA.buildSingle(); + mhgB.buildSingle(); + + index_t outputSize = 0; + for (index_t i = 0; i < gpuCount; i++) { + outputSize += h_Common[i]; + } + + keypair *h_output = new keypair[outputSize](); + index_t h_idx = 0; + for (index_t i = 0; i < gpuCount; i++) { + cudaSetDevice(i); + cudaMemcpy(h_output + h_idx, h_dOutput[i], h_Common[i] * sizeof(keypair), + cudaMemcpyDeviceToHost); + h_idx += h_Common[i]; + } + + std::vector result; + result.reserve(outputSize); + for (index_t i = 0; i < outputSize; i++) { + result.push_back(h_output[i].right); + } + + if (result.size() != result.capacity()) { + std::cerr << "ERROR: RESULT ERROR" << std::endl; + exit(0); + } + + std::sort(mhgA.h_vals, mhgA.h_vals + countSizeA); + std::sort(mhgB.h_vals, mhgB.h_vals + countSizeB); + + std::vector ans; + ans.reserve(outputSize); + for (index_t i = 0; i < countSizeA; i++) { + index_t ogIdx = binarySearch(mhgB.h_vals, 0, countSizeB - 1, mhgA.h_vals[i]); + + index_t idx = ogIdx; + while (idx >= 0 && mhgB.h_vals[idx] == mhgA.h_vals[i]) { + ans.push_back(mhgA.h_vals[i]); + idx--; + } + + idx = ogIdx + 1; + while (idx < countSizeB && mhgB.h_vals[idx] == mhgA.h_vals[i]) { + ans.push_back(mhgA.h_vals[i]); + idx++; + } + // for (index_t j = 0; j < countSizeB; j++) { + // if (mhgA.h_vals[i] == mhgB.h_vals[j]) { + // ans.push_back(mhgA.h_vals[i]); + // } + + // if (mhgA.h_vals[i] < mhgB.h_vals[j]) { + // break; + // } + // } + } + + if (ans.size() != outputSize) { + std::cerr << "ERROR: INTERSECT OUTPUT HAS INCORRECT SIZE" << std::endl; + std::cerr << "ansSize: " << ans.size() << " outputSize: " << outputSize << std::endl; + // exit(0); + } + + std::sort(result.begin(), result.end()); + std::sort(ans.begin(), ans.end()); + + if (result != ans) { + std::cerr << "ERROR: INTERSECT OUTPUT HAS INCORRECT CONTENT" << std::endl; + + std::cout << "output: " << std::endl; + for (auto i = result.begin(); i != result.end(); ++i) { + std::cout << *i << " "; + } + std::cout << std::endl; + + std::cout << "ans: " << std::endl; + for (auto i = ans.begin(); i != ans.end(); ++i) { + std::cout << *i << " "; + } + std::cout << std::endl; + + exit(0); + } + } + } +} diff --git a/hash-graph-dehornetify/test/SingleHashGraphTest.cu b/hash-graph-dehornetify/test/SingleHashGraphTest.cu new file mode 100644 index 0000000..1fcb859 --- /dev/null +++ b/hash-graph-dehornetify/test/SingleHashGraphTest.cu @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2017-2018, NVIDIA CORPORATION. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "SingleHashGraph.cuh" + +int main(int argc, char **argv) { + + int64_t countSize = 1L << 24; + int64_t maxkey = 1L << 26; + int64_t tableSize = maxkey; + int64_t lrbBins = 16000; + + if (argc >= 3 && argc < 4) { + std::cerr << "Please specify all arguments.\n"; + return 1; + } + + if (argc >= 3) { + countSize = atoi(argv[1]); + // countSize = 1L << sizeExp; + + maxkey = atoi(argv[2]); + // maxkey = 1L << keyExp; + + lrbBins = atoi(argv[3]); + } + + cudaEvent_t start, stop; + cudaEventCreate(&start); + cudaEventCreate(&stop); + float buildTime = 0.0f; // milliseoncds + + // rmm_mgpu_context_t context; + + // SingleHashGraph shg(countSize, maxkey, context, tableSize); + SingleHashGraph shg(countSize, maxkey, tableSize, lrbBins); + + cudaEventRecord(start); + + // shg.build(countSize, context, tableSize); + shg.build(countSize, tableSize); + + cudaEventRecord(stop); + cudaEventSynchronize(stop); + cudaEventElapsedTime(&buildTime, start, stop); + + std::cout << "single buildTable() time: " << (buildTime / 1000.0) << "\n"; // seconds +} diff --git a/maxflow/.gitignore b/maxflow/.gitignore deleted file mode 100644 index 4e8b5fa..0000000 --- a/maxflow/.gitignore +++ /dev/null @@ -1,8 +0,0 @@ -*.swc -*.swo -*.swp -*.o -data/ -benchmarks/bench_log.csv -*.dat -galois/build diff --git a/maxflow/.gitmodules b/maxflow/.gitmodules deleted file mode 100644 index 851715b..0000000 --- a/maxflow/.gitmodules +++ /dev/null @@ -1,6 +0,0 @@ -[submodule "gunrock"] - path = gunrock - url = https://github.com/nsakharnykh/gunrock -[submodule "cub"] - path = cub - url = https://github.com/NVlabs/cub diff --git a/maxflow/.matrix_io.h.swm b/maxflow/.matrix_io.h.swm deleted file mode 100644 index ede2a31..0000000 Binary files a/maxflow/.matrix_io.h.swm and /dev/null differ diff --git a/maxflow/.matrix_io.h.swn b/maxflow/.matrix_io.h.swn deleted file mode 100644 index f04c5fd..0000000 Binary files a/maxflow/.matrix_io.h.swn and /dev/null differ diff --git a/maxflow/.maxflow.cpp.swm b/maxflow/.maxflow.cpp.swm deleted file mode 100644 index b36099d..0000000 Binary files a/maxflow/.maxflow.cpp.swm and /dev/null differ diff --git a/maxflow/.maxflow.cpp.swn b/maxflow/.maxflow.cpp.swn deleted file mode 100644 index 328d851..0000000 Binary files a/maxflow/.maxflow.cpp.swn and /dev/null differ diff --git a/maxflow/IO/gr_reader.h b/maxflow/IO/gr_reader.h deleted file mode 100644 index 94a99dd..0000000 --- a/maxflow/IO/gr_reader.h +++ /dev/null @@ -1,79 +0,0 @@ -#include -#include -#include "../allocator.h" -#include "../matrix.h" -#include "../config.h" -#include -#include - -using std::string; -using std::ifstream; -using std::cout; - -void read_gr(const string& filename, csr_graph& g) { - ifstream iff(filename); - uint64_t header[4]; - - iff.read((char*)header, sizeof(uint64_t) * 4); - - uint64_t n, nnz; - n = header[2]; - nnz = header[3]; - - uint64_t *degrees = (uint64_t*)malloc(sizeof(uint64_t) * n); - - iff.read((char*)degrees, sizeof(uint64_t) * n); - - uint32_t *outs = (uint32_t*)malloc(sizeof(uint32_t) * nnz); - iff.read((char*)outs, sizeof(uint32_t) * nnz); - - //Inc Sum -> Ex sum - int last = 0; - for(int i=0; i != n; ++i) { - int here = degrees[i] - last; - last = degrees[i]; - degrees[i] -= here; - } - - uint32_t buf; - if(nnz & 1) - iff.read((char*)&buf, sizeof(uint32_t)); //align on 64 bits - - uint32_t *w = (uint32_t*)malloc(sizeof(uint32_t) * nnz); - iff.read((char*)w, sizeof(uint32_t) * nnz); - - - //Copying into g - //If the data types are coherent, we could load directly in those - - 
g.row_offsets = (int*)my_malloc(sizeof(int) * n); - g.col_indices = (int*)my_malloc(sizeof(int) * nnz); - g.vals_cap = (flow_t*)my_malloc(sizeof(flow_t) * nnz); - - g.n = n; - g.nnz = nnz; - - - for(int i=0; i != n; ++i) - g.row_offsets[i] = (int)degrees[i]; - - for(int i=0; i != nnz; ++i) - g.col_indices[i] = (int)outs[i]; - - for(int i=0; i != nnz; ++i) - g.vals_cap[i] = (flow_t)w[i]; - - free(degrees); - free(outs); - free(w); - /* - for(int u=0; u != 20; ++u) { - cout << "Node " << u << " : "; - for(int i = g.row_offsets[u]; i != g.row_offsets[u+1]; ++i) { - cout << g.col_indices[i] << "(" << g.vals_cap[i] << ")\t"; - - } - cout << "\n"; - } - */ -} diff --git a/maxflow/IO/matrix_io.h b/maxflow/IO/matrix_io.h deleted file mode 100644 index 2ef66e2..0000000 --- a/maxflow/IO/matrix_io.h +++ /dev/null @@ -1,524 +0,0 @@ -#include -#include -using std::map; -using std::pair; - -// Routines to read/write matrix. -// Modified from http://crd-legacy.lbl.gov/~yunhe/cs267/final/source/utils/convert/matrix_io.c - -// Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - -#define MM_MAX_LINE_LENGTH 1025 -#define MatrixMarketBanner "%%MatrixMarket" -#define MM_MAX_TOKEN_LENGTH 64 - -typedef char MM_typecode[4]; - -char *mm_typecode_to_str(MM_typecode matcode); - -int mm_read_banner(FILE *f, MM_typecode *matcode); -int mm_read_mtx_crd_size(FILE *f, int *M, int *N, int *nz); -int mm_read_mtx_array_size(FILE *f, int *M, int *N); - -int mm_write_banner(FILE *f, MM_typecode matcode); -int mm_write_mtx_crd_size(FILE *f, int M, int N, int nz); -int mm_write_mtx_array_size(FILE *f, int M, int N); - - -/********************* MM_typecode query fucntions ***************************/ - -#define mm_is_matrix(typecode) ((typecode)[0]=='M') - -#define mm_is_sparse(typecode) ( ((typecode)[1]=='C') || ((typecode)[1]=='S') ) -#define mm_is_sparserow(typecode) ((typecode)[1]=='S') -#define mm_is_coordinate(typecode)((typecode)[1]=='C') -#define mm_is_dense(typecode) ((typecode)[1]=='A') -#define mm_is_array(typecode) ((typecode)[1]=='A') - -#define mm_is_complex(typecode) ((typecode)[2]=='C') -#define mm_is_real(typecode) ((typecode)[2]=='R') -#define mm_is_pattern(typecode) ((typecode)[2]=='P') -#define mm_is_integer(typecode) ((typecode)[2]=='I') - -#define mm_is_symmetric(typecode)((typecode)[3]=='S') -#define mm_is_general(typecode) ((typecode)[3]=='G') -#define mm_is_skew(typecode) ((typecode)[3]=='K') -#define mm_is_hermitian(typecode)((typecode)[3]=='H') - -int mm_is_valid(MM_typecode matcode); /* too complex for a macro */ - - -/********************* MM_typecode modify fucntions ***************************/ - -#define mm_set_matrix(typecode) ((*typecode)[0]='M') -#define mm_set_coordinate(typecode) ((*typecode)[1]='C') -#define mm_set_sparserow(typecode) ((*typecode)[1]='S') -#define mm_set_array(typecode) ((*typecode)[1]='A') -#define mm_set_dense(typecode) mm_set_array(typecode) - -#define mm_set_complex(typecode)((*typecode)[2]='C') -#define mm_set_real(typecode) ((*typecode)[2]='R') -#define mm_set_pattern(typecode)((*typecode)[2]='P') -#define mm_set_integer(typecode)((*typecode)[2]='I') - - -#define mm_set_symmetric(typecode)((*typecode)[3]='S') -#define mm_set_general(typecode)((*typecode)[3]='G') -#define mm_set_skew(typecode) ((*typecode)[3]='K') -#define mm_set_hermitian(typecode)((*typecode)[3]='H') - -#define mm_clear_typecode(typecode) ((*typecode)[0]=(*typecode)[1]= \ - (*typecode)[2]=' ',(*typecode)[3]='G') - -#define mm_initialize_typecode(typecode) mm_clear_typecode(typecode) - - 
-/********************* Matrix Market error codes ***************************/ - - -#define MM_COULD_NOT_READ_FILE 11 -#define MM_PREMATURE_EOF 12 -#define MM_NOT_MTX 13 -#define MM_NO_HEADER 14 -#define MM_UNSUPPORTED_TYPE 15 -#define MM_LINE_TOO_LONG 16 -#define MM_COULD_NOT_WRITE_FILE 17 - - -/******************** Matrix Market internal definitions ******************** - - MM_matrix_typecode: 4-character sequence - - ojbect sparse/ data storage - dense type scheme - - string position: [0] [1] [2] [3] - - Matrix typecode: M(atrix) C(oord) R(eal) G(eneral) - A(array) C(omplex) H(ermitian) - P(attern) S(ymmetric) - I(nteger) K(kew) - - ***********************************************************************/ - -#define MM_MTX_STR "matrix" -#define MM_ARRAY_STR "array" -#define MM_DENSE_STR "array" -#define MM_COORDINATE_STR "coordinate" -#define MM_SPARSEROW_STR "sparserow" -#define MM_COMPLEX_STR "complex" -#define MM_REAL_STR "real" -#define MM_INT_STR "integer" -#define MM_GENERAL_STR "general" -#define MM_SYMM_STR "symmetric" -#define MM_HERM_STR "hermitian" -#define MM_SKEW_STR "skew-symmetric" -#define MM_PATTERN_STR "pattern" - -int mm_read_banner(FILE *f, MM_typecode *matcode) -{ - char line[MM_MAX_LINE_LENGTH]; - char banner[MM_MAX_TOKEN_LENGTH]; - char mtx[MM_MAX_TOKEN_LENGTH]; - char crd[MM_MAX_TOKEN_LENGTH]; - char data_type[MM_MAX_TOKEN_LENGTH]; - char storage_scheme[MM_MAX_TOKEN_LENGTH]; - char *p; - - - mm_clear_typecode(matcode); - - if (fgets(line, MM_MAX_LINE_LENGTH, f) == NULL) - return MM_PREMATURE_EOF; - - if (sscanf(line, "%s %s %s %s %s", banner, mtx, crd, data_type, - storage_scheme) != 5) - return MM_PREMATURE_EOF; - - for (p=mtx; *p!='\0'; *p=tolower(*p),p++); /* convert to lower case */ - for (p=crd; *p!='\0'; *p=tolower(*p),p++); - for (p=data_type; *p!='\0'; *p=tolower(*p),p++); - for (p=storage_scheme; *p!='\0'; *p=tolower(*p),p++); - - /* check for banner */ - if (strncmp(banner, MatrixMarketBanner, strlen(MatrixMarketBanner)) != 0) - return MM_NO_HEADER; - - /* first field should be "mtx" */ - if (strcmp(mtx, MM_MTX_STR) != 0) - return MM_UNSUPPORTED_TYPE; - mm_set_matrix(matcode); - - - /* second field describes whether this is a sparse matrix (in coordinate - storgae) or a dense array */ - - - if (strcmp(crd, MM_SPARSEROW_STR) == 0) - mm_set_sparserow(matcode); - else - if (strcmp(crd, MM_COORDINATE_STR) == 0) - mm_set_coordinate(matcode); - else - if (strcmp(crd, MM_DENSE_STR) == 0) - mm_set_dense(matcode); - else - return MM_UNSUPPORTED_TYPE; - - - /* third field */ - - if (strcmp(data_type, MM_REAL_STR) == 0) - mm_set_real(matcode); - else - if (strcmp(data_type, MM_COMPLEX_STR) == 0) - mm_set_complex(matcode); - else - if (strcmp(data_type, MM_PATTERN_STR) == 0) - mm_set_pattern(matcode); - else - if (strcmp(data_type, MM_INT_STR) == 0) - mm_set_integer(matcode); - else - return MM_UNSUPPORTED_TYPE; - - - /* fourth field */ - - if (strcmp(storage_scheme, MM_GENERAL_STR) == 0) - mm_set_general(matcode); - else - if (strcmp(storage_scheme, MM_SYMM_STR) == 0) - mm_set_symmetric(matcode); - else - if (strcmp(storage_scheme, MM_HERM_STR) == 0) - mm_set_hermitian(matcode); - else - if (strcmp(storage_scheme, MM_SKEW_STR) == 0) - mm_set_skew(matcode); - else - return MM_UNSUPPORTED_TYPE; - - - return 0; -} - -#ifndef __NVCC__ -char *strdup (const char *s) { - char *d = (char*)my_malloc (strlen (s) + 1); // Allocate memory - if (d != NULL) strcpy (d,s); // Copy string if okay - return d; // Return new memory -} -#endif - -char *mm_typecode_to_str(MM_typecode 
matcode) -{ - char buffer[MM_MAX_LINE_LENGTH]; - const char *types[4]; - - /* check for MTX type */ - if (mm_is_matrix(matcode)) - types[0] = MM_MTX_STR; - - /* check for CRD or ARR matrix */ - if (mm_is_sparserow(matcode)) - types[1] = MM_SPARSEROW_STR; - else - if (mm_is_coordinate(matcode)) - types[1] = MM_COORDINATE_STR; - else - if (mm_is_dense(matcode)) - types[1] = MM_DENSE_STR; - else - return NULL; - - /* check for element data type */ - if (mm_is_real(matcode)) - types[2] = MM_REAL_STR; - else - if (mm_is_complex(matcode)) - types[2] = MM_COMPLEX_STR; - else - if (mm_is_pattern(matcode)) - types[2] = MM_PATTERN_STR; - else - if (mm_is_integer(matcode)) - types[2] = MM_INT_STR; - else - return NULL; - - - /* check for symmetry type */ - if (mm_is_general(matcode)) - types[3] = MM_GENERAL_STR; - else - if (mm_is_symmetric(matcode)) - types[3] = MM_SYMM_STR; - else - if (mm_is_hermitian(matcode)) - types[3] = MM_HERM_STR; - else - if (mm_is_skew(matcode)) - types[3] = MM_SKEW_STR; - else - return NULL; - - sprintf(buffer,"%s %s %s %s", types[0], types[1], types[2], types[3]); - return strdup(buffer); - -} - -/* generates random double in [low, high) */ -double random_double (double low, double high) -{ - //return ((high-low)*drand48()) + low; - return ((high-low)*rand()/RAND_MAX) + low; -} - -void coo2csr_in(int n, int nz, double *a, int **i_idx, int *j_idx); // in-place conversion, also replaces i_idx with new array of size (n+1) to save memory - -/* write CSR format */ -/* 1st line : % number_of_rows number_of_columns number_of_nonzeros - 2nd line : % base of index - 3rd line : row_number nz_r(=number_of_nonzeros_in_the_row) - next nz_r lines : column_index value(when a != NULL) - next line : row_number nz_r(=number_of_nonzeros_in_the_row) - next nz_r lines : column_index value(when a != NULL) - ... - */ - -void write_csr (char *fn, int m, int n, int nz, - int *row_start, int *col_idx, double *a) -{ - FILE *f; - int i, j; - - if ((f = fopen(fn, "w")) == NULL){ - printf ("can't open file <%s> \n", fn); - exit(1); - } - - fprintf (f, "%s %d %d %d\n", "%", m, n, nz); - - for (i=0; i \n", fn); - exit(1); - } - if (mm_read_banner(f, &matcode) != 0){ - printf("Could not process Matrix Market banner.\n"); - exit(1); - } - - /* This is how one can screen matrix types if their application */ - /* only supports a subset of the Matrix Market data types. */ - - if (! (mm_is_matrix(matcode) && mm_is_sparse(matcode)) ){ - printf("Sorry, this application does not support "); - printf("Market Market type: [%s]\n", mm_typecode_to_str(matcode)); - exit(1); - } - - /* skip comments */ - unsigned long pos; - char *line = NULL; - size_t len = 0; - size_t read; - do { - pos = ftell(f); - read = getline(&line, &len, f); - } while (read != -1 && line[0] == '%'); - fseek(f, pos, SEEK_SET); - - /* find out size of sparse matrix .... 
*/ - if (fscanf(f, "%d %d %d", m, n, nz) != 3) { - printf("Error reading matrix header: m n nz\n"); - exit(1); - } - - - //We always create back edges if doesnt exist - - /* reserve memory for matrices */ - //if (mm_is_symmetric(matcode)){ - *i_idx = (int *) my_malloc(*nz *2 * sizeof(int)); - *j_idx = (int *) my_malloc(*nz *2 * sizeof(int)); - *a = (double *) my_malloc(*nz *2 * sizeof(double)); - - /* - } - else { - *i_idx = (int *) my_malloc(*nz * sizeof(int)); - *j_idx = (int *) my_malloc(*nz * sizeof(int)); - *a = (double *) my_malloc(*nz * sizeof(double)); - } - - if (!(*i_idx) || !(*j_idx) || !(*a)){ - printf ("cannot allocate memory for %d, %d, %d sparse matrix\n", *m, *n, *nz); - exit(1); - } - */ - - map,double> raw_edges; // map edge(u,v) - > indice of edges in *a - k=0; - for (i=0; i<*nz; i++) { - int u,v; - double d; - - if (mm_is_pattern(matcode)){ - if (fscanf(f, "%d %d", &u, &v) != 2) { - printf("Error reading matrix entry %i\n", i); - exit(1); - } - - - d = random_double(0.5, 1.0); - } - else if (mm_is_real(matcode)){ - if (fscanf(f, "%d %d %lg", &u, &v, &d) != 3) { - printf("Error reading matrix entry %i\n", i); - exit(1); - } - - } - u -= base; /* adjust from 1-based to 0-based */ - v -= base; - - raw_edges.insert({{u,v}, d}); - } - i=0; - for(auto& e : raw_edges) { - int u = e.first.first, v = e.first.second; - (*i_idx)[i] = u; - (*j_idx)[i] = v; - (*a)[i] = e.second; - ++i; - if(u != v && raw_edges.count({v,u}) == 0) { - (*i_idx)[*nz+k] = v; - (*j_idx)[*nz+k] = u; - (*a)[*nz+k] = (mm_is_symmetric(matcode)) ? e.second : 0.0; - k++; - } - } - - - *nz += k; - - fclose(f); - - coo2csr_in (*m, *nz, *a, i_idx, *j_idx); -} - -void sort(int *col_idx, double *a, int start, int end) -{ - int i, j, it; - double dt; - - for (i=end-1; i>start; i--) - for(j=start; j col_idx[j+1]){ - - if (a){ - dt=a[j]; - a[j]=a[j+1]; - a[j+1]=dt; - } - it=col_idx[j]; - col_idx[j]=col_idx[j+1]; - col_idx[j+1]=it; - - } -} - - - -/* converts COO format to CSR format, in-place, - if SORT_IN_ROW is defined, each row is sorted in column index. - On return, i_idx contains row_start position */ - -void coo2csr_in(int n, int nz, double *a, int **i_idx, int *j_idx) -{ - int *row_start; - int i, j; - int init, i_next, j_next, i_pos; - double dt, a_next; - - row_start = (int *)my_malloc((n+1)*sizeof(int)); - if (!row_start){ - printf ("coo2csr_in: cannot allocate temporary memory\n"); - exit (1); - } - for (i=0; i<=n; i++) row_start[i] = 0; - - /* determine row lengths */ - for (i=0; i \n", argv[0]); - exit(1); - } - - csr_graph g; // main graph structure - - // read mtx entry - read_mm_matrix(argv[1], &g.n, &g.n, &g.nnz, &g.row_offsets, &g.col_indices, &g.vals_cap); - - //Set edges to degrees for cap - g.set_edge_weights_rcp_degree(); - - std::ofstream out(argv[2]); - - - out << "p max " << g.n << " " << g.nnz << "\n"; - - - for(int u = 0; u < g.n; ++u) { - for(int i_edge = g.row_offsets[u]; i_edge != g.row_offsets[u+1]; ++i_edge) { - int cap = (1000000000.0 * g.vals_cap[i_edge]); - int v = g.col_indices[i_edge]; - out << "a " << u+1 << " " << v+1 << " " << cap << "\n"; - - if(g.edge(v, u) == -1) { - out << "a " << v+1 << " " << u+1 << " " << "0" << "\n"; - } - } - } - - return 0; -} diff --git a/maxflow/LICENSE b/maxflow/LICENSE deleted file mode 100644 index 3ba63d5..0000000 --- a/maxflow/LICENSE +++ /dev/null @@ -1,201 +0,0 @@ - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. 
- - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. - - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. 
Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. 
This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "{}" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. - - Copyright 2018 NVIDIA CORPORATION - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. 
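For readability, here is a small, self-contained sketch of what the MTX-to-DIMACS converter above (built as `mtx2dimacs` by the Makefile further down) emits: a `p max <nodes> <arcs>` problem line followed by `a <from> <to> <capacity>` arc lines with 1-based vertex ids, plus a zero-capacity reverse arc whenever the opposite direction is missing. The `Graph` struct, the `write_dimacs` name, and the toy data are assumptions made for this example only; the 1e9 scaling of fractional capacities mirrors the deleted source, while the reciprocal-degree capacity setup (`set_edge_weights_rcp_degree`) is omitted.

```
// Illustrative sketch of the DIMACS max-flow emission in the converter above.
// The Graph struct and the toy data are stand-ins, not the original csr_graph.
#include <iostream>
#include <vector>

struct Graph {
    int n;                            // number of vertices
    std::vector<int>    row_offsets;  // size n+1 (CSR)
    std::vector<int>    col_indices;  // size nnz
    std::vector<double> vals_cap;     // fractional capacities, size nnz

    // index of edge (u,v), or -1 if absent -- mirrors csr_graph::edge()
    int edge(int u, int v) const {
        for (int i = row_offsets[u]; i < row_offsets[u + 1]; ++i)
            if (col_indices[i] == v) return i;
        return -1;
    }
};

void write_dimacs(const Graph& g, std::ostream& out) {
    int nnz = g.row_offsets[g.n];
    out << "p max " << g.n << " " << nnz << "\n";
    for (int u = 0; u < g.n; ++u) {
        for (int i = g.row_offsets[u]; i < g.row_offsets[u + 1]; ++i) {
            int v    = g.col_indices[i];
            long cap = static_cast<long>(1000000000.0 * g.vals_cap[i]);
            out << "a " << u + 1 << " " << v + 1 << " " << cap << "\n";
            if (g.edge(v, u) == -1)   // add a zero-capacity back arc if missing
                out << "a " << v + 1 << " " << u + 1 << " 0\n";
        }
    }
}

int main() {
    // toy 3-node graph: 0->1 with capacity 0.5, 1->2 with capacity 0.25
    Graph g{3, {0, 1, 2, 2}, {1, 2}, {0.5, 0.25}};
    write_dimacs(g, std::cout);
}
```

For the toy graph, this prints `p max 3 2`, `a 1 2 500000000`, `a 2 1 0`, `a 2 3 250000000`, `a 3 2 0`.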
diff --git a/maxflow/MPM/.MPM.cu.swm b/maxflow/MPM/.MPM.cu.swm deleted file mode 100644 index 96f63c2..0000000 Binary files a/maxflow/MPM/.MPM.cu.swm and /dev/null differ diff --git a/maxflow/MPM/.MPM.cu.swn b/maxflow/MPM/.MPM.cu.swn deleted file mode 100644 index b06ec05..0000000 Binary files a/maxflow/MPM/.MPM.cu.swn and /dev/null differ diff --git a/maxflow/MPM/.prune.cu.swn b/maxflow/MPM/.prune.cu.swn deleted file mode 100644 index 5e4d7bf..0000000 Binary files a/maxflow/MPM/.prune.cu.swn and /dev/null differ diff --git a/maxflow/MPM/.push_pull.cu.swn b/maxflow/MPM/.push_pull.cu.swn deleted file mode 100644 index 5007164..0000000 Binary files a/maxflow/MPM/.push_pull.cu.swn and /dev/null differ diff --git a/maxflow/MPM/MPM.cu b/maxflow/MPM/MPM.cu deleted file mode 100644 index 577259c..0000000 --- a/maxflow/MPM/MPM.cu +++ /dev/null @@ -1,190 +0,0 @@ -#include "MPM.h" -#include "../bfs/bfs.h" - -//Implementations of MPM functions members -#include "get_subgraph.cu" -#include "push_pull.cu" -#include "prune.cu" - -#include - -#include -#include "nvToolsExt.h" - -#include "../config.h" - -#define GPUID 0 -#define N_BLOCKS_MAX 65535 -#define N_THREADS 512 - - -MPM::MPM(csr_graph& _g) : g(_g) { - //TODO reduce number of mallocs - q_bfs = (int*)my_malloc((g.n+1) * sizeof(int)); - h = (int*)my_malloc((g.n) * sizeof(int)); - - node_mask = (char*)my_malloc(g.n * sizeof(char)); - queue_mask = (char*)my_malloc(g.n * sizeof(char)); - prune_mask = (char*)my_malloc(g.n * sizeof(char)); - have_been_pruned = (char*)my_malloc(g.n * sizeof(char)); - - node_g_to_sg = (int*)my_malloc(g.n * sizeof(int)); //TODO reuse Bfs - node_sg_to_g = (int*)my_malloc(g.n * sizeof(int)); - - edge_mask = (char*)my_malloc(g.nnz * sizeof(char)); - edge_mask_orig = (char*)my_malloc(g.nnz * sizeof(char)); - reverse_edge_map = (int*)my_malloc(g.nnz * sizeof(int)); - - cudaMalloc(&d_total_flow, sizeof(flow_t)); - e = (flow_t*)my_malloc(g.n * sizeof(flow_t)); - - //buffer for degree_in and degree_out - degree = (flow_t*)my_malloc((2 * g.n) * sizeof(flow_t)); - - bfs_offsets = (int*)my_malloc((g.n+1) * sizeof(int)); - sg_level_offsets = (int*)my_malloc((g.n+1) * sizeof(int)); - - cudaMalloc(&d_nsg, sizeof(int)); - - cudaMallocHost(&d_node_to_push, sizeof(int)); - cudaMallocHost(&d_flow_to_push, sizeof(flow_t)); - - cudaStreamCreate(&st1); - cudaStreamCreate(&st2); - - cudaMemset(d_total_flow, 0, sizeof(flow_t)); - cudaMemset(e, 0, sizeof(flow_t) * g.n); - cudaMemset(prune_mask, 0, sizeof(char) * g.n); - - - buf1 = (int*)my_malloc((g.n+1) * sizeof(int)); - buf2 = (int*)my_malloc((g.n+1) * sizeof(int)); - - sg_in.resize(g.n, g.nnz); - sg_out.resize(g.n, g.nnz); - - cf = g.vals_cap; //TODO alloc and copy - - //CUB memory - //Device Reduce - - cudaMalloc(&d_ppd, sizeof(post_prune_data)); - - cub::DeviceReduce::ArgMin(d_min_reduce, min_reduce_size, degree, &d_ppd->d_min, 2*g.n); - cudaMalloc(&d_min_reduce, min_reduce_size); - - //Partition (get subgraph) - cub::DevicePartition::Flagged(d_storage_partition, size_storage_partition, buf1, queue_mask, buf2, d_nsg, g.n); - cudaMalloc(&d_storage_partition, size_storage_partition); - - //Exclusive sum (get subgraph) - cub::DeviceScan::ExclusiveSum(d_storage_exclusive_sum, size_storage_exclusive_sum, buf1, buf2, g.n); - cudaMalloc(&d_storage_exclusive_sum, size_storage_exclusive_sum); - - - //Building reverse edge map - for(int u=0; u != g.n; ++u) { - for (int i = g.row_offsets[u]; i < g.row_offsets[u+1]; ++i) { - int v = g.col_indices[i]; - int uv = i; - int vu = g.edge(v,u); - 
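// vu is the index of the opposite-direction edge (v,u); it is expected to
// exist for graphs loaded through read_mm_matrix(), which inserts a
// zero-capacity back edge whenever the input lacks one.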
reverse_edge_map[uv] = vu; - } - } - memFetch(); - cudaDeviceSynchronize(); -} - -__global__ void setup_mask_unsaturated_kernel(int num_edges, char *mask, flow_t *cf) -{ - for(int u= threadIdx.x + blockIdx.x * blockDim.x; - u < num_edges; - u += blockDim.x * gridDim.x) - mask[u] = (cf[u] > 0); -} - - - -bool setup_mask_unsaturated(int num_edges, char *mask, flow_t *cf) { - setup_mask_unsaturated_kernel<<>>(num_edges, mask, cf); - return true; -} - -//Main algorithm loop -flow_t MPM::maxflow(int _s, int _t, float *elapsed_time) { - s = _s; - t = _t; - - //TODO create cf - setup_mask_unsaturated(g.nnz, edge_mask_orig, cf); - - - int nsg; //number of nodes in subgraphh - - cudaDeviceSynchronize(); - - struct timespec start, end; - clock_gettime(CLOCK_MONOTONIC, &start); - cudaProfilerStart(); - while(bfs(g.row_offsets, g.col_indices, g.n, g.nnz, s, t, q_bfs, h, BFS_MARK_DEPTH, edge_mask_orig, bfs_offsets)) { - cudaDeviceSynchronize(); - cudaMemcpy(&ht, &h[t], sizeof(int), cudaMemcpyDeviceToHost); - init_level_graph(nsg); - cudaDeviceSynchronize(); - - nvtxRangePushA("saturate_subgraph"); - //Find node to push - usually done end of prune, but the first need to be done here - cub::DeviceReduce::ArgMin(d_min_reduce, min_reduce_size, degree_in+1, &(d_ppd->d_min), 2*(sg_in.n-1), st1); - - cudaMemcpy(&h_ppd, d_ppd, sizeof(post_prune_data), cudaMemcpyDeviceToHost); - do { - push_and_pull(); - prune(); - } while(!h_ppd.s_t_pruned); - nvtxRangePop(); - - } - - flow_t h_total_flow; - cudaMemcpy(&h_total_flow, d_total_flow, sizeof(flow_t), cudaMemcpyDeviceToHost); - - cudaProfilerStop(); - clock_gettime(CLOCK_MONOTONIC, &end); - *elapsed_time = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) * 1e-9; - - return h_total_flow; -} - -void MPM::memFetch() { - cudaMemPrefetchAsync(q_bfs, g.n * sizeof(int), 0, st1); - cudaMemPrefetchAsync(h, (g.n) * sizeof(int), 0, st1); - - cudaMemPrefetchAsync(node_mask, g.n * sizeof(char), 0, st1); - cudaMemPrefetchAsync(queue_mask, g.n * sizeof(char), 0, st1); - cudaMemPrefetchAsync(prune_mask, g.n * sizeof(char), 0, st1); - cudaMemPrefetchAsync(have_been_pruned, g.n * sizeof(char), 0, st1); - - cudaMemPrefetchAsync(node_g_to_sg, g.n * sizeof(int), 0, st1); //TODO reuse Bfs - cudaMemPrefetchAsync(node_sg_to_g, g.n * sizeof(int), 0, st1); - - cudaMemPrefetchAsync(edge_mask, g.nnz * sizeof(char), 0, st1); - cudaMemPrefetchAsync(edge_mask_orig, g.nnz * sizeof(char), 0, st1); - cudaMemPrefetchAsync(reverse_edge_map, g.nnz * sizeof(int), 0, st1); - - cudaMemPrefetchAsync(e, g.n * sizeof(flow_t), 0, st1); - - cudaMemPrefetchAsync(bfs_offsets, (g.n+1) * sizeof(int), 0, st1); - cudaMemPrefetchAsync(sg_level_offsets, (g.n+1) * sizeof(int), 0, st1); - - cudaMemPrefetchAsync(buf1, (g.n+1) * sizeof(int), 0, st1); - cudaMemPrefetchAsync(buf2, (g.n+1) * sizeof(int), 0, st1); - - cudaMemPrefetchAsync(g.row_offsets, g.n * sizeof(int), 0, st1); - cudaMemPrefetchAsync(g.col_indices, g.nnz * sizeof(int), 0, st1); - cudaMemPrefetchAsync(cf, g.nnz * sizeof(flow_t), 0, st1); -} - -MPM::~MPM() { - //TODO free on host - -} diff --git a/maxflow/MPM/MPM.h b/maxflow/MPM/MPM.h deleted file mode 100644 index 711db27..0000000 --- a/maxflow/MPM/MPM.h +++ /dev/null @@ -1,96 +0,0 @@ -#pragma once -#include "../matrix.h" -#include -#include "../cub/cub/cub.cuh" - -#include "../config.h" - - -using std::vector; - -using cub::KeyValuePair; -typedef KeyValuePair kvpid; - -struct post_prune_data { - kvpid d_min; - int prune_flag; //used to know if we're done with prune - int s_t_pruned; -}; - 
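/*
 * Summary of the control flow in MPM::maxflow() (MPM.cu), kept here as a
 * reading aid for the members declared below:
 *
 *   while bfs() over the unsaturated edges (edge_mask_orig) still reaches t:
 *       read h[t] as the depth ht of the level graph
 *       init_level_graph()  -> layered subgraphs sg_in / sg_out
 *       do:
 *           cub ArgMin over the degree arrays -> min-throughput node
 *           push_and_pull()  -> route that amount forward to t and back to s
 *           prune()          -> drop nodes whose throughput reached zero
 *       while neither s nor t has been pruned (h_ppd.s_t_pruned)
 *
 *   The accumulated flow lives in d_total_flow on the device and is copied
 *   back to the host once bfs() finds no further augmenting level graph.
 */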
-class MPM { -public: - MPM(csr_graph &g); - flow_t maxflow(int s, int t, float *time); - __host__ __device__ virtual ~MPM(); - - //Should be private function members - void init_level_graph(int& nsg); - void get_node_to_push(); - void push_and_pull(); - void prune(); - void write_edges(); - void memFetch(); - - const csr_graph g; - - //Current query - int s, t; - - //BFS - int *q_bfs; - int *h; - int *bfs_offsets; - int ht; - - int *d_nsg; //size subgraph, on device - - int *d_node_to_push; - flow_t *d_flow_to_push; - - char *node_mask; // nodes active in layer graph - char *queue_mask; // nodes active in layer graph (using their bfs queue idx) - char *prune_mask; // nodes to prune - layer graph indexes - char *have_been_pruned; - char *edge_mask; // nodes to prune - layer graph indexes - char *edge_mask_orig; // nodes to prune - layer graph indexes - - flow_t *d_total_flow; //total flow pushed so far - on device - flow_t *e; //local excess - used in push/pull - flow_t *cf; //edge cap - edge flow - - //Degree - flow_t *degree; - flow_t *degree_in; - flow_t *degree_out; - - //Layer graph - csr_subgraph sg_in, sg_out; - int *node_sg_to_g; - int *node_g_to_sg; - int *sg_level_offsets; - int max_level_width; - - cudaStream_t st1, st2; //streams used by kernels - - //CUB - void *d_storage_partition = NULL; - size_t size_storage_partition = 0; - - void *d_storage_exclusive_sum = NULL; - size_t size_storage_exclusive_sum = 0; - - //Buffers - we may not need them - int *buf1, *buf2; - - //Can be removed if memory becomes a pb - int *reverse_edge_map; - - //Used by cub funcs in get_node_min - post_prune_data h_ppd, *d_ppd; - - void *d_min_reduce = NULL; - size_t min_reduce_size = 0; - -}; - - diff --git a/maxflow/MPM/MPM_gpu_kernels.cu b/maxflow/MPM/MPM_gpu_kernels.cu deleted file mode 100644 index ea44c9c..0000000 --- a/maxflow/MPM/MPM_gpu_kernels.cu +++ /dev/null @@ -1,92 +0,0 @@ -#include -#include //for cuda -#include "../cub/cub/cub.cuh" -#include "../allocator.h" - -#include //TODO just for debug (exit) - -#define THREADS_PER_VERTEX 32 -#define N_BLOCK_THREADS 512 //number of threads in a block -#define BLOCK_Y_SIZE (N_BLOCK_THREADS / THREADS_PER_VERTEX) - -//init edge events -cudaEvent_t memset_1, memset_2, start_init; -//argmin events -cudaEvent_t argmin_out, start_get_min; -//push and pull events -cudaEvent_t start_move_flow, end_move_flow; - -void createEvents() { - cudaEventCreate(&memset_1); - cudaEventCreate(&memset_2); - cudaEventCreate(&start_init); - cudaEventCreate(&start_get_min); - - cudaEventCreate(&argmin_out); - - cudaEventCreate(&start_move_flow); - cudaEventCreate(&end_move_flow); -} - - - -#include "../utils.cu" -#include "get_subgraph.cu" -#include "find_node_to_push.cu" -#include "prune.cu" -#include "push_pull.cu" -#include "device_gppp.cu" - -#define N_THREADS 512 - -#define N_MAX_BLOCKS 65534 - -dim3 getBlock2D() { - return dim3(THREADS_PER_VERTEX, BLOCK_Y_SIZE); -} -dim3 getGrid2D(int n) { - return dim3(1, min((n + BLOCK_Y_SIZE -1) / BLOCK_Y_SIZE, N_MAX_BLOCKS)); -} -dim3 getBlock1D() { - return dim3(N_THREADS); -} - -dim3 getGrid1D(int n) { - return dim3 (min((n + N_THREADS - 1) / N_THREADS, N_MAX_BLOCKS)); -} - -// -// Push/Pull function -// wiq_prune, ll be called by iterate_(in|out)_neighbors -// returns true if the caller should stop iterate -// -// DIRECTION : -// FORWARD : push from u to v -// BACKWARD : pull from u to v - - -#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); } -inline void gpuAssert(cudaError_t code, const char *file, int line, 
bool abort=true) -{ - if (code != cudaSuccess) - { - fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line); - if (abort) exit(code); - } -} - -// -// Reduce-push-pull-prune kernel -// Used if layered graph is small enough -// - - - - - -// -// -// -// - - diff --git a/maxflow/MPM/MPM_implem.cu b/maxflow/MPM/MPM_implem.cu deleted file mode 100644 index 80c4fd2..0000000 --- a/maxflow/MPM/MPM_implem.cu +++ /dev/null @@ -1,8 +0,0 @@ -#include "MPM.h" -#include "../matrix.h" - -double maxflowimplementation(csr_graph* g, int s, int t, float *elapsed_time) { - MPM mpm(*g); - - return mpm.maxflow(s,t,elapsed_time); -} diff --git a/maxflow/MPM/csr_graph_mpm.h b/maxflow/MPM/csr_graph_mpm.h deleted file mode 100644 index 6d40fd3..0000000 --- a/maxflow/MPM/csr_graph_mpm.h +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - -#pragma once - -#include "../matrix.h" - -//TODO inheritance nvgraph csr DS - -//csr graph using same edge idx than its parent -struct csr_graph_mpm : public csr_graph_reverse { - - //nodes degree - double *degree_in; - double *degree_out; - - //Mask to know if an edge is active - char *edge_mask; - int *edge_mask_orig; //active in the orig graph TODO refactoring - //Distance source-node - int *h; - - int *buf1; - int *buf2; - - int *sg_level_offsets; - - int ht; //depth of sink - //buffer must be of size at least g->n - csr_graph_mpm(const csr_graph& g, int *buffer) : - csr_graph_reverse(g, buffer), - cf(g.vals_cap) //TODO copy if we want the details - { - - buf1 = (int*)my_malloc(g.n * sizeof(int)); //TODO use bitset - buf2 = (int*)my_malloc(g.n * sizeof(int)); //TODO use bitset - sg_level_offsets = (int*)my_malloc((g.n+1) * sizeof(int)); //TODO use bitset - - h = (int*)my_malloc(g.n * sizeof(int)); - } - - - virtual void memFetch(int deviceID) { - cudaMemPrefetchAsync(cf, nnz * sizeof(double), deviceID, 0); - cudaMemPrefetchAsync(edge_mask, nnz * sizeof(int), deviceID, 0); - - cudaMemPrefetchAsync(buf2, n * sizeof(int), deviceID, 0); - cudaMemPrefetchAsync(buf1, n * sizeof(int), deviceID, 0); - - cudaMemPrefetchAsync(node_g_to_sg, n * sizeof(int), deviceID, 0); - cudaMemPrefetchAsync(node_sg_to_g, n * sizeof(int), deviceID, 0); - cudaMemPrefetchAsync(sg_level_offsets, (n+1) * sizeof(int), deviceID, 0); - - csr_graph_reverse::memFetch(deviceID); - } - - - //mess with the copy of struct to gpu - /* - virtual ~csr_graph_mpm() { - my_free(degree_in); - my_free(degree_out); - my_free(edge_mask); - my_free(h); - } - */ -}; - - diff --git a/maxflow/MPM/device_gppp.cu b/maxflow/MPM/device_gppp.cu deleted file mode 100644 index 8f7e27a..0000000 --- a/maxflow/MPM/device_gppp.cu +++ /dev/null @@ -1,116 +0,0 @@ -//Meta operation -//loop find node/push pull/prune in device mem - - - -#define RPPP_X 32 -#define RPPP_Y 16 -#define RPPP_NTHREADS (RPPP_X * RPPP_Y) - -__global__ void reduce_push_pull_prune(csr_graph g, - csr_subgraph sg_in, - csr_subgraph sg_out, - int ht, - int *s_t_pruned, - double *degree_in, - double *degree_out, - int *h, - int *node_sg_to_g, - char *edge_mask, - char *edge_mask_orig, - int *reverse_edge_map, - double *cf, - double *e, - char *prune_mask, - char *have_been_pruned, - int *sg_level_offsets, - double *d_total_flow) { - int n = sg_in.n; - /* - __shared__ char s_t_pruned; - __shared__ double degree_in[25]; - __shared__ double degree_out[25]; - - //use __ballot and bitset - __shared__ char edge_mask[12]; // bank conflits - use ballot for writing - //Dot not use mask for nodes for now - too 
complicated - //Not useful for 32-bits vals - //Load offsets ? - - //only one thread can set a bit to 1, and random access : bitset with atomicOr - //bitset - __shared__ char prune_mask[12]; - __shared__ char have_been_pruned[12]; - __shared__ double e[12]; - */ - - //Init - int ithread = threadIdx.x + (blockDim.x) * threadIdx.y; - - if(ithread == 0) { - *s_t_pruned = 0; - } - __syncthreads(); - //End init - - do { - // - // Step 1 : Find node to push - // - __shared__ int node_to_push; - __shared__ double flow_to_push; - device_get_node_to_push(degree_in, - degree_out, - n, - node_to_push, - flow_to_push, - ithread, - d_total_flow); - - int level_node_to_push = h[node_sg_to_g[node_to_push]]; - - __syncthreads(); - // - // Step 2 : Push/Pull - // - - device_push_pull(degree_in, - degree_out, - edge_mask, - edge_mask_orig, - reverse_edge_map, - cf, - e, - prune_mask, - node_to_push, - flow_to_push, - level_node_to_push, - sg_in, - sg_out, - sg_level_offsets, - ht, - ithread); - - __syncthreads(); - - // - // Step 3 : Prune - // - device_prune(ithread, - RPPP_NTHREADS, - degree_in, - degree_out, - cf, - prune_mask, - have_been_pruned, - edge_mask, - sg_in, - sg_out, - s_t_pruned); - - __syncthreads(); - - break; - } while(!*s_t_pruned); -} - diff --git a/maxflow/MPM/get_node_to_push.cu b/maxflow/MPM/get_node_to_push.cu deleted file mode 100644 index e5323b4..0000000 --- a/maxflow/MPM/get_node_to_push.cu +++ /dev/null @@ -1,32 +0,0 @@ -#include "MPM.h" -#include "../utils.cu" - - - -void MPM::get_node_to_push() { - - //printf("height winner : %i \n", *h_h_node_to_push); -} - -template -__device__ void device_get_node_to_push(double *in_degree, - double *out_degree, - int n, - int &node_to_push, - double &flow_to_push, - const int ithread, - double *d_total_flow) { - cub::ArgMin argmin; - kvpid argmin_in = blockArgMin(in_degree+1, n-1); //avoiding source - argmin_in.key += 1; - kvpid argmin_out = blockArgMin(out_degree, n-1); - - if(ithread == 0) { - kvpid m = argmin(argmin_in, argmin_out); - - node_to_push = m.key; - flow_to_push = m.value; - *d_total_flow += flow_to_push; - printf("-> pushing %i with %f (in d) \n", node_to_push, flow_to_push); - } -} diff --git a/maxflow/MPM/get_subgraph.cu b/maxflow/MPM/get_subgraph.cu deleted file mode 100644 index f3d3628..0000000 --- a/maxflow/MPM/get_subgraph.cu +++ /dev/null @@ -1,307 +0,0 @@ -#include "../cub/cub/cub.cuh" -#include "../utils.cu" - -#include "../config.h" -#include "nvToolsExt.h" - -#define INIT_G_BLOCK_X 32 -#define WRITE_EDGES_DIM_X 32 -#define NTHREADS 512 - -__global__ void reverse_hash(int *reversed, int *hash, int num_items) { - for(int u = blockDim.x * blockIdx.x + threadIdx.x; - u < num_items; - u += blockDim.x * gridDim.x) { - - reversed[hash[u]] = u; - } -} - -void MPM::write_edges() { - auto f_edge = [*this] __device__ (const int from, - const int to, - const int i_edge, - flow_t °ree_in_thread, - flow_t °ree_out_thread, - int &in_edge_offset, - int &out_edge_offset) { - - - int rev_i_edge = (i_edge != -1) ? 
reverse_edge_map[i_edge] : -1; - - int is_valid_out_edge = (i_edge != -1) && edge_mask[i_edge]; - int is_valid_in_edge = (rev_i_edge != -1) && edge_mask[rev_i_edge]; - - typedef cub::WarpScan WarpScan; - __shared__ typename WarpScan::TempStorage temp_storage_scan[NTHREADS/WRITE_EDGES_DIM_X]; - - // Compute exclusive warp-wide prefix sums - - int ithread = threadIdx.x + blockDim.x * threadIdx.y + (blockDim.x * blockDim.y) * threadIdx.z; - int warpid = ithread / WRITE_EDGES_DIM_X; - - int idx_in_edge = is_valid_in_edge, idx_out_edge = is_valid_out_edge; - int n_in_edge_in_warp, n_out_edge_in_warp; - WarpScan(temp_storage_scan[warpid]).ExclusiveSum(idx_in_edge,idx_in_edge, n_in_edge_in_warp); - __syncthreads(); - WarpScan(temp_storage_scan[warpid]).ExclusiveSum(idx_out_edge,idx_out_edge, n_out_edge_in_warp); - - //printf("(%i) u:%i edge:%i ; is in:%i (scan:%i) ; is out:%i (scan:%i) \n", threadIdx.x, from, i_edge, is_valid_in_edge, idx_in_edge, is_valid_out_edge, idx_out_edge); - //scan is done, lets return inactive edges - //printf("u=%i, tx=%i, active=%i, sum=%i, i_edge=%i \n", from, threadIdx.x, is_edge_active, write_idx_thread, i_edge); - - if(is_valid_out_edge) { - //Computing degree - degree_out_thread += cf[i_edge]; - - //Writing edges - int write_idx = out_edge_offset + idx_out_edge; - sg_out.parent_edge_indices[write_idx] = i_edge; - sg_out.col_indices[write_idx] = node_g_to_sg[to]; - - //printf("(%i,%i,%i) writing edge=%i (ig:%i) %i -> %i (g:%i) \n", threadIdx.x, threadIdx.y, threadIdx.z, write_idx, i_edge, from, g.node_g_to_sg[to], to); - } else if(is_valid_in_edge) { - degree_in_thread += cf[rev_i_edge]; - - //Writing edges - int write_idx = in_edge_offset + idx_in_edge; - sg_in.parent_edge_indices[write_idx] = rev_i_edge; - sg_in.col_indices[write_idx] = node_g_to_sg[to]; - } - - out_edge_offset += n_out_edge_in_warp; - in_edge_offset += n_in_edge_in_warp; - - }; - - auto f_node = [*this, f_edge] __device__ (const int u) { - typedef cub::WarpReduce WarpReduce; - __shared__ typename WarpReduce::TempStorage temp_storage_reduce[NTHREADS/WRITE_EDGES_DIM_X]; - flow_t in_degree_thread = 0, out_degree_thread = 0; - int out_edge_offset = sg_out.edge_offsets[u], in_edge_offset = sg_in.edge_offsets[u]; - int u_g = node_sg_to_g[u];//u in g; - - iterate_on_edges(u_g, g, f_edge, in_degree_thread, out_degree_thread, in_edge_offset, out_edge_offset); - - int ithread = threadIdx.x + blockDim.x * threadIdx.y + (blockDim.x * blockDim.y) * threadIdx.z; - int warpid = ithread / WRITE_EDGES_DIM_X; - - - flow_t total_in_degree = WarpReduce(temp_storage_reduce[warpid]).Sum(in_degree_thread); - __syncthreads(); - flow_t total_out_degree = WarpReduce(temp_storage_reduce[warpid]).Sum(out_degree_thread); - __syncthreads(); - - if(threadIdx.x == 0) { - degree_in[u] = total_in_degree; - degree_out[u] = total_out_degree; - } - }; - - - dim3 grid,block; - - block.x = WRITE_EDGES_DIM_X; - block.y = 1; - grid.x = 1; - grid.y = 1; - - - apply_on_graph(sg_in.n, f_node, grid, block, 0, st1); -} - -void MPM::init_level_graph(int &nsg) { - nvtxRangePushA("get_subgraph"); - - cudaMemset(edge_mask, 0, sizeof(char) * g.nnz); - cudaMemset(node_mask, 0, sizeof(char) * g.n); - cudaMemset(queue_mask, 0, sizeof(char) * g.n); - cudaMemset(d_ppd, 0, sizeof(post_prune_data)); //resetting s_t_pruned and prune_flag - - auto f_edge_init = [*this] __device__ (const int from, const int to, const int i_edge, int *n_in_edges, int *n_out_edges) { - if(i_edge != -1) { - int hto = h[to]; - int hfrom = h[from]; - if((hto + 1) == hfrom) { 
// going backward - int rev_i_edge = reverse_edge_map[i_edge]; - - if(edge_mask_orig[rev_i_edge]) { - edge_mask[rev_i_edge] = 1; //the edge is part of subgraph - node_mask[to] = 1; //to is part of subgraph - atomicAdd(n_in_edges, 1); - - } - } else if(edge_mask[i_edge] && (hfrom + 1) == hto) { //going forward - atomicAdd(n_out_edges, 1); - } - } - }; - - - auto f_node_init = [*this, f_edge_init] __device__ (int idx, int u) { - if(!node_mask[u]) return; - - if(threadIdx.x == 0) { - sg_in.edge_offsets[idx] = 0; - sg_out.edge_offsets[idx] = 0; - } - __syncthreads(); //this code shouldnt diverge - - iterate_on_edges(u, g, f_edge_init, &sg_in.edge_offsets[idx], &sg_out.edge_offsets[idx]); - - if(threadIdx.x == 0) { - queue_mask[idx] = 1; - //printf("Node %i, in=%i, out=%i \n", u, sg_in.edge_offsets[idx], sg_out.edge_offsets[idx]); - } - //printf("node %i, direction %i, sum %f \n", u, direction, degree_set[u]); - - }; - - //localgrid and block : what we actually need in our lambdas, the z-axis will be use and defined internaly in iterate_on_levels - dim3 localGrid(1, 1); - dim3 localBlock(INIT_G_BLOCK_X, 1); - - char one = 1; - cudaMemcpy(&node_mask[t], &one, sizeof(char), cudaMemcpyHostToDevice); - iterate_on_levels(ht, - 0, - q_bfs, - bfs_offsets, - f_node_init, - localGrid, - localBlock, - 1, // nodes per thread - 0, - 0); - //Creating new list containing only vertices in layered graph - - //TODO we dont need g.n, we just need bfs_offsets[ht...] - - cudaEvent_t nsg_on_host; - - //List nodes in layer - PartitionFlagged(node_sg_to_g, q_bfs, queue_mask, g.n, d_nsg, d_storage_partition, size_storage_partition); - cudaMemcpy(&nsg, d_nsg, sizeof(int), cudaMemcpyDeviceToHost); - //in edge count in layer - PartitionFlagged(buf1, sg_in.edge_offsets, queue_mask, g.n, d_nsg, d_storage_partition, size_storage_partition); - //out edge count in layer - PartitionFlagged(buf2, sg_out.edge_offsets, queue_mask, g.n, d_nsg, d_storage_partition, size_storage_partition); - - - sg_in.n = nsg; - sg_out.n = nsg; - //printf("nsg is %i \n", nsg); - - degree_in = degree; - degree_out = degree + nsg; - //printf("in layered graph : %i \n", nsg); - //TODO we could resize sg here (n) - - // - // Compute offsets - // - ExclusiveSum(sg_in.edge_offsets, buf1, nsg+1, d_storage_exclusive_sum, size_storage_exclusive_sum); //we dont care about whats in g.buf[nsg] (exclusive sum), but we need the nsg+1 first elements of the sum - ExclusiveSum(sg_out.edge_offsets, buf2, nsg+1, d_storage_exclusive_sum, size_storage_exclusive_sum); - - //TODO we could resize sg here (nnz) - - /* - - cudaDeviceSynchronize(); - for(int i=0; i != nsg; ++i) { - printf("sg_node %i (h=%i), g_node %i, in_edge %i, out_edge %i offset in %i, offset out %i \n", - i, - h[node_sg_to_g[i]], - node_sg_to_g[i], - buf1[i], - buf2[i], - sg_in.edge_offsets[i], - sg_out.edge_offsets[i]); - - } - */ - - dim3 block, grid; - block.x = 256; - grid.x = min((nsg + block.x - 1)/block.x, N_MAX_BLOCKS); - reverse_hash<<>>(node_g_to_sg, node_sg_to_g, nsg); - - //TODO cudaLaunch returns 0x7 in write_edges - write_edges(); - -/* - cudaDeviceSynchronize(); - printf("bfs off : \n"); - for(int i=0; i!=ht+2; ++i) - printf("%i\t", bfs_offsets[i]); - printf("\n"); -*/ - - SegmentedReduce(buf1, queue_mask, bfs_offsets, ht+2); - - - -/* - printf("counts : \n"); - for(int i=0; i!=ht+2; ++i) - printf("%i\t", buf1[i]); - printf("\n"); - -*/ - ExclusiveSum(sg_level_offsets, buf1, ht+2, d_storage_exclusive_sum, size_storage_exclusive_sum); - - cudaDeviceSynchronize(); - //CPU pagefaults - 
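// Note: sg_level_offsets[i]..sg_level_offsets[i+1] delimit the layered-graph
// nodes of BFS level i, so the loop below simply records the widest level.
// Illustrative example: offsets {0, 1, 4, 6, 7} give level widths {1, 3, 2, 1}
// and max_level_width = 3; the value is later handed to iterate_on_levels()
// in push_and_pull().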
max_level_width = 0; - for(int i=0; i != (ht+1); ++i) - max_level_width = max(max_level_width, sg_level_offsets[i+1] - sg_level_offsets[i]); - //printf("max level width : %i \n", max_level_width); - - /* - cudaDeviceSynchronize(); - printf("SCAN : \n"); - for(int i=0; i!=ht+2; ++i) - printf("%i\t", g.sg_level_offsets[i]); - printf("\n"); - - - - - cudaDeviceSynchronize(); - printf("levels offsets: \n"); - for(int i=0; i != ht+2; ++i) - printf("(%i) %i : %i\n", i, buf1[i], sg_level_offsets[i]); - printf("\n"); - */ -/* - - for(int i=0; i != nsg; ++i) { - printf("sg_node %i (h=%i), g_node %i, din=%f, dout=%f \n out edges : ", - i, - h[node_sg_to_g[i]], - node_sg_to_g[i], - degree_in[i], - degree_out[i]); - - for(int i_edge = sg_out.edge_offsets[i]; - i_edge != sg_out.edge_offsets[i+1]; - ++i_edge) - printf("%i (g:%i)\t", sg_out.col_indices[i_edge], node_sg_to_g[sg_out.col_indices[i_edge]]); - - printf("\n in edges :"); - for(int i_edge = sg_in.edge_offsets[i]; - i_edge != sg_in.edge_offsets[i+1]; - ++i_edge) - printf("%i (g:%i)\t", sg_in.col_indices[i_edge], node_sg_to_g[sg_in.col_indices[i_edge]]); - - printf("\n"); - - } - -*/ - //TODO we use it in prune - cudaMemset(queue_mask, 0, sizeof(char) * g.n); - cudaDeviceSynchronize(); - - nvtxRangePop(); -} diff --git a/maxflow/MPM/prune.cu b/maxflow/MPM/prune.cu deleted file mode 100644 index 66eedb1..0000000 --- a/maxflow/MPM/prune.cu +++ /dev/null @@ -1,145 +0,0 @@ -#include "MPM.h" - - -#include "nvToolsExt.h" - -#include "../config.h" - -#define PRUNE_DIM_X 8 - -// -//Prune : removing nodes will null throughput -//First level is detected while executing push/pull -// - -__device__ bool edge_op_prune(const int node_pruned, - const int node_to_update, - const int i_edge, - char *prune_mask, - flow_t *degree_to_update, - flow_t *cf, - char *edge_mask, - const csr_subgraph &sg_in, - const csr_subgraph &sg_out, - post_prune_data *d_ppd, - const int cur_flag) { - - if(i_edge == -1 || !edge_mask[i_edge]) return false; //if this is not a real edge, quit now - flow_t cf_edge = cf[i_edge]; - - if(!isZero(cf_edge)) { - edge_mask[i_edge] = 0; - flow_t new_degree = atomicAdd(°ree_to_update[node_to_update], -cf_edge) - cf_edge; //TODO shouldnt have atomics - if(isZero(new_degree)) { - prune_mask[node_to_update] = 1; - d_ppd->prune_flag = cur_flag; - } - } - - - return false; //we have to iterate through all edges, we're not done - -} - - -template -__device__ void node_op_prune(const int u, - const csr_subgraph &sg_in, - const csr_subgraph &sg_out, - F1 f_forward_edge, - F2 f_backward_edge, - char *prune_mask, - flow_t *in_degree, - flow_t *out_degree, - post_prune_data *d_ppd, - const int flag) { - - if(!prune_mask[u]) return; - - prune_mask[u] = 0; - //have_been_pruned[u] = 1; - //if(threadIdx.x == 0) - // printf("pruning %i \n", u); - - if(u == 0 || u == (sg_in.n-1)) { //s is 0, t is sg_in-1 - d_ppd->s_t_pruned = 1; return; - } - - //those two operations dont need to be serial - //Deleting edges and updating neighbor's d_out - //TODO warp div - iterate_on_edges(u, sg_out, f_forward_edge, flag); - iterate_on_edges(u, sg_in, f_backward_edge, flag); - - in_degree[u] = FLOW_INF; - out_degree[u] = FLOW_INF; -} - - -void MPM::prune() { - - auto f_forward_edge = [*this] __device__ (const int node_pruned, - const int node_to_update, - const int i_edge, - const int flag) { - return edge_op_prune(node_pruned, node_to_update, i_edge, prune_mask, degree_in, cf, edge_mask, sg_in, sg_out, d_ppd, flag); - }; - - auto f_backward_edge = [*this] __device__ (const int 
node_pruned, - const int node_to_update, - const int i_edge, - const int flag) { - return edge_op_prune(node_pruned, node_to_update, i_edge, prune_mask, degree_out, cf, edge_mask, sg_out, sg_in, d_ppd, flag); //TODO remove sg_in, sg_out - }; - - auto f_node_flag = [*this, f_backward_edge, f_forward_edge] __device__ (const int node, const int flag) { - node_op_prune(node, sg_in, sg_out, f_forward_edge, f_backward_edge, prune_mask, degree_in, degree_out, d_ppd, flag); - }; - - - nvtxRangePushA("prune"); - - // - // End reduce - // - - dim3 localBlock, localGrid; - localBlock.x = PRUNE_DIM_X; - localBlock.y = 1; - - localGrid.x = 1; - localGrid.y = 1; - - bool done = false; - int niters = 1; //do 3 iters at first - int last_flag = 0; - - while(!done) { - for(int it=0; it != niters; ++it) { - ++last_flag; - auto f_node = [last_flag, f_node_flag] __device__ (const int node) { - f_node_flag(node, last_flag); - }; - apply_on_graph(sg_in.n, f_node, localGrid, localBlock, 0, st1); - } - //bug on cub - - //Find node to push - cub::DeviceReduce::ArgMin(d_min_reduce, min_reduce_size, degree_in+1, &d_ppd->d_min, 2*(sg_in.n-1), st1); - cudaMemcpyAsync(&h_ppd, d_ppd, sizeof(post_prune_data), cudaMemcpyDeviceToHost, st1); - cudaStreamSynchronize(st1); - done = (h_ppd.prune_flag != last_flag) || h_ppd.s_t_pruned; - niters *= 2; - //if(!done) - // printf("lets go again - iter=%i \n", niters); - } - - - //printf("s_t_pruned : %i \n", h_ppd.s_t_pruned); - - //TODO could be a simple memset on have_been_pruned if *s_t_pruned - nvtxRangePop(); -} - - - diff --git a/maxflow/MPM/push_pull.cu b/maxflow/MPM/push_pull.cu deleted file mode 100644 index ddfcebd..0000000 --- a/maxflow/MPM/push_pull.cu +++ /dev/null @@ -1,249 +0,0 @@ - -#include "../config.h" -#include "nvToolsExt.h" - -// -// Pull/Push flow from/to source -// - -#define MOVE_FLOW_DIM_X 32 -#define MOVE_FLOW_DIM_Y 1 - -__host__ __device__ int imin_to_node_to_push(int imin, int nsg) { - imin += 1;; - return (imin < nsg) ? imin : (imin - nsg); -} - -template -__global__ void first_push_pull(post_prune_data *d_ppd, - flow_t *d_total_flow, - csr_graph g, - int nsg, - int *h, - flow_t *e, - int *node_sg_to_g, - F1 f_node_push, - F2 f_node_pull) { - - int node_to_push = imin_to_node_to_push(d_ppd->d_min.key, nsg); - flow_t flow_to_push = d_ppd->d_min.value; - - //Flag need to be reset before next prune - d_ppd->prune_flag = 0; - - int ithread = threadIdx.y * blockDim.x + threadIdx.x; - - if(ithread == 0) { - *d_total_flow += flow_to_push; - //printf("---------- pushing %i with %f - gn=%i \n", node_to_push, flow_to_push, nsg); - } - - switch(threadIdx.y) { - case 0: //push - f_node_push(node_to_push, flow_to_push); - break; - case 1: //pull - f_node_pull(node_to_push, flow_to_push); - break; - } -} - -template -__device__ bool edge_op_move_flow(const int from, - const int to, - const int i_edge, - flow_t &to_push, - flow_t *degree_to, - char *edge_mask, - char *edge_mask_orig, - char *prune_mask, - flow_t *cf, - flow_t *e, - const int *reverse_edge_map, - const int sg_t) { - - flow_t cf_edge = (i_edge != -1 && edge_mask[i_edge]) ? 
cf[i_edge] : 0; - - // - // Exclusive sum of edges available capacities (cf = cap - flow) - // If exclusive sum is >= to_push -> nothing to do for this thread - // Else if exclusive + cf_edge <= to_push do a full push - // Else do a partial push of to_push - exclusive - // - - typedef cub::WarpScan WarpScan; - __shared__ typename WarpScan::TempStorage temp_storage[512/THREADS_VERTEX]; //TODO size, multiple thread - int ithread = threadIdx.x + threadIdx.y * blockDim.x + threadIdx.z * (blockDim.x * blockDim.y); - int i_logical_warp = ithread / THREADS_VERTEX; - flow_t aggregate, exclusive_sum; - exclusive_sum = cf_edge; - WarpScan(temp_storage[i_logical_warp]).ExclusiveSum(exclusive_sum, exclusive_sum, aggregate); - //printf("(%i,W:%i,XD:%i) U:%i Value:%f Scan:%f \n", threadIdx.x, i_logical_warp, THREADS_VERTEX, from, cf_edge, exclusive_sum); - //Do not kill the threads with cf_edge = 0 - //We need to update to_push - - //if(i_edge != -1) - //printf("raw : %i -> %i : %i : %f (%f, %i) \n", from, to, i_edge, cf_edge, cf[i_edge], edge_mask[i_edge]); - - if(!isZero(cf_edge)) { - flow_t local_push = 0; - int rev_i_edge; - if(exclusive_sum < to_push) { - local_push = min(cf_edge, to_push - exclusive_sum); - rev_i_edge = reverse_edge_map[i_edge]; - } - if(isZero(cf_edge - local_push)) { - //i_edge is now saturated - cf[i_edge] = 0; - //printf("edge %i is going down \n", i_edge); - edge_mask[i_edge] = 0; - edge_mask_orig[i_edge] = 0; - - //rev_i_edge cant be in layer graph (only edges to next level) - cf[rev_i_edge] += cf_edge; - edge_mask_orig[rev_i_edge] = 1; - - } else if(local_push > 0) { - //partial push on i_edge - cf[i_edge] -= local_push; - cf[rev_i_edge] += local_push; - edge_mask_orig[rev_i_edge] = 1; - } - if(local_push > 0) { - - //printf("moving %f from %i to %i - exlu sum : %f \n", local_push, from, to, exclusive_sum); - - //Assign local_push flow to to - - //Multiple nodes on the same level can have an edge going to to - //We need atomics here - - //We don't push to s or t - //Avoiding useless atomics + avoiding memset (to set e[t'] = 0 at the end) - if(to != 0 && to != sg_t) - atomicAdd(&e[to], local_push); - - //if(MOVE_FLOW_MASK) - // move_flow_mask[to] = 1; - - //Multiple nodes on the same level can have an edge going to to - //We need atomics here - flow_t new_degree_to = atomicAdd(°ree_to[to], -local_push) - local_push; //atomicAdd is postfix - if(isZero(new_degree_to)) - prune_mask[to] = 1; - //printf("new degree from %i %f, to %i %f \n", from, degree_from[from], to, new_degree_to); - - } - } - - to_push -= aggregate; - return (to_push <= 0); //we're done if nothing left to push -} - - - - -void MPM::push_and_pull() { - auto f_edge_push = [*this] __device__ (const int from, const int to, const int i_edge, flow_t &to_push) { - edge_op_move_flow(from, to, i_edge, to_push, degree_in, edge_mask, edge_mask_orig, prune_mask, cf, e, reverse_edge_map, sg_in.n-1); - }; - - auto f_edge_pull = [*this] __device__ (const int from, const int to, const int i_edge, flow_t &to_pull) { - edge_op_move_flow(from, to, i_edge, to_pull, degree_out, edge_mask, edge_mask_orig, prune_mask, cf, e, reverse_edge_map, sg_in.n-1); - }; - - auto f_node_push = [*this, f_edge_push] __device__ (const int u, flow_t to_push) { - if(isZero(to_push)) return; //it is an exact 0 - //printf("will push %f from %i \n", to_push, u); - flow_t pushed = to_push; - iterate_on_edges(u, sg_out, f_edge_push, to_push); - - //printf("(%i) Post Push \n", threadIdx.x); - if(threadIdx.x == 0) { - flow_t new_degree = (degree_out[u] -= 
pushed); - //printf("%p new degree out[%i] = %f \n", degree_out, u, new_degree); - if(isZero(new_degree)) { - prune_mask[u] = 1; - } - e[u] = 0; - } - - }; - - auto f_node_pull = [*this, f_edge_pull] __device__ (const int u, flow_t to_pull) { - if(isZero(to_pull)) return; //it is an exact 0 - //printf("will pull %f from %i \n", to_pull, u); - flow_t pulled = to_pull; - iterate_on_edges(u, sg_in, f_edge_pull, to_pull); - - if(threadIdx.x == 0) { - flow_t new_degree = (degree_in[u] -= pulled); - //printf("%p new degree in[%i] = %f \n", degree_in, u, new_degree); - if(isZero(new_degree)) { - prune_mask[u] = 1; - } - - e[u] = 0; - } - - }; - - auto f_node_push_e = [*this, f_node_push] __device__ (const int u) { - f_node_push(u, e[u]); - }; - - auto f_node_pull_e = [*this, f_node_pull] __device__ (const int u) { - f_node_pull(u, e[u]); - }; - - nvtxRangePushA("push_pull"); - - - //Launching first push pull - dim3 sgrid, sblock; - sgrid.x =1, sgrid.y = 1, sgrid.z = 1; - sblock.x = 32, sblock.y = 2, sblock.z = 1; - - first_push_pull<<>>(d_ppd, - d_total_flow, - g, - sg_in.n, - h, - e, - node_sg_to_g, - f_node_push, - f_node_pull); - - //Computing h[node_to_push] in the meantime - kvpid h_min = h_ppd.d_min; - int node_to_push = imin_to_node_to_push(h_min.key, sg_in.n); - //printf("on host : %i (%f) \n", node_to_push, h_min.value); - int level_node_to_push = 0; - //could do a binary search - while(node_to_push >= sg_level_offsets[level_node_to_push+1]) - level_node_to_push++; - - dim3 grid, block; - block.x = MOVE_FLOW_DIM_X; - block.y = MOVE_FLOW_DIM_Y; - - grid.x = 1; - grid.y = 1; - //cudaEvent_t pull_done; - //cudaEventCreate(&pull_done); - - iterate_on_levels(level_node_to_push+1, ht-1, sg_level_offsets, f_node_push_e, grid, block, 1, 0, st1, max_level_width); - - iterate_on_levels(level_node_to_push-1, 1, sg_level_offsets, f_node_pull_e, grid, block, 1, 0, st1, max_level_width); - - //cudaStreamWaitEvent(st1, pull_done, 0); - //cudaEventRecord(pull_done, st2); - - - nvtxRangePop(); - - -} - - - diff --git a/maxflow/Makefile b/maxflow/Makefile deleted file mode 100644 index 69f862d..0000000 --- a/maxflow/Makefile +++ /dev/null @@ -1,77 +0,0 @@ -CCXX = g++ -NVCC = nvcc - -GPU_TARGETS= edmonds-karp-gpu-naive push-relabel-gpu-naive mpm-gpu-naive -GPU_TARGETS_GUNROCK= edmonds-karp-gpu-gunrock push-relabel-gpu-gunrock -CPU_TARGETS= edmonds-karp-cpu push-relabel-cpu boost-push-relabel -TARGETS= $(GPU_TARGETS) $(GPU_TARGETS_GUNROCK) $(CPU_TARGETS) - -CXXFLAGS = -O3 -fno-optimize-sibling-calls --std=c++11 -NVCC_FLAGS = -DUSE_GPU -O3 --std=c++11 --expt-extended-lambda -lnvToolsExt - -# add support for more cuda architectures below -#NVCC_ARCH += -gencode arch=compute_35,code=sm_35 -#NVCC_ARCH += -gencode arch=compute_52,code=sm_52 -NVCC_ARCH += -gencode arch=compute_60,code=sm_60 - -# update gunrock location if necessary -GUNROCK_DIR = ./gunrock -GUNROCK_OPTS = $(GUNROCK_DIR)/gunrock/util/test_utils.cu $(GUNROCK_DIR)/gunrock/util/error_utils.cu $(GUNROCK_DIR)/externals/moderngpu/src/mgpucontext.cu $(GUNROCK_DIR)/externals/moderngpu/src/mgpuutil.cpp --std=c++11 -I$(GUNROCK_DIR)/externals/moderngpu/include -I$(GUNROCK_DIR)/externals/cub -Xcompiler -DMETIS_FOUND -isystem $(GUNROCK_DIR)/gunrock -isystem $(GUNROCK_DIR) -GUNROCK_LIBS = -L$(GUNROCK_DIR)/build/lib -lgunrock -Xlinker -lboost_system -Xlinker -lboost_chrono -Xlinker -lboost_timer -Xlinker -lboost_filesystem -Xlinker -lgomp -Xlinker -lmetis - -CLEAN=rm -Rf *.o core bfs/*.o edmonds-karp/*.o push-relabel/*.o MPM/*.o boost_push_relabel/*.o 
galois-preflowpush - -ifeq ($(LOG_LEVEL),) - LOG_LEVEL = 0 -endif - -.PHONY: all_cpu all_gpu clean - -all_gpu: clean $(GPU_TARGETS) $(GPU_TARGETS_GUNROCK) -all_cpu: clean $(CPU_TARGETS) - -$(GPU_TARGETS) $(GPU_TARGETS_GUNROCK): C = $(NVCC) -$(GPU_TARGETS) $(GPU_TARGETS_GUNROCK): CFLAGS = $(NVCC_FLAGS) $(NVCC_ARCH) - -$(CPU_TARGETS): C = $(CCXX) -$(CPU_TARGETS): CFLAGS = $(CXXFLAGS) - - -boost-push-relabel: boost_push_relabel/push-relabel.o -edmonds-karp-cpu: bfs/bfs_cpu.o edmonds-karp/edmonds-karp.o maxflow.o -edmonds-karp-gpu-naive: bfs/bfs_gpu_naive.o edmonds-karp/edmonds-karp.o maxflow.o -edmonds-karp-gpu-gunrock: edmonds-karp/edmonds-karp.o maxflow.o - $(NVCC) $(NVCC_FLAGS) $(NVCC_ARCH) $(GUNROCK_OPTS) $(GUNROCK_LIBS) -DLOG_LEVEL=$(LOG_LEVEL) -o $@ $^ bfs/bfs_gpu_gunrock.cu - - -mpm-gpu-naive: bfs/bfs_gpu_naive.o MPM/MPM_implem.o maxflow.o MPM/MPM.o - -push-relabel-cpu: push-relabel/push-relabel_operations_cpu_omp.o graph_tools_cpu.o bfs/bfs_cpu.o push-relabel/push-relabel.o maxflow.o -push-relabel-gpu-naive: push-relabel/push-relabel_operations_gpu.o graph_tools_gpu.o bfs/bfs_gpu_naive.o push-relabel/push-relabel.o maxflow.o -push-relabel-gpu-gunrock: graph_tools_gpu.o maxflow.o push-relabel/push-relabel_operations_gpu.o graph_tools_gpu.o push-relabel/push-relabel.o - $(NVCC) $(NVCC_FLAGS) $(NVCC_ARCH) $(GUNROCK_OPTS) $(GUNROCK_LIBS) -DLOG_LEVEL=$(LOG_LEVEL) -o $@ $^ bfs/bfs_gpu_gunrock.cu -mtx2dimacs: - g++ --std=c++11 IO/mtx2dimacs.cpp -o mtx2dimacs - -$(CPU_TARGETS): - $(C) $(CFLAGS) -DLOG_LEVEL=$(LOG_LEVEL) -o $@ $^ -$(GPU_TARGETS): - $(NVCC) $(NVCC_FLAGS) $(NVCC_ARCH) -DLOG_LEVEL=$(LOG_LEVEL) -o $@ $^ - -galois-preflowpush: - cd galois/build && cmake .. && make preflowpush && cp ../../galois-preflowpush.sh ../../galois-preflowpush - - -#Implicit rules -%.o: %.cpp - $(C) $(CFLAGS) -DLOG_LEVEL=$(LOG_LEVEL) -o $@ -c $< - -%.o: %.cu - $(NVCC) $(NVCC_FLAGS) $(NVCC_ARCH) -DLOG_LEVEL=$(LOG_LEVEL) -o $@ -c $< - -clean: - $(CLEAN) - -mrproper: clean - rm -f $(TARGETS) - diff --git a/maxflow/README.md b/maxflow/README.md deleted file mode 100644 index 67c6c0b..0000000 --- a/maxflow/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# Maxflow - -This framework implements various maximum flow algorithms on GPU. - -## Motivation - -Existing GPU graph libraries such as Gunrock and nvGraph are missing a few important graph primitives, including maximum flow, which is frequently used in network analysis, image segmentation, clustering, bipartite matching, and other problems. There is also an interesting application of the maximum flow algorithm to the community detection problem in social networks. Many algorithms have been developed to compute the maximum flow, so the task is to investigate appropriate parallel implementations, find bottlenecks, optimize, benchmark on a set of graphs with different characteristics, and explore a few real applications. If things go well, we might consider integration into nvGraph as the final step, although the work will mostly focus on new algorithm development and analysis. - -## Build instructions - -Update the Makefile as necessary, then run `make` to build everything, or invoke `make` with a specific target to build one version only: `cpu`, `gpu_naive`, `gpu_gunrock`. - -Note that if you're trying to build `gpu_gunrock` you will need to clone recursively to fetch the `gunrock` submodule, and then build Gunrock with all of its dependencies using cmake. - -## Running examples - -Download the data sets using the shell script `data/get_data.sh`.
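The push rule documented in `MPM/MPM_implem.cu` earlier in this diff (the comment block ahead of the `cub::WarpScan` call in `edge_op_move_flow`) is easier to see in isolation: each of a node's level-graph edges receives flow according to the exclusive prefix sum of the residual capacities of the edges before it, so earlier edges absorb the excess first and at most one edge ends up with a partial push. The serial C++ sketch below illustrates that rule under simplified assumptions; the name `distribute_push` and the plain loop standing in for the warp scan are illustrative only and do not appear in the original kernel.

```cpp
// Serial illustration of the per-edge push rule from edge_op_move_flow:
// each edge receives flow based on the exclusive prefix sum of the
// residual capacities of the edges preceding it. The real kernel computes
// the same quantity cooperatively with cub::WarpScan over THREADS_VERTEX
// threads; this loop is a stand-in for that scan.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <vector>

using flow_t = double; // matches config.h

// Distribute `to_push` units of excess over edges with residual
// capacities `cf`; returns the amount assigned to each edge.
std::vector<flow_t> distribute_push(flow_t to_push, const std::vector<flow_t>& cf) {
    std::vector<flow_t> pushed(cf.size(), 0.0);
    flow_t exclusive = 0.0; // exclusive prefix sum of residual capacities
    for (std::size_t i = 0; i < cf.size(); ++i) {
        if (exclusive < to_push) {
            // Full push if exclusive + cf[i] <= to_push, partial otherwise.
            pushed[i] = std::min(cf[i], to_push - exclusive);
        }
        exclusive += cf[i]; // what the warp scan provides in parallel
    }
    return pushed;
}

int main() {
    // 5 units of excess over edges with residual capacities 2, 1, 4, 3.
    for (flow_t p : distribute_push(5.0, {2.0, 1.0, 4.0, 3.0}))
        std::printf("%g ", p);
    std::printf("\n");
    return 0;
}
```

Running it distributes the 5 units as (2, 1, 2, 0): two full pushes, one partial push, and one untouched edge.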
- -``` -Usage: ./maxflow [] - if random seed is not specified the weights are set as 1/degree for each vertex -``` - -There is also a test script `test.py` which runs various pre-defined examples and validates results. You will need to create a soft link `maxflow_gpu` to `maxflow_gpu_naive` or `maxflow_gpu_gunrock` to use the script. - diff --git a/maxflow/allocator.h b/maxflow/allocator.h deleted file mode 100644 index 3b9b4ce..0000000 --- a/maxflow/allocator.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. -#pragma once - -#include -#include - -static long tot_malloc_bytes = 0; - -#ifndef USE_GPU - -inline void* my_malloc(long size) -{ -#if (LOG_LEVEL > 1) && (LOG_LEVEL < 4) - tot_malloc_bytes += size; - printf("Memory allocation footprint %.3f MB\n", ((float) tot_malloc_bytes)/(1<<20)); -#endif - void *ptr = malloc(size); - return ptr; -} - -inline void my_free(void *ptr) -{ - free(ptr); -} - -#else - -#include - -inline void* my_malloc(int size) -{ -#if (LOG_LEVEL > 1) && (LOG_LEVEL < 4) - tot_malloc_bytes += size; - printf("Unified memory allocation footprint %.3f MB\n", ((float) tot_malloc_bytes)/(1<<20)); -#endif - void *ptr; - cudaMallocManaged(&ptr, size); - return ptr; -} - -inline void my_free(void *ptr) -{ - cudaFree(ptr); -} - -#endif - diff --git a/maxflow/benchmarks/benchmark.py b/maxflow/benchmarks/benchmark.py deleted file mode 100755 index 53f0bf1..0000000 --- a/maxflow/benchmarks/benchmark.py +++ /dev/null @@ -1,175 +0,0 @@ -#!/usr/bin/python -import os -import sys -import subprocess -import random -import time -import argparse - - -#config - -#maxflow implementations -implementations = [ "mpm-gpu-naive", "galois-preflowpush"] -log_filename = "bench_log.csv" - -#end config - -#parsing args -parser = argparse.ArgumentParser(description="Compute benchmarks of maxflow implementations") -parser.add_argument("--log", dest='log', action='store_const', const=1, default=0, help="Save individual benchmark results to logfile") -parser.add_argument("--make", dest='make', action='store_const', const=1, default=0, help="Make maxflow implementations") - -args = parser.parse_args() -log = args.log -make = args.make - -#make executables -for implementation in implementations: - if make: - subprocess.call(["make", "-C", "..", "clean"]) - subprocess.call(["make", "-C", "..", implementation, "LOG_LEVEL=1"]) - else: - if not os.path.isfile("../" + implementation): - print("../" + implementation + " does not exist. 
Please use --make to compile it.") - sys.exit(1) - - -commit_hash = subprocess.Popen(["git", "log", "-n", "1", "--pretty=format:\"%h\""], stdout=subprocess.PIPE).communicate()[0] -commit_title = subprocess.Popen(["git", "log", "-n", "1", "--pretty=format:\"%s\""], stdout=subprocess.PIPE).communicate()[0] -time_bench = time.time() - -if log: - logfile = open(log_filename, "a") - - -#text coloring -def colorRef(val): - return "\033[94m" + str(val) + "\033[0m" - - -def colorPassed(val): - return "\033[92m" + str(val) + "\033[0m" - - -def colorFailed(val): - return "\033[91m" + str(val) + "\033[0m" - - -def argmin(lst): - return lst.index(min(lst)) - -def argsort(seq): - return [x for x,y in sorted(enumerate(seq), key = lambda x: x[1])] - -#extract runtime and flow from program output -def flow_time_extract(res): - time = res.rsplit(None, 2)[-2].rstrip() - flow = res.rsplit(None, 4)[-4].rstrip() - return flow,time - -def test(matrix, s, t, w=None): - global failed,passed,winners - filename, file_extension = os.path.splitext(matrix) - - matrix = "../" + matrix - - if file_extension != '.gr': #we need ton convert the graph first - if not os.path.isfile(matrix + '.gr'): - print "Converting " + matrix + " to gr format..." - subprocess.call(['../data/convert_graph.sh', matrix, matrix + '.gr']) - - matrix += '.gr' - - times = [] - out_line = [matrix, str(s), str(t)] - - for i in range(len(implementations)): - - implementation = implementations[i] - - res = subprocess.Popen(["../" + implementation, matrix, str(s), str(t)], stdout=subprocess.PIPE).communicate()[0] - flow,time = flow_time_extract(res) - - if i==0: #reference - ref_flow = flow - out_line.append(ref_flow) - times.append(float(time)) - out_line.append(colorRef(time)) - else: - if flow == ref_flow: - out_line.append(colorPassed(time)) - times.append(float(time)) - passed += 1 - else: - out_line.append(colorFailed(time)) - times.append(sys.maxint) - failed += 1 - if log: - logfile_line = [str(time_bench), commit_hash, commit_title, implementation, matrix, str(s), str(t), time, flow] - logfile.write(', '.join(logfile_line) + "\n") - logfile.flush() - - best = argmin(times) - winners[best] += 1 - - out_line.append(implementations[best]) - - - print ', '.join(out_line) - -passed = 0 -failed = 0 - -#winners[i] : number of times implementations[i] was the best one -winners = [0] * len(implementations) - -print '=== BENCHMARKS ===' - -random.seed(1234321) - -# save header -header = ['matrix', 'source', 'sink', 'flow'] -header.extend(implementations) -header.append("best") -print ', '.join(header) - -test('data/wiki2003.mtx', 3, 12563) -test('data/wiki2003.mtx', 54, 1432) -test('data/wiki2003.mtx', 65, 7889) -test('data/wiki2003.mtx', 43242, 5634) -test('data/wiki2003.mtx', 78125, 327941) -test('data/wiki2003.mtx', 2314, 76204) - -test('data/roadNet-CA.mtx', 2314, 76204) -test('data/roadNet-CA.mtx', 9, 1247) -test('data/roadNet-CA.mtx', 1548, 365940) -test('data/roadNet-CA.mtx', 1548785, 654123) -test('data/roadNet-CA.mtx', 8, 284672) - -# USA road network (23.9M vertices, 28.8M edges) -test('data/road_usa.mtx', 125, 7846232) -test('data/road_usa.mtx', 458743, 321975) -test('data/road_usa.mtx', 96, 4105465) -test('data/road_usa.mtx', 5478, 658413) -test('data/road_usa.mtx', 364782, 32) -#test('data/road_usa.mtx', 21257849, 2502578) -#test('data/road_usa.mtx', 12345678, 23000000) -#test('data/road_usa.mtx', 16807742, 17453608) - -# wikipedia (3.7M vertices, 66.4M edges) -test('data/wiki2011.mtx', 254, 87452) -test('data/wiki2011.mtx', 315547, 
874528) -test('data/wiki2011.mtx', 8796, 673214) - -if log: - logfile.close() - -print '=== SUMMARY ===' -print str(failed) + ' tests failed out of ' + str(passed + failed) -print "Implementations ranking : " -w_indexes = reversed(argsort(winners)) -for w_idx in w_indexes: - print implementations[w_idx] + " : " + str(winners[w_idx]) + " win(s)" - - diff --git a/maxflow/benchmarks/benchmarks.cpp b/maxflow/benchmarks/benchmarks.cpp deleted file mode 100644 index a14ed31..0000000 --- a/maxflow/benchmarks/benchmarks.cpp +++ /dev/null @@ -1,65 +0,0 @@ -#define _POSIX_C_SOURCE 199309L - -#include -#include -#include -#include -#include -#include - -#include "../allocator.h" -#include "../matrix_io.h" -#include "../matrix.h" -#include "../MPM/MPM.h" - -#include - -#include "../boost-push-relabel/push-relabel.h" - -using std::stringstream; - -void do_benchmarks() -{ - vector> st_roadCA = {{1,1000}}; - - map>> todo; - todo.insert({"data/roadNet-CA.mtx", st_roadCA); - - for(auto& g_sts : todo) { - string g_path = g_sts.first; - vector> ls_s_t = g_sts.second; - - csr_graph g; - // read capacity graph, generate symmetric entries - read_mm_matrix(argv[1], &g.n, &g.n, &g.nnz, &g.row_offsets, &g.col_indices, &g.vals_cap); - if (argc == 4) - g.set_edge_weights_rcp_degree(); - - - //Using MPM - MPM mpm(*g); - - for(auto st : ls_s_t) { - int s = st.first; - int t = st.second; - - stringstream dimacs; - export_to_dimacs(dimacs, g, s, t); - - float pr_time, mpm_time; - boost_push_relabel(dimacs, &pr_time); - mpm.maxflow(s, t, &mpm_time); - } - - my_free(g.row_offsets); - my_free(g.col_indices); - my_free(g.vals_cap); - mpm.clean(); - } -} - - -int main(int argc, char **argv) -{ - do_benchmarks(); -} diff --git a/maxflow/benchmarks/benchmarks_log.csv b/maxflow/benchmarks/benchmarks_log.csv deleted file mode 100644 index e112c8d..0000000 --- a/maxflow/benchmarks/benchmarks_log.csv +++ /dev/null @@ -1 +0,0 @@ -1472508703.3, "264e4d2", "working commit : limit nblocks 65000, untested", edmonds-karp-cpu, data/roadNet-CA.mtx, 1, 10000, 0.670, 11472508703.3, "264e4d2", "working commit : limit nblocks 65000, untested", mpm-gpu-naive, data/roadNet-CA.mtx, 1, 10000, 0.503, 11472508703.3, "264e4d2", "working commit : limit nblocks 65000, untested", edmonds-karp-cpu, data/roadNet-CA.mtx, 1, 1000, 0.168, 0.8333331472508819.28, "264e4d2", "working commit : limit nblocks 65000, untested", edmonds-karp-cpu, data/roadNet-CA.mtx, 1, 10000, 0.671, 11472508819.28, "264e4d2", "working commit : limit nblocks 65000, untested", mpm-gpu-naive, data/roadNet-CA.mtx, 1, 10000, 0.500, 11472508819.28, "264e4d2", "working commit : limit nblocks 65000, untested", edmonds-karp-cpu, data/roadNet-CA.mtx, 1, 1000, 0.169, 0.8333331472508819.28, "264e4d2", "working commit : limit nblocks 65000, untested", mpm-gpu-naive, data/roadNet-CA.mtx, 1, 1000, 0.415, 0.833333 \ No newline at end of file diff --git a/maxflow/bfs/bfs.h b/maxflow/bfs/bfs.h deleted file mode 100644 index 2c37972..0000000 --- a/maxflow/bfs/bfs.h +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -#include - -using std::vector; -//Output types - -#define BFS_MARK_PREDECESSOR 0 -#define BFS_MARK_DEPTH 1 - -int bfs(int *row_offsets, int *col_indices, int num_nodes, int num_edges, int src_node, int dst_node, int *q, int *output, int output_type, char *col_mask, int *bfs_offsets); - diff --git a/maxflow/bfs/bfs_cpu.cpp b/maxflow/bfs/bfs_cpu.cpp deleted file mode 100644 index 326e694..0000000 --- a/maxflow/bfs/bfs_cpu.cpp +++ /dev/null @@ -1,79 +0,0 @@ -// Copyright (c) 2016, NVIDIA 
CORPORATION. All rights reserved. - -#include -#include -#include -#include "../allocator.h" -#include "../matrix.h" -#include "bfs.h" - -//BFS CPU implementation -int bfs(int *row_offsets, int *col_indices, int num_nodes, int num_edges, int s, int t, int *q, int *output, int output_type, char *mask, int *bfs_offsets) -{ - int found = 0; - #pragma omp parallel num_threads(1) - { - int edges = 0; // count number of visited edges - - // set all vertices as undiscovered (-1) - for (int i = 0; i < num_nodes; i++) output[i] = -1; - - // start with source vertex - q[0] = s; - output[s] = (output_type == BFS_MARK_PREDECESSOR) ? s : 0; - - #if LOG_LEVEL > 3 - int *bfs_level = (int*)my_malloc(num_nodes * sizeof(int)); - int *bfs_vertices = (int*)my_malloc(num_nodes * sizeof(int)); - int *bfs_edges = (int*)my_malloc(num_nodes * sizeof(int)); - memset(bfs_level, 0, num_nodes * sizeof(int)); - memset(bfs_vertices, 0, num_nodes * sizeof(int)); - memset(bfs_edges, 0, num_nodes * sizeof(int)); - bfs_vertices[0] = 1; - bfs_edges[0] = row_offsets[s+1] - row_offsets[s]; - #endif - - int idx = -1; - int size = 1; - while (idx+1 < size && !found) { - idx = idx+1; - int u = q[idx]; - for (int i = row_offsets[u]; i < row_offsets[u+1]; i++) { - int v = col_indices[i]; - edges++; - if (output[v] == -1 && mask[i]) { - output[v] = (output_type == BFS_MARK_PREDECESSOR) ? u : output[u]+1; - #if LOG_LEVEL > 3 - bfs_level[v] = bfs_level[u] + 1; - bfs_vertices[bfs_level[v]]++; - bfs_edges[bfs_level[v]] += row_offsets[v+1] - row_offsets[v]; - #endif - - if (v == t) { - found = 1; - #if LOG_LEVEL > 1 - printf("bfs: traversed vertices %d (%.0f%%), traversed edges %d (%.0f%%), ", size, (double)100.0*size/num_nodes, edges, (double)100*edges/num_edges); - #endif - - #if LOG_LEVEL > 3 - printf("\n"); - for (int i = 0; i < bfs_level[v]; i++) - printf(" bfs level %i: %i vertices, %i edges\n", i, bfs_vertices[i], bfs_edges[i]); - #endif - break; - } - q[size] = v; - size++; - } - } - } - #if LOG_LEVEL > 3 - my_free(bfs_level); - my_free(bfs_vertices); - my_free(bfs_edges); - #endif - } - - return found; -} - diff --git a/maxflow/bfs/bfs_cpu_omp.cpp b/maxflow/bfs/bfs_cpu_omp.cpp deleted file mode 100644 index f3cb559..0000000 --- a/maxflow/bfs/bfs_cpu_omp.cpp +++ /dev/null @@ -1,54 +0,0 @@ -// Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - -#include -#include -#include -#include "../matrix.h" -#include "bfs.h" - -//BFS CPU OMP implementation -int bfs(int *row_offsets, int *col_indices, int num_nodes, int num_edges, int s, int t, int *q, int *output, int output_type, char *mask, int* bfs_offsets) -{ - // set all vertices as undiscovered (-1) - #pragma omp parallel for - for (int i = 0; i < num_nodes; i++) output[i] = -1; - - // start with source vertex - q[0] = s; - bool mark_pred = (output_type == BFS_MARK_PREDECESSOR); - output[s] = mark_pred ? s : 0; - - int size = 1; - int start_idx = 0; - int end_idx = size; - int found = 0; - int bfs_level = 0; - - while(!found && start_idx < end_idx) { - - #pragma omp parallel for - for(int idx = start_idx; idx < end_idx; idx++) { - int u = q[idx]; - - for (int i = row_offsets[u]; i < row_offsets[u+1]; i++) { - int v = col_indices[i]; - if(output[v] == -1 && mask[i]) { - if(mask[i] && __sync_val_compare_and_swap(&output[v], -1, mark_pred ? 
u : (bfs_level+1)) == -1) { - if (v == t) { - found = 1; - break; - } - int pos = __sync_fetch_and_add (&size, 1); - q[pos] = v; - } - } - } - } - - start_idx = end_idx; - end_idx = size; - ++bfs_level; - } - return found; -} - diff --git a/maxflow/bfs/bfs_gpu_gunrock.cu b/maxflow/bfs/bfs_gpu_gunrock.cu deleted file mode 100644 index b9369a1..0000000 --- a/maxflow/bfs/bfs_gpu_gunrock.cu +++ /dev/null @@ -1,167 +0,0 @@ -// ---------------------------------------------------------------- -// Gunrock -- Fast and Efficient GPU Graph Library -// ---------------------------------------------------------------- -// This source code is distributed under the terms of LICENSE.TXT -// in the root directory of this source distribution. -// ---------------------------------------------------------------- - -// Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - -/** - * @file - * test_bfs.cu - * - * @brief Simple test driver program for breadth-first search. - */ - -#include -#include -#include -#include -#include -#include -#include - -// Utilities and correctness-checking -#include -#include - -// BFS includes -#include -#include -#include - -// Operator includes -#include -#include - -#include - -// graph structure -#include "../matrix.h" - -//Generic tools handling fill -#include "../graph_tools.h" - -#include "bfs.h" - -using namespace gunrock; -using namespace gunrock::app; -using namespace gunrock::util; -using namespace gunrock::oprtr; -using namespace gunrock::app::bfs; - -void ref_bfs_mask(const int src_node, const int dst_node, const int num_nodes, const int num_edges, const int *row_offsets, const int *col_indices, const int *col_mask, int *parents) -{ - int *q = (int*)malloc(num_nodes * sizeof(int)); - q[0] = src_node; - parents[src_node] = src_node; - int idx = -1; - int size = 1; - int found = 0; - while (idx+1 < size && !found) { - idx++; - int u = q[idx]; - for (int i = row_offsets[u]; i < row_offsets[u+1]; i++) { - int v = col_indices[i]; - if (parents[v] == -1 && col_mask[i]) { - parents[v] = u; - if (v == dst_node) { - found = 1; - break; - } - else { - q[size] = v; - size++; - } - } - } - } -} - -int bfs(int *row_offsets, int *col_indices, int num_nodes, int num_edges, int src_node, int dst_node, int *q, int *output, int output_type, int *col_mask) -{ - fill(num_nodes, output, -1); - cudaDeviceSynchronize(); - - bool mark_pred = (output_type == BFS_MARK_PREDECESSOR); -#if 0 - // TODO: use Gunrock's customized BFS here - ref_bfs_mask(src_node, dst_node, num_nodes, num_edges, row_offsets, col_indices, col_mask, parents); - - return cudaSuccess; -#else - typedef int VertexId; - typedef int SizeT; - typedef int Value; - typedef BFSProblem // IDEMPOTENCE - Problem; - typedef BFSEnactor Enactor; - - cudaError_t retval = cudaSuccess; - - Info *info = new Info; - - info->InitBase2("BFS"); - ContextPtr *context = (ContextPtr*)info->context; - cudaStream_t *streams = (cudaStream_t*)info->streams; - - int *gpu_idx = new int[1]; - gpu_idx[0] = 0; - - Problem *problem = new Problem(false, false); //no direction optimized, no undirected - if (retval = util::GRError(problem->Init( - false, //stream_from_host (depricated) - row_offsets, - col_indices, - col_mask, - output, - num_nodes, - num_edges, - 1, - NULL, - "random", - streams), - "BFS Problem Init failed", __FILE__, __LINE__)) return retval; - - Enactor *enactor = new Enactor(1, gpu_idx); - - if (retval = util::GRError(enactor->Init(context, problem), - "BFS Enactor Init failed.", __FILE__, __LINE__)) return retval; - - if 
(retval = util::GRError(problem->Reset( - src_node, enactor->GetFrontierType()), - "BFS Problem Reset failed", __FILE__, __LINE__)) - return retval; - - if (retval = util::GRError(enactor->Reset(), - "BFS Enactor Reset failed", __FILE__, __LINE__)) - return retval; - - if (retval = util::GRError(enactor->Enact(src_node), - "BFS Enact failed", __FILE__, __LINE__)) return retval; - - if (retval = util::GRError(problem->Extract(output, NULL), - "BFS Extract failed", __FILE__, __LINE__)) return retval; - - - // free memory - delete info; - delete problem; - delete enactor; - //check if path exists - - - //MAX_INT default value for src dis TODO - return (dst_node >= 0 && dst_node < num_nodes) && (output[dst_node] != -1); -#endif -} - - -// Leave this at the end of the file -// Local Variables: -// mode:c++ -// c-file-style: "NVIDIA" -// End: diff --git a/maxflow/bfs/bfs_gpu_naive.cu b/maxflow/bfs/bfs_gpu_naive.cu deleted file mode 100644 index 20866a4..0000000 --- a/maxflow/bfs/bfs_gpu_naive.cu +++ /dev/null @@ -1,125 +0,0 @@ -// Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - -#include -#include "../matrix.h" -#include "bfs.h" - -#include "nvToolsExt.h" - -//Generic tools handling masks and fill - -#define THREADS_PER_VERTEX_BFS 4 - -#define N_BLOCKS_MAX 65535 - - -template -__global__ void fill_kernel(int size, T *data, T value) -{ - int tid = threadIdx.x + blockIdx.x * blockDim.x; - if (tid < size) data[tid] = value; -} - -template -void fill(int size, T *data, T value) { - fill_kernel<<<(size + 255)/256, 256>>>(size, data, value); - cudaDeviceSynchronize(); -} - -// main bfs kernel: finds next frontier using blockDim.x threads per vertex -//level_width[i] contain the max width of the ith level -__global__ void next_frontier(int start, int end, int *d_next, int *row_offsets, int *col_indices, char *mask, int t, int *q, int *output, bool mark_pred, int bfs_level, int *found) -{ - for(int idx = start + blockIdx.y * blockDim.y + threadIdx.y; - idx < end; - idx += blockDim.y * gridDim.y) { - // current frontier - int u = q[idx]; - // writing node level TODO optional - - // loop over neighbor vertices - for (int i = row_offsets[u] + threadIdx.x; i < row_offsets[u+1]; i += blockDim.x) { - // next frontier - int v = col_indices[i]; - // only advance if we haven't visited & mask allows it - if (output[v] == -5 && mask[i]) { - // critical section below to avoid collisions from multiple threads - if (atomicCAS(&output[v], -5, mark_pred ? u : (bfs_level+1)) == -5) { - // add new vertex to our queue - int pos = atomicAdd(d_next, 1); - q[pos] = v; - - if (v == t) { - // early exit if we found the path - *found = 1; - return; - } - } - } - } - } -} - -//BFS GPU naive implementation -int bfs(int *row_offsets, int *col_indices, int num_nodes, int num_edges, int s, int t, int *q, int *output, int output_type, char *mask, int *bfs_offsets) -{ - nvtxRangePushA("BFS"); - // set all vertices as undiscovered (-5) - fill(num_nodes, output, -5); - // start with source vertex - q[0] = s; - bool mark_pred = (output_type == BFS_MARK_PREDECESSOR); - output[s] = mark_pred ? 
s : 0; - - // found flag (zero-copy memory) - static int *found = NULL; - if (!found) cudaMallocHost(&found, sizeof(int)); - *found = 0; - - static int *d_next = NULL; - if (!d_next) cudaMalloc(&d_next, sizeof(int)); - - int h_start = 0, h_end = 1; - - cudaMemcpy(d_next, &h_end, sizeof(int), cudaMemcpyHostToDevice); - - int bfs_level = 0; - - int off_idx = 0; - bfs_offsets[off_idx++] = 0; - - - dim3 block(THREADS_PER_VERTEX_BFS, 128 / THREADS_PER_VERTEX_BFS); - do { - // calculate grid size - int nitems; - nitems = h_end - h_start; - -#if LOG_LEVEL > 4 - printf(" bfs level %i: %i vertices :\n", bfs_level, nitems); - for(int i=h_start; i!=h_end; ++i) - printf("%i\t", q[i]); - printf("\n"); - -#endif - - dim3 grid(1, min((nitems + block.y-1) / block.y, N_BLOCKS_MAX)); - next_frontier<<>>(h_start, h_end, d_next, row_offsets, col_indices, mask, t, q, output, mark_pred, bfs_level, found); - - bfs_offsets[off_idx++] = h_end; - h_start = h_end; - cudaMemcpy(&h_end, d_next, sizeof(int), cudaMemcpyDeviceToHost); - ++bfs_level; - } while(h_start < h_end && *found == 0); - - bfs_offsets[off_idx++] = h_end; - - #if LOG_LEVEL > 1 - if (*found) - printf("bfs: traversed vertices %d (%.0f%%), ", h_end, (double)100.0*h_end/num_nodes); - #endif - - nvtxRangePop(); - return (*found); -} - diff --git a/maxflow/bfs/bfs_gunrock.cu b/maxflow/bfs/bfs_gunrock.cu deleted file mode 100644 index c708614..0000000 --- a/maxflow/bfs/bfs_gunrock.cu +++ /dev/null @@ -1,170 +0,0 @@ -// ---------------------------------------------------------------- -// Gunrock -- Fast and Efficient GPU Graph Library -// ---------------------------------------------------------------- -// This source code is distributed under the terms of LICENSE.TXT -// in the root directory of this source distribution. -// ---------------------------------------------------------------- - -// Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. - -/** - * @file - * test_bfs.cu - * - * @brief Simple test driver program for breadth-first search. 
- */ - -#include -#include -#include -#include -#include -#include -#include - -// Utilities and correctness-checking -#include -#include - -// BFS includes -#include -#include -#include - -// Operator includes -#include -#include - -#include - -// graph structure -#include "../matrix.h" - -//Generic tools handling masks and fill -#include "bfs_tools.cu" - -using namespace gunrock; -using namespace gunrock::app; -using namespace gunrock::util; -using namespace gunrock::oprtr; -using namespace gunrock::app::bfs; - -void ref_bfs_mask(const int src_node, const int dst_node, const int num_nodes, const int num_edges, const int *row_offsets, const int *col_indices, const int *col_mask, int *parents) -{ - int *q = (int*)malloc(num_nodes * sizeof(int)); - q[0] = src_node; - parents[src_node] = src_node; - int idx = -1; - int size = 1; - int found = 0; - while (idx+1 < size && !found) { - idx++; - int u = q[idx]; - for (int i = row_offsets[u]; i < row_offsets[u+1]; i++) { - int v = col_indices[i]; - if (parents[v] == -1 && col_mask[i]) { - parents[v] = u; - if (v == dst_node) { - found = 1; - break; - } - else { - q[size] = v; - size++; - } - } - } - } -} - -cudaError_t bfs_mask(int src_node, int dst_node, int num_nodes, int num_edges, int *row_offsets, int *col_indices, int *col_mask, int *parents) -{ -#if 0 - // TODO: use Gunrock's customized BFS here - ref_bfs_mask(src_node, dst_node, num_nodes, num_edges, row_offsets, col_indices, col_mask, parents); - - return cudaSuccess; -#else - typedef int VertexId; - typedef int SizeT; - typedef int Value; - typedef BFSProblem // IDEMPOTENCE - Problem; - typedef BFSEnactor Enactor; - - cudaError_t retval = cudaSuccess; - - Info *info = new Info; - - - info->InitBase2("BFS"); - ContextPtr *context = (ContextPtr*)info->context; - cudaStream_t *streams = (cudaStream_t*)info->streams; - - int *gpu_idx = new int[1]; - gpu_idx[0] = 0; - - Problem *problem = new Problem(false, false); //no direction optimized, no undirected - if (retval = util::GRError(problem->Init( - false, //stream_from_host (depricated) - row_offsets, - col_indices, - col_mask, - parents, - num_nodes, - num_edges, - 1, - NULL, - "random", - streams), - "BFS Problem Init failed", __FILE__, __LINE__)) return retval; - - Enactor *enactor = new Enactor(1, gpu_idx); - - if (retval = util::GRError(enactor->Init(context, problem), - "BFS Enactor Init failed.", __FILE__, __LINE__)) return retval; - - if (retval = util::GRError(problem->Reset( - src_node, enactor->GetFrontierType()), - "BFS Problem Reset failed", __FILE__, __LINE__)) - return retval; - - if (retval = util::GRError(enactor->Reset(), - "BFS Enactor Reset failed", __FILE__, __LINE__)) - return retval; - - if (retval = util::GRError(enactor->Enact(src_node), - "BFS Enact failed", __FILE__, __LINE__)) return retval; - - // free memory - delete info; - delete problem; - delete enactor; - - return retval; -#endif -} - -//BFS gunrock implementation -int bfs(csr_graph *g, int s, int t, int *q, int *p, int *mask) -{ - // set all vertices as undiscovered (-1) - fill<-1><<<(g->n + 255)/256, 256>>>(g->n, p); - cudaDeviceSynchronize(); - - // setup mask, TODO: move this step inside Gunrock to reduce BW - setup_mask<<<(g->nnz + 255)/256, 256>>>(g->nnz, mask, g->vals_cap, g->vals_flow); - - // run bfs (with mask) - bfs_mask(s, t, g->n, g->nnz, g->row_offsets, g->col_indices, mask, p); - - // check if path exists - return (p[t] != -1); -} -// Leave this at the end of the file -// Local Variables: -// mode:c++ -// c-file-style: "NVIDIA" -// End: diff 
--git a/maxflow/boost_push_relabel/push-relabel.cpp b/maxflow/boost_push_relabel/push-relabel.cpp deleted file mode 100644 index 626d9d6..0000000 --- a/maxflow/boost_push_relabel/push-relabel.cpp +++ /dev/null @@ -1,73 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using std::istream; - -int main() { - using namespace boost; - - typedef adjacency_list_traits Traits; - typedef adjacency_list, - property > > - > Graph; - - std::ifstream dimacs("../data/dimacs_autoexport.dat"); - - Graph g; - - property_map::type - capacity = get(edge_capacity, g); - property_map::type - rev = get(edge_reverse, g); - property_map::type - residual_capacity = get(edge_residual_capacity, g); - - Traits::vertex_descriptor s, t; - read_dimacs_max_flow(g, capacity, rev, s, t, dimacs); - - - struct timespec start, end; - clock_gettime(CLOCK_MONOTONIC, &start); - - long flow; -#if defined(BOOST_MSVC) && BOOST_MSVC <= 1300 - // Use non-named parameter version - property_map::type - indexmap = get(vertex_index, g); - flow = push_relabel_max_flow(g, s, t, capacity, residual_capacity, rev, indexmap); -#else - flow = push_relabel_max_flow(g, s, t); -#endif - - clock_gettime(CLOCK_MONOTONIC, &end); - float time = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) * 1e-9; - - double f_as_d = (double)flow / 1000000000.0; - - printf("max flow = %f\n", f_as_d); - printf("time: %.3f s\n", time); - - - /* - std::cout << "c flow values:" << std::endl; - graph_traits::vertex_iterator u_iter, u_end; - graph_traits::out_edge_iterator ei, e_end; - for (boost::tie(u_iter, u_end) = vertices(g); u_iter != u_end; ++u_iter) - for (boost::tie(ei, e_end) = out_edges(*u_iter, g); ei != e_end; ++ei) - if (capacity[*ei] > 0) - std::cout << "f " << *u_iter << " " << target(*ei, g) << " " - << (capacity[*ei] - residual_capacity[*ei]) << std::endl; - */ - return 0; -} - diff --git a/maxflow/config.h b/maxflow/config.h deleted file mode 100644 index 5cf37c6..0000000 --- a/maxflow/config.h +++ /dev/null @@ -1,6 +0,0 @@ -// -// Config for maxflow -// -typedef double flow_t; - -#define FLOW_INF DBL_MAX diff --git a/maxflow/cub b/maxflow/cub deleted file mode 160000 index c3cceac..0000000 --- a/maxflow/cub +++ /dev/null @@ -1 +0,0 @@ -Subproject commit c3cceac115c072fb63df1836ff46d8c60d9eb304 diff --git a/maxflow/edmonds-karp/edmonds-karp.cpp b/maxflow/edmonds-karp/edmonds-karp.cpp deleted file mode 100644 index 290f8ec..0000000 --- a/maxflow/edmonds-karp/edmonds-karp.cpp +++ /dev/null @@ -1,88 +0,0 @@ -// Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. -#include "../matrix.h" -#include "../allocator.h" -#include -#include "../graph_tools.h" -#include "../bfs/bfs.h" -#include - -//Edmonds-karp implementation -//TODO separate CPU and GPU implem ? 
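All of the solvers in this directory operate on a residual graph: each directed edge stores cf = cap - flow, augmenting by f along an edge decreases its cf and increases the cf of its paired reverse edge, and the masked BFS only traverses edges whose cf is still positive (see `setup_mask_unsaturated` in the implementation below). The following minimal C++ sketch restates that bookkeeping under simplified assumptions; the `ResidualEdges` struct and the explicit `rev` array are illustrative and are not the `csr_graph` API from `matrix.h`.

```cpp
// Minimal sketch of the residual-graph update shared by the maxflow
// implementations here: pushing f along edge i moves capacity from the
// forward edge to its paired reverse edge, and the BFS mask keeps only
// unsaturated edges. Names are illustrative, not from the original code.
#include <cstdio>
#include <vector>

typedef double flow_t; // as in config.h

struct ResidualEdges {
    std::vector<flow_t> cf;   // residual capacities, one per directed edge
    std::vector<int>    rev;  // rev[i] = index of the opposite direction of edge i
    std::vector<char>   mask; // 1 iff the edge is unsaturated (usable by BFS)

    void augment(int i, flow_t f) {
        cf[i]      -= f;      // forward edge loses capacity
        cf[rev[i]] += f;      // reverse edge gains it back
        mask[i]      = (cf[i]      > 0);
        mask[rev[i]] = (cf[rev[i]] > 0);
    }
};

int main() {
    // One edge u->v with capacity 3 and its reverse v->u with capacity 0.
    ResidualEdges r{{3.0, 0.0}, {1, 0}, {1, 0}};
    r.augment(0, 3.0); // saturate u->v
    std::printf("cf = (%g, %g), mask = (%d, %d)\n",
                r.cf[0], r.cf[1], (int)r.mask[0], (int)r.mask[1]);
    return 0;
}
```

Saturating the single forward edge drops it out of the mask while the reverse edge becomes traversable, which is the update the Edmonds-Karp loop below performs along each augmenting path.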
- -double maxflowimplementation(csr_graph* g, int s, int t, float *time) { - struct timespec start, end; - clock_gettime(CLOCK_MONOTONIC, &start); - - - - int it = 0; // number of augmented paths - double fm = 0.0; - int *q = (int*)my_malloc(g->n * sizeof(int)); // bfs vertices queue - int *p = (int*)my_malloc(g->n * sizeof(int)); // parent vertices - int *h = (int*)my_malloc(g->n * sizeof(int)); // depth of nodes - TODO remove - char *mask = (char*)my_malloc(g->nnz * sizeof(int)); // edge mask (used in Gunrock only) - double *cf = g->vals_cap; - - int* level_width = (int*)my_malloc(g->n * sizeof(int)); // depth of nodes - TODO remove - //not used here - // find shortest augmented paths in c-f - setup_mask_unsaturated(g->nnz, mask, cf); - while (bfs(g->row_offsets, g->col_indices, g->n, g->nnz, s, t, q, p, BFS_MARK_PREDECESSOR, mask, level_width)) { - // backtrack to find the max flow we can push through - int v = t; - double mf = INF; - - while (v != s) { - int u = p[v]; - int i = g->edge(u,v); - mf = min(mf, cf[i]); - v = u; - } - // update flow value - fm = fm + mf; - - // backtrack and update flow graph - v = t; - int len = 0; - while (v != s) { - int u = p[v]; - int uv = g->edge(u,v); - int vu = g->edge(v,u); - - cf[uv] -= mf; - mask[uv] = (cf[uv] > 0); - cf[g->edge(v,u)] += mf; - mask[vu] = (cf[vu] > 0); - - v = u; - len++; - } - - // output more stats -#if LOG_LEVEL > 1 - printf("path length %d, aug flow %g\n", len, mf); -#endif - -#if LOG_LEVEL > 2 - printf("aug path vertices: "); - v = t; - while (v != s) { printf("%i ", v+1); v = p[v]; } - printf("%i \n", v+1); -#endif - - // count number of iterations - it++; - } -#if LOG_LEVEL > 0 - printf("%i augmenting paths\n", it); -#endif - - my_free(q); - my_free(p); - my_free(mask); - - clock_gettime(CLOCK_MONOTONIC, &end); - *time = (end.tv_sec - start.tv_sec) + (end.tv_nsec - start.tv_nsec) * 1e-9; - - return fm; -} diff --git a/maxflow/galois-preflowpush.sh b/maxflow/galois-preflowpush.sh deleted file mode 100755 index 6e72fe8..0000000 --- a/maxflow/galois-preflowpush.sh +++ /dev/null @@ -1,16 +0,0 @@ -#!/bin/sh -#Wrapper for galois maxflow - -if [ "$#" -lt "3" ] -then - echo "Usage : $0 " - exit 0 -fi - -s=$(($2 - 1)) #base 0 in galois -t=$(($3 - 1)) - -#Moving to own directory - -DIR="$( cd "$(dirname "$0")" && pwd )" -$DIR/galois/build/apps/preflowpush/preflowpush "$1" "$s" "$t" -t 12 --noverify | grep "^time\|^max" diff --git a/maxflow/galois/CMakeLists.txt b/maxflow/galois/CMakeLists.txt deleted file mode 100644 index c31f11c..0000000 --- a/maxflow/galois/CMakeLists.txt +++ /dev/null @@ -1,347 +0,0 @@ -cmake_minimum_required(VERSION 2.8.8) -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/") -project(Galois) -set(GALOIS_VERSION_MAJOR "2") -set(GALOIS_VERSION_MINOR "2") -set(GALOIS_VERSION_PATCH "1") -set(GALOIS_VERSION ${GALOIS_VERSION_MAJOR}.${GALOIS_VERSION_MINOR}.${GALOIS_VERSION_PATCH}) -set(GALOIS_COPYRIGHT_YEAR "2014") # Also in COPYRIGHT - -if(NOT CMAKE_BUILD_TYPE) - message(STATUS "No build type selected, default to Release") - set(CMAKE_BUILD_TYPE "Release") -endif() - -###### Options (alternatively pass as options to cmake -DName=Value) ###### -set(USE_GPROF OFF CACHE BOOL "Enable GCC profiling") -set(USE_VTUNE OFF CACHE BOOL "Use VTune for profiling") -set(USE_PAPI OFF CACHE BOOL "Use PAPI counters for profiling") -set(USE_HPCTOOLKIT OFF CACHE BOOL "Use HPCToolKit for profiling") -set(USE_STRICT_CONFIG OFF CACHE BOOL "Instead of falling back gracefully, fail") -set(USE_LONGJMP ON CACHE BOOL 
"Use longjmp instead of exceptions to signal aborts") -set(INSTALL_APPS OFF CACHE BOOL "Install apps as well as library") -set(SKIP_COMPILE_APPS OFF CACHE BOOL "Skip compilation of applications using Galois library") - -set(INSTALL_LIB_DIR lib CACHE PATH "Installation directory for libraries") -set(INSTALL_BIN_DIR bin CACHE PATH "Installation directory for executables") -set(INSTALL_INCLUDE_DIR include CACHE PATH "Installation directory for header files") -set(INSTALL_CMAKE_DIR lib/cmake/Galois CACHE PATH "Installation directory for CMake files") -# Make relative paths absolute -foreach(p LIB BIN INCLUDE CMAKE) - set(var INSTALL_${p}_DIR) - if(NOT IS_ABSOLUTE "${${var}}") - set(${var} "${CMAKE_INSTALL_PREFIX}/${${var}}") - endif() -endforeach() - -# Enable iss specific options; should be OFF in the general release; all guarded by USE_EXP -set(USE_EXP OFF CACHE BOOL "Use experimental features") -set(USE_HTM OFF CACHE BOOL "Use HTM") -set(EXP_DOALL "PTHREAD" CACHE STRING "Which type of implementation of parallel_doall") -set(USE_PROF OFF CACHE BOOL "Use profiling specific features") -set(USE_SUBVERSION_REVISION ON CACHE BOOL "Embed subversion numbers") - -###### Configure (users don't need to go beyond here) ###### - -enable_testing() - -###### Configure compiler ###### - -# ICC -if(CMAKE_CXX_COMPILER_ID MATCHES "Intel") - execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion - OUTPUT_VARIABLE ICC_VERSION) - if(ICC_VERSION VERSION_GREATER 13.0 OR ICC_VERSION VERSION_EQUAL 13.0) - #message(STATUS "ICC Version >= 13.0") - else() - message(FATAL_ERROR "ICC must be 13.0 or higher found: ${ICC_VERSION}") - endif() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -wd68 -wd981 -wd383 -wd869 -wd2196 -wd279 -wd2504 -wd2943 -wd32013") - - if("$ENV{GCC_BIN}" STREQUAL "") - message(STATUS "Using default GCC toolchain; set environment variable GCC_BIN to override") - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gxx-name=$ENV{GCC_BIN}/g++") - endif() -endif() - -# Clang -if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - if("$ENV{GCC_BIN}" STREQUAL "") - message(STATUS "Using default GCC toolchain; set environment variable GCC_BIN to override") - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -gcc-toolchain $ENV{GCC_BIN}/..") - endif() -endif() - -# XL -if(CMAKE_CXX_COMPILER_ID MATCHES "XL") - execute_process(COMMAND ${CMAKE_CXX_COMPILER} -qversion - COMMAND sed 1d - COMMAND sed s/Version:// - OUTPUT_VARIABLE XLC_VERSION) - if(XLC_VERSION VERSION_GREATER 12.0) - #message(STATUS "XLC Version > 12.0") - else() - message(FATAL_ERROR "XLC must be higher than 12.0") - endif() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -qsuppress=1540-0724 -qsuppress=1500-029 -qmaxmem=-1 -qalias=noansi -qsmp=omp") -endif() - -# check for incompatible GCC -if(CMAKE_COMPILER_IS_GNUCC) - execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpversion - OUTPUT_VARIABLE GCC_VERSION) - if(GCC_VERSION VERSION_GREATER 4.6 OR GCC_VERSION VERSION_EQUAL 4.6) - #message(STATUS "GCC Version >= 4.6") - else() - message(FATAL_ERROR "GCC must be 4.6 or higher") - endif() -endif() - -# solaris -if(CMAKE_SYSTEM MATCHES "SunOS.*") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -mcpu=niagara2 -lposix4") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -m64 -lposix4") -endif() - -# Always include debug symbols -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g") -set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -g") - -# Enable architecture-specific optimizations -if(CMAKE_BUILD_TYPE MATCHES "Release" AND CMAKE_CXX_COMPILER_ID MATCHES "GNU") - set(CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} -march=native") -endif() - -# More warnings -if(CMAKE_BUILD_TYPE MATCHES "Debug") - if(NOT CMAKE_CXX_COMPILER_ID MATCHES "XL") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall") - endif() - if(CMAKE_COMPILER_IS_GNUCC) - if(GCC_VERSION VERSION_GREATER 4.8 OR GCC_VERSION VERSION_EQUAL 4.8) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-local-typedefs") - endif() - endif() -endif() - -# GNU profiling -if(USE_GPROF) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pg") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pg") - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pg") -endif(USE_GPROF) - -###### Configure features ###### - -# Experimental features -if(USE_EXP) - set(USE_VTUNE ON) - add_definitions(-DGALOIS_USE_EXP) - include_directories("exp/include") - - find_package(OpenMP) - if (OPENMP_FOUND) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - endif () - - if(USE_PROF) - add_definitions(-DGALOIS_USE_PROF) - endif() - - if(USE_SUBVERSION_REVISION) - include(GetSVNVersion) - set(GALOIS_USE_SVNVERSION on) - endif() - - if(USE_HTM) - if(CMAKE_CXX_COMPILER_ID MATCHES "XL") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -qtm -qsmp=speculative") - set(GALOIS_USE_HTM on) - set(GALOIS_USE_SEQ_ONLY on) - set(GALOIS_USE_LONGJMP on) - else() - message(FATAL_ERROR "Hardware transactional memory not supported") - endif() - endif() - - # Experimental Deterministic features - if(USE_DET_INORDER) - add_definitions(-DGALOIS_USE_DET_INORDER) - endif() - if(USE_DET_FIXED_WINDOW) - add_definitions(-DGALOIS_USE_DET_FIXED_WINDOW) - endif() -endif() - -# PThreads -find_package(Threads REQUIRED) - -# NUMA (linux) -find_package(NUMA) -if(NUMA_FOUND) - set(GALOIS_USE_NUMA on) -elseif(USE_STRICT_CONFIG) - message(FATAL_ERROR "Need libnuma") -endif() - -# CILK -include(CheckCilk) - -# HugePages -include(CheckHugePages) -if(NOT HAVE_HUGEPAGES AND USE_STRICT_CONFIG) - message(FATAL_ERROR "Need huge pages") -endif() - -# Longjmp -if(USE_LONGJMP) - set(GALOIS_USE_LONGJMP on) -endif() - -# Boost -set(Boost_ADDITIONAL_VERSIONS "1.40" "1.40.0" "1.47" "1.47.0" "1.49" "1.49.0" "1.51.0") -if(NOT "$ENV{BOOST_DIR}" STREQUAL "") - set(BOOST_ROOT $ENV{BOOST_DIR}) -endif() -find_package(Boost 1.38.0 REQUIRED) -include_directories(${Boost_INCLUDE_DIR}) - -# C++11 features -find_package(CXX11) -if ("${CXX11_FLAGS}" STREQUAL "") - message( FATAL_ERROR "Needs C++11") -endif() -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CXX11_FLAGS}") - -include(CheckEndian) - -include(llvm-extras) -#always import c99 stdint functions into c++ -#include(UseStdMacro) # HandleLLVMOptions.cmake (via llvm-extras) already does this for us -#include_directories("${PROJECT_BINARY_DIR}/include") # llvm-extra already does this for us - -###### Build Hacks ###### - -# XXX(ddn): Hack for lonestar machines -if(NUMA_FOUND) - if(NUMA_OLD) - set(GALOIS_USE_NUMA_OLD on) - endif() -endif() - -###### Global Functions ###### - -include(ParseArguments) - -function(compileApp name) - if(ARGN) - set(Sources ${ARGN}) - else() - file(GLOB Sources *.cpp) - endif() - add_executable(${name} ${Sources}) -endfunction (compileApp) - -function(app name) - PARSE_ARGUMENTS(APP "REQUIRES;EXTLIBS" "" ${ARGN}) - - foreach(required ${APP_REQUIRES}) - if(${${required}} MATCHES "TRUE") - else() - message("-- NOT compiling ${name} (missing: ${required})") - return() - endif() - endforeach() - - compileApp(${name} ${APP_DEFAULT_ARGS}) - 
target_link_libraries(${name} ${APP_EXTLIBS}) - target_link_libraries(${name} galois) - if(INSTALL_APPS) - install(TARGETS ${name} DESTINATION bin) - endif() -endfunction(app) - -###### Source finding ###### -include_directories(include) - -add_subdirectory(lib) -add_subdirectory(src) -add_subdirectory(tools) -add_subdirectory(scripts) -if(NOT SKIP_COMPILE_APPS) - add_subdirectory(apps) - add_subdirectory(inputs) - add_subdirectory(test) -endif() -if(USE_EXP) - add_subdirectory(exp) -endif() - -###### Documentation ###### -set(DOXYFILE_SOURCE_DIR "src\" \"include") -include(UseDoxygen OPTIONAL) - -###### Distribution ###### -include(InstallRequiredSystemLibraries) -set(CPACK_GENERATOR "TGZ") -set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/COPYRIGHT") -set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README") -set(CPACK_PACKAGE_VERSION_MAJOR ${GALOIS_VERSION_MAJOR}) -set(CPACK_PACKAGE_VERSION_MINOR ${GALOIS_VERSION_MINOR}) -set(CPACK_PACKAGE_VERSION_PATCH ${GALOIS_VERSION_PATCH}) -include(CPack) - -###### Installation ###### - -export(TARGETS galois APPEND FILE "${PROJECT_BINARY_DIR}/GaloisTargets.cmake") -export(PACKAGE Galois) - -# Galois include files -file(RELATIVE_PATH rel_include_dir "${INSTALL_CMAKE_DIR}" "${INSTALL_INCLUDE_DIR}") -set(GALOIS_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/include" "${PROJECT_BINARY_DIR}/include") -if(USE_EXP) - set(GALOIS_INCLUDE_DIR "${GALOIS_INCLUDE_DIR}" "${PROJECT_SOURCE_DIR}/exp/include") -endif() - -# Galois include dependencies -set(GALOIS_INCLUDE_DIRS) -get_target_property(defs galois INCLUDE_DIRECTORIES) -foreach(d ${defs}) - string(FIND ${d} ${PROJECT_BINARY_DIR} pos1) - string(FIND ${d} ${PROJECT_SOURCE_DIR} pos2) - if(NOT ${pos1} EQUAL 0 AND NOT ${pos2} EQUAL 0) - set(GALOIS_INCLUDE_DIRS ${GALOIS_INCLUDE_DIRS} ${d}) - endif() -endforeach() - -# Galois compiler definitions -set(galois_defs) -get_directory_property(defs DIRECTORY "${PROJECT_SOURCE_DIR}/src" COMPILE_DEFINITIONS) -foreach(d ${defs}) - set(galois_defs "-D${d} ${galois_defs}") -endforeach() -get_directory_property(defs DIRECTORY "${PROJECT_SOURCE_DIR}/src" COMPILE_DEFINITIONS_${CMAKE_BUILD_TYPE}) -foreach(d ${defs}) - set(galois_defs "-D${d} ${galois_defs}") -endforeach() -string(TOUPPER ${CMAKE_BUILD_TYPE} upper_build_type) -set(GALOIS_FLAGS "${galois_defs} ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${upper_build_type}}") -set(GALOIS_CXX_COMPILER "${CMAKE_CXX_COMPILER}") - -# Generate appropriate CMake files for installation and build trees -configure_file("${PROJECT_SOURCE_DIR}/cmake/Modules/GaloisConfig.cmake.in" - "${PROJECT_BINARY_DIR}/GaloisConfig.cmake" @ONLY) -set(GALOIS_INCLUDE_DIR "\${GALOIS_CMAKE_DIR}/${rel_include_dir}") -configure_file("${PROJECT_SOURCE_DIR}/cmake/Modules/GaloisConfig.cmake.in" - "${PROJECT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/GaloisConfig.cmake" @ONLY) -configure_file("${PROJECT_SOURCE_DIR}/cmake/Modules/GaloisConfigVersion.cmake.in" - "${PROJECT_BINARY_DIR}/GaloisConfigVersion.cmake" @ONLY) -install(FILES - "${PROJECT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/GaloisConfig.cmake" - "${PROJECT_BINARY_DIR}/GaloisConfigVersion.cmake" - DESTINATION "${INSTALL_CMAKE_DIR}" COMPONENT dev) -install(EXPORT GaloisTargets DESTINATION "${INSTALL_CMAKE_DIR}" COMPONENT dev) diff --git a/maxflow/galois/COPYRIGHT b/maxflow/galois/COPYRIGHT deleted file mode 100644 index 1b9283d..0000000 --- a/maxflow/galois/COPYRIGHT +++ /dev/null @@ -1,26 +0,0 @@ -Galois, a framework to exploit amorphous data-parallelism in irregular -programs. 
- -Copyright (C) 2014, The University of Texas at Austin. All rights reserved. -UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS SOFTWARE -AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, FITNESS FOR ANY -PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF PERFORMANCE, AND ANY -WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF DEALING OR USAGE OF TRADE. -NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH RESPECT TO THE USE OF THE -SOFTWARE OR DOCUMENTATION. Under no circumstances shall University be liable -for incidental, special, indirect, direct or consequential damages or loss of -profits, interruption of business, or related expenses which may arise from use -of Software or Documentation, including but not limited to those resulting from -defects in Software and/or Documentation, or loss or inaccuracy of data of any -kind. - -This software is released under the terms of the University of Texas at Austin -Research License available at - - http://www.otc.utexas.edu/Forms/ResearchLicense_SourceCode_11142005.doc , - -which makes this software available without charge to anyone for academic, -research, experimental, or personal use. For all other uses, please contact the -University of Texas at Austin's Office of Technology Commercialization - - http://www.otc.utexas.edu/ diff --git a/maxflow/galois/Doxyfile.in b/maxflow/galois/Doxyfile.in deleted file mode 100644 index 3f3e9a2..0000000 --- a/maxflow/galois/Doxyfile.in +++ /dev/null @@ -1,1511 +0,0 @@ -# Doxyfile 1.5.8 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project -# -# All text after a hash (#) is considered a comment and will be ignored -# The format is: -# TAG = value [value, ...] -# For lists items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (" ") - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the config file -# that follow. The default is UTF-8 which is also the encoding used for all -# text before the first occurrence of this tag. Doxygen uses libiconv (or the -# iconv built into libc) for the transcoding. See -# http://www.gnu.org/software/libiconv for the list of possible encodings. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded -# by quotes) that should identify the project. - -PROJECT_NAME = "@PROJECT_NAME@" - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. -# This could be handy for archiving the generated documentation or -# if some version control system is used. - -PROJECT_NUMBER = "@PROJECT_VERSION@" - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) -# base path where the generated documentation will be put. -# If a relative path is entered, it will be relative to the location -# where doxygen was started. If left blank the current directory will be used. - -OUTPUT_DIRECTORY = "@DOXYFILE_OUTPUT_DIR@" - -# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create -# 4096 sub-directories (in 2 levels) under the output directory of each output -# format and will distribute the generated files over these directories. 
-# Enabling this option can be useful when feeding doxygen a huge amount of -# source files, where putting all generated files in the same directory would -# otherwise cause performance problems for the file system. - -CREATE_SUBDIRS = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. -# The default language is English, other supported languages are: -# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, -# Croatian, Czech, Danish, Dutch, Farsi, Finnish, French, German, Greek, -# Hungarian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, Polish, -# Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, Slovene, -# Spanish, Swedish, and Ukrainian. - -OUTPUT_LANGUAGE = English - -# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will -# include brief member descriptions after the members that are listed in -# the file and class documentation (similar to JavaDoc). -# Set to NO to disable this. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend -# the brief description of a member or function before the detailed description. -# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator -# that is used to form the text in various listings. Each string -# in this list, if found as the leading text of the brief description, will be -# stripped from the text and the result after processing the whole list, is -# used as the annotated text. Otherwise, the brief description is used as-is. -# If left blank, the following values are used ("$name" is automatically -# replaced with the name of the entity): "The $name class" "The $name widget" -# "The $name file" "is" "provides" "specifies" "contains" -# "represents" "a" "an" "the" - -ABBREVIATE_BRIEF = - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# Doxygen will generate a detailed section even if there is only a brief -# description. - -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full -# path before files name in the file list and in the header files. If set -# to NO the shortest path that makes the file name unique will be used. - -FULL_PATH_NAMES = NO - -# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag -# can be used to strip a user-defined part of the path. Stripping is -# only done if one of the specified strings matches the left-hand part of -# the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the -# path to strip. 
- -STRIP_FROM_PATH = - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of -# the path mentioned in the documentation of a class, which tells -# the reader which header file to include in order to use a class. -# If left blank only the name of the header file containing the class -# definition is used. Otherwise one should specify the include paths that -# are normally passed to the compiler using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter -# (but less readable) file names. This can be useful is your file systems -# doesn't support long names like on DOS, Mac, or CD-ROM. - -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen -# will interpret the first line (until the first dot) of a JavaDoc-style -# comment as the brief description. If set to NO, the JavaDoc -# comments will behave just like regular Qt-style comments -# (thus requiring an explicit @brief command for a brief description.) - -JAVADOC_AUTOBRIEF = YES - -# If the QT_AUTOBRIEF tag is set to YES then Doxygen will -# interpret the first line (until the first dot) of a Qt-style -# comment as the brief description. If set to NO, the comments -# will behave just like regular Qt-style comments (thus requiring -# an explicit \brief command for a brief description.) - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen -# treat a multi-line C++ special comment block (i.e. a block of //! or /// -# comments) as a brief description. This used to be the default behaviour. -# The new default is to treat a multi-line C++ comment block as a detailed -# description. Set this tag to YES if you prefer the old behaviour instead. - -MULTILINE_CPP_IS_BRIEF = NO - -# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented -# member inherits the documentation from any documented member that it -# re-implements. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce -# a new page for each member. If set to NO, the documentation of a member will -# be part of the file/class/namespace that contains it. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. -# Doxygen uses this value to replace tabs by spaces in code fragments. - -TAB_SIZE = 8 - -# This tag can be used to specify a number of aliases that acts -# as commands in the documentation. An alias has the form "name=value". -# For example adding "sideeffect=\par Side Effects:\n" will allow you to -# put the command \sideeffect (or @sideeffect) in the documentation, which -# will result in a user-defined paragraph with heading "Side Effects:". -# You can put \n's in the value part of an alias to insert newlines. - -ALIASES = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C -# sources only. Doxygen will then generate output that is more tailored for C. -# For instance, some of the names that are used will be different. The list -# of all members will be omitted, etc. - -OPTIMIZE_OUTPUT_FOR_C = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java -# sources only. Doxygen will then generate output that is more tailored for -# Java. For instance, namespaces will be presented as packages, qualified -# scopes will look different, etc. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources only. 
Doxygen will then generate output that is more tailored for -# Fortran. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for -# VHDL. - -OPTIMIZE_OUTPUT_VHDL = NO - -# Doxygen selects the parser to use depending on the extension of the files it parses. -# With this tag you can assign which parser to use for a given extension. -# Doxygen has a built-in mapping, but you can override or extend it using this tag. -# The format is ext=language, where ext is a file extension, and language is one of -# the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP, -# Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat -# .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran), -# use: inc=Fortran f=C - -EXTENSION_MAPPING = - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should -# set this tag to YES in order to let doxygen match functions declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. -# func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. - -BUILTIN_STL_SUPPORT = YES - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. -# Doxygen will parse them like normal C++ but will assume all classes use public -# instead of private inheritance when no explicit protection keyword is present. - -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate getter -# and setter methods for a property. Setting this option to YES (the default) -# will make doxygen to replace the get and set methods by a property in the -# documentation. This will only work if the methods are indeed getting or -# setting a simple type. If this is not the case, or you want to show the -# methods anyway, you should set this option to NO. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES, then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. - -DISTRIBUTE_GROUP_DOC = NO - -# Set the SUBGROUPING tag to YES (the default) to allow class member groups of -# the same type (for instance a group of public functions) to be put as a -# subgroup of that type (e.g. under the Public Functions section). Set it to -# NO to prevent subgrouping. Alternatively, this can be done per class using -# the \nosubgrouping command. - -SUBGROUPING = YES - -# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum -# is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically -# be useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. 
- -TYPEDEF_HIDES_STRUCT = NO - -# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to -# determine which symbols to keep in memory and which to flush to disk. -# When the cache is full, less often used symbols will be written to disk. -# For small to medium size projects (<1000 input files) the default value is -# probably good enough. For larger projects a too small cache size can cause -# doxygen to be busy swapping symbols to and from disk most of the time -# causing a significant performance penality. -# If the system has enough physical memory increasing the cache will improve the -# performance by keeping more symbols in memory. Note that the value works on -# a logarithmic scale so increasing the size by one will rougly double the -# memory usage. The cache size is given by this formula: -# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, -# corresponding to a cache size of 2^16 = 65536 symbols - -SYMBOL_CACHE_SIZE = 0 - -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- - -# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in -# documentation are documented, even if no documentation was available. -# Private class members and static file members will be hidden unless -# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES - -EXTRACT_ALL = YES - -# If the EXTRACT_PRIVATE tag is set to YES all private members of a class -# will be included in the documentation. - -EXTRACT_PRIVATE = YES - -# If the EXTRACT_STATIC tag is set to YES all static members of a file -# will be included in the documentation. - -EXTRACT_STATIC = YES - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) -# defined locally in source files will be included in the documentation. -# If set to NO only classes defined in header files are included. - -EXTRACT_LOCAL_CLASSES = YES - -# This flag is only useful for Objective-C code. When set to YES local -# methods, which are defined in the implementation section but not in -# the interface are included in the documentation. -# If set to NO (the default) only methods in the interface are included. - -EXTRACT_LOCAL_METHODS = NO - -# If this flag is set to YES, the members of anonymous namespaces will be -# extracted and appear in the documentation as a namespace called -# 'anonymous_namespace{file}', where file will be replaced with the base -# name of the file that contains the anonymous namespace. By default -# anonymous namespace are hidden. - -EXTRACT_ANON_NSPACES = YES - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all -# undocumented members of documented classes, files or namespaces. -# If set to NO (the default) these members will be included in the -# various overviews, but no documentation section is generated. -# This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. -# If set to NO (the default) these classes will be included in the various -# overviews. This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all -# friend (class|struct|union) declarations. -# If set to NO (the default) these declarations will be included in the -# documentation. 
- -HIDE_FRIEND_COMPOUNDS = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any -# documentation blocks found inside the body of a function. -# If set to NO (the default) these blocks will be appended to the -# function's detailed documentation block. - -HIDE_IN_BODY_DOCS = NO - -# The INTERNAL_DOCS tag determines if documentation -# that is typed after a \internal command is included. If the tag is set -# to NO (the default) then the documentation will be excluded. -# Set it to YES to include the internal documentation. - -INTERNAL_DOCS = NO - -# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate -# file names in lower-case letters. If set to YES upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. - -CASE_SENSE_NAMES = YES - -# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen -# will show members with their full class and namespace scopes in the -# documentation. If set to YES the scope will be hidden. - -HIDE_SCOPE_NAMES = NO - -# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen -# will put a list of the files that are included by a file in the documentation -# of that file. - -SHOW_INCLUDE_FILES = YES - -# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] -# is inserted in the documentation for inline members. - -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen -# will sort the (detailed) documentation of file and class members -# alphabetically by member name. If set to NO the members will appear in -# declaration order. - -SORT_MEMBER_DOCS = YES - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the -# brief documentation of file, namespace and class members alphabetically -# by member name. If set to NO (the default) the members will appear in -# declaration order. - -SORT_BRIEF_DOCS = NO - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the -# hierarchy of group names into alphabetical order. If set to NO (the default) -# the group names will appear in their defined order. - -SORT_GROUP_NAMES = NO - -# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be -# sorted by fully-qualified names, including namespaces. If set to -# NO (the default), the class list will be sorted only by class name, -# not including the namespace part. -# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. -# Note: This option applies only to the class list, not to the -# alphabetical list. - -SORT_BY_SCOPE_NAME = NO - -# The GENERATE_TODOLIST tag can be used to enable (YES) or -# disable (NO) the todo list. This list is created by putting \todo -# commands in the documentation. - -GENERATE_TODOLIST = YES - -# The GENERATE_TESTLIST tag can be used to enable (YES) or -# disable (NO) the test list. This list is created by putting \test -# commands in the documentation. - -GENERATE_TESTLIST = YES - -# The GENERATE_BUGLIST tag can be used to enable (YES) or -# disable (NO) the bug list. This list is created by putting \bug -# commands in the documentation. - -GENERATE_BUGLIST = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or -# disable (NO) the deprecated list. This list is created by putting -# \deprecated commands in the documentation. 
- -GENERATE_DEPRECATEDLIST= YES - -# The ENABLED_SECTIONS tag can be used to enable conditional -# documentation sections, marked by \if sectionname ... \endif. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines -# the initial value of a variable or define consists of for it to appear in -# the documentation. If the initializer consists of more lines than specified -# here it will be hidden. Use a value of 0 to hide initializers completely. -# The appearance of the initializer of individual variables and defines in the -# documentation can be controlled using \showinitializer or \hideinitializer -# command in the documentation regardless of this setting. - -MAX_INITIALIZER_LINES = 30 - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated -# at the bottom of the documentation of classes and structs. If set to YES the -# list will mention the files that were used to generate the documentation. - -SHOW_USED_FILES = YES - -# If the sources in your project are distributed over multiple directories -# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy -# in the documentation. The default is NO. - -SHOW_DIRECTORIES = NO - -# Set the SHOW_FILES tag to NO to disable the generation of the Files page. -# This will remove the Files entry from the Quick Index and from the -# Folder Tree View (if specified). The default is YES. - -SHOW_FILES = YES - -# Set the SHOW_NAMESPACES tag to NO to disable the generation of the -# Namespaces page. -# This will remove the Namespaces entry from the Quick Index -# and from the Folder Tree View (if specified). The default is YES. - -SHOW_NAMESPACES = YES - -# The FILE_VERSION_FILTER tag can be used to specify a program or script that -# doxygen should invoke to get the current version for each file (typically from -# the version control system). Doxygen will invoke the program by executing (via -# popen()) the command <command> <input-file>, where <command> is the value of -# the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file -# provided by doxygen. Whatever the program writes to standard output -# is used as the file version. See the manual for examples. - -FILE_VERSION_FILTER = - -# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by -# doxygen. The layout file controls the global structure of the generated output files -# in an output format independent way. To create the layout file that represents -# doxygen's defaults, run doxygen with the -l option. You can optionally specify a -# file name after the option, if omitted DoxygenLayout.xml will be used as the name -# of the layout file. - -LAYOUT_FILE = - -#--------------------------------------------------------------------------- -# configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated -# by doxygen. Possible values are YES and NO. If left blank NO is used. - -QUIET = YES - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated by doxygen. Possible values are YES and NO. If left blank -# NO is used. - -WARNINGS = YES - -# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings -# for undocumented members. If EXTRACT_ALL is set to YES then this flag will -# automatically be disabled.
- -WARN_IF_UNDOCUMENTED = YES - -# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some -# parameters in a documented function, or documenting parameters that -# don't exist or using markup commands wrongly. - -WARN_IF_DOC_ERROR = YES - -# This WARN_NO_PARAMDOC option can be abled to get warnings for -# functions that are documented, but have no documentation for their parameters -# or return value. If set to NO (the default) doxygen will only warn about -# wrong or incomplete parameter documentation, but not about the absence of -# documentation. - -WARN_NO_PARAMDOC = NO - -# The WARN_FORMAT tag determines the format of the warning messages that -# doxygen can produce. The string should contain the $file, $line, and $text -# tags, which will be replaced by the file and line number from which the -# warning originated and the warning text. Optionally the format may contain -# $version, which will be replaced by the version of the file (if it could -# be obtained via FILE_VERSION_FILTER) - -WARN_FORMAT = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning -# and error messages should be written. If left blank the output is written -# to stderr. - -WARN_LOGFILE = - -#--------------------------------------------------------------------------- -# configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag can be used to specify the files and/or directories that contain -# documented source files. You may enter file names like "myfile.cpp" or -# directories like "/usr/src/myproject". Separate the files or directories -# with spaces. - -INPUT = "@DOXYFILE_SOURCE_DIR@" - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is -# also the default input encoding. Doxygen uses libiconv (or the iconv built -# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for -# the list of possible encodings. - -INPUT_ENCODING = UTF-8 - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. If left -# blank the following patterns are tested: -# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx -# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 - -FILE_PATTERNS = - -# The RECURSIVE tag can be used to turn specify whether or not subdirectories -# should be searched for input files as well. Possible values are YES and NO. -# If left blank NO is used. - -RECURSIVE = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. - -EXCLUDE = "_darcs" - -# The EXCLUDE_SYMLINKS tag can be used select whether or not files or -# directories that are symbolic links (a Unix filesystem feature) are excluded -# from the input. - -EXCLUDE_SYMLINKS = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. 
Note that the wildcards are matched -# against the file with absolute path, so to exclude all test directories -# for example use the pattern */test/* - -EXCLUDE_PATTERNS = "*/.*" "*/.*/*" - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test - -EXCLUDE_SYMBOLS = - -# The EXAMPLE_PATH tag can be used to specify one or more files or -# directories that contain example code fragments that are included (see -# the \include command). - -#EXAMPLE_PATH = "@CMAKE_CURRENT_SOURCE_DIR@/examples" -EXAMPLE_PATH = - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. If left -# blank all files are included. - -EXAMPLE_PATTERNS = - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude -# commands irrespective of the value of the RECURSIVE tag. -# Possible values are YES and NO. If left blank NO is used. - -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or -# directories that contain images that are included in the documentation (see -# the \image command). - -IMAGE_PATH = "@CMAKE_CURRENT_SOURCE_DIR@" - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command <filter> <input-file>, where -# <filter> is the value of the INPUT_FILTER tag, and <input-file> is the name of an -# input file. Doxygen will then use the output that the filter program writes -# to standard output. -# If FILTER_PATTERNS is specified, this tag will be -# ignored. - -INPUT_FILTER = - -# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern -# basis. -# Doxygen will compare the file name with each pattern and apply the -# filter if there is a match. -# The filters are a list of the form: -# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further -# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER -# is applied to all files. - -FILTER_PATTERNS = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will be used to filter the input files when producing source -# files to browse (i.e. when SOURCE_BROWSER is set to YES). - -FILTER_SOURCE_FILES = NO - -#--------------------------------------------------------------------------- -# configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will -# be generated. Documented entities will be cross-referenced with these sources. -# Note: To get rid of all source code in the generated output, make sure also -# VERBATIM_HEADERS is set to NO. - -SOURCE_BROWSER = NO - -# Setting the INLINE_SOURCES tag to YES will include the body -# of functions and classes directly in the documentation. - -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct -# doxygen to hide any special comment blocks from generated source code -# fragments.
Normal C and C++ comments will always remain visible. - -STRIP_CODE_COMMENTS = YES - -# If the REFERENCED_BY_RELATION tag is set to YES -# then for each documented function all documented -# functions referencing it will be listed. - -REFERENCED_BY_RELATION = NO - -# If the REFERENCES_RELATION tag is set to YES -# then for each documented function all documented entities -# called/used by that function will be listed. - -REFERENCES_RELATION = NO - -# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) -# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from -# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will -# link to the source code. -# Otherwise they will link to the documentation. - -REFERENCES_LINK_SOURCE = YES - -# If the USE_HTAGS tag is set to YES then the references to source code -# will point to the HTML generated by the htags(1) tool instead of doxygen -# built-in source browser. The htags tool is part of GNU's global source -# tagging system (see http://www.gnu.org/software/global/global.html). You -# will need version 4.8.6 or higher. - -USE_HTAGS = NO - -# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen -# will generate a verbatim copy of the header file for each class for -# which an include is specified. Set to NO to disable this. - -VERBATIM_HEADERS = YES - -#--------------------------------------------------------------------------- -# configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index -# of all compounds will be generated. Enable this if the project -# contains a lot of classes, structs, unions or interfaces. - -ALPHABETICAL_INDEX = NO - -# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then -# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns -# in which this list will be split (can be a number in the range [1..20]) - -COLS_IN_ALPHA_INDEX = 5 - -# In case all classes in a project start with a common prefix, all -# classes will be put under the same header in the alphabetical index. -# The IGNORE_PREFIX tag can be used to specify one or more prefixes that -# should be ignored while generating the index headers. - -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES (the default) Doxygen will -# generate HTML output. - -GENERATE_HTML = YES - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `html' will be used as the default path. - -HTML_OUTPUT = "@DOXYFILE_HTML_DIR@" - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for -# each generated HTML page (for example: .htm,.php,.asp). If it is left blank -# doxygen will generate files with .html extension. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a personal HTML header for -# each generated HTML page. If it is left blank doxygen will generate a -# standard header. - -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a personal HTML footer for -# each generated HTML page. If it is left blank doxygen will generate a -# standard footer. 
- -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading -# style sheet that is used by each HTML page. It can be used to -# fine-tune the look of the HTML output. If the tag is left blank doxygen -# will generate a default style sheet. Note that doxygen will try to copy -# the style sheet file to the HTML output directory, so don't put your own -# stylesheet in the HTML output directory as well, or it will be erased! - -HTML_STYLESHEET = - -# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, -# files or namespaces will be aligned in HTML using tables. If set to -# NO a bullet list will be used. - -HTML_ALIGN_MEMBERS = YES - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. For this to work a browser that supports -# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox -# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). - -HTML_DYNAMIC_SECTIONS = NO - -# If the GENERATE_DOCSET tag is set to YES, additional index files -# will be generated that can be used as input for Apple's Xcode 3 -# integrated development environment, introduced with OSX 10.5 (Leopard). -# To create a documentation set, doxygen will generate a Makefile in the -# HTML output directory. Running make will produce the docset in that -# directory and running "make install" will install the docset in -# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find -# it at startup. -# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information. - -GENERATE_DOCSET = NO - -# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the -# feed. A documentation feed provides an umbrella under which multiple -# documentation sets from a single provider (such as a company or product suite) -# can be grouped. - -DOCSET_FEEDNAME = "Doxygen generated docs" - -# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that -# should uniquely identify the documentation set bundle. This should be a -# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen -# will append .docset to the name. - -DOCSET_BUNDLE_ID = org.doxygen.Project - -# If the GENERATE_HTMLHELP tag is set to YES, additional index files -# will be generated that can be used as input for tools like the -# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) -# of the generated HTML documentation. - -GENERATE_HTMLHELP = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can -# be used to specify the file name of the resulting .chm file. You -# can add a path in front of the file if the result should not be -# written to the html output directory. - -CHM_FILE = - -# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can -# be used to specify the location (absolute path including file name) of -# the HTML help compiler (hhc.exe). If non-empty doxygen will try to run -# the HTML help compiler on the generated index.hhp. - -HHC_LOCATION = - -# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag -# controls if a separate .chi index file is generated (YES) or that -# it should be included in the master .chm file (NO). - -GENERATE_CHI = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING -# is used to encode HtmlHelp index (hhk), content (hhc) and project file -# content. 
- -CHM_INDEX_ENCODING = - -# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag -# controls whether a binary table of contents is generated (YES) or a -# normal table of contents (NO) in the .chm file. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members -# to the contents of the HTML help documentation and to the tree view. - -TOC_EXPAND = NO - -# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER -# are set, an additional index file will be generated that can be used as input for -# Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated -# HTML documentation. - -GENERATE_QHP = NO - -# If the QHG_LOCATION tag is specified, the QCH_FILE tag can -# be used to specify the file name of the resulting .qch file. -# The path specified is relative to the HTML output folder. - -QCH_FILE = - -# The QHP_NAMESPACE tag specifies the namespace to use when generating -# Qt Help Project output. For more information please see -# http://doc.trolltech.com/qthelpproject.html#namespace - -QHP_NAMESPACE = - -# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating -# Qt Help Project output. For more information please see -# http://doc.trolltech.com/qthelpproject.html#virtual-folders - -QHP_VIRTUAL_FOLDER = doc - -# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add. -# For more information please see -# http://doc.trolltech.com/qthelpproject.html#custom-filters - -QHP_CUST_FILTER_NAME = - -# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the custom filter to add.For more information please see -# Qt Help Project / Custom Filters. - -QHP_CUST_FILTER_ATTRS = - -# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's -# filter section matches. -# Qt Help Project / Filter Attributes. - -QHP_SECT_FILTER_ATTRS = - -# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can -# be used to specify the location of Qt's qhelpgenerator. -# If non-empty doxygen will try to run qhelpgenerator on the generated -# .qhp file. - -QHG_LOCATION = - -# The DISABLE_INDEX tag can be used to turn on/off the condensed index at -# top of each HTML page. The value NO (the default) enables the index and -# the value YES disables it. - -DISABLE_INDEX = NO - -# This tag can be used to set the number of enum values (range [1..20]) -# that doxygen will group on one line in the generated HTML documentation. - -ENUM_VALUES_PER_LINE = 4 - -# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index -# structure should be generated to display hierarchical information. -# If the tag value is set to FRAME, a side panel will be generated -# containing a tree-like index structure (just like the one that -# is generated for HTML Help). For this to work a browser that supports -# JavaScript, DHTML, CSS and frames is required (for instance Mozilla 1.0+, -# Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are -# probably better off using the HTML help feature. Other possible values -# for this tag are: HIERARCHIES, which will generate the Groups, Directories, -# and Class Hierarchy pages using a tree view instead of an ordered list; -# ALL, which combines the behavior of FRAME and HIERARCHIES; and NONE, which -# disables this behavior completely. For backwards compatibility with previous -# releases of Doxygen, the values YES and NO are equivalent to FRAME and NONE -# respectively. 
- -GENERATE_TREEVIEW = NONE - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be -# used to set the initial width (in pixels) of the frame in which the tree -# is shown. - -TREEVIEW_WIDTH = 250 - -# Use this tag to change the font size of Latex formulas included -# as images in the HTML documentation. The default is 10. Note that -# when you change the font size after a successful doxygen run you need -# to manually remove any form_*.png images from the HTML output directory -# to force them to be regenerated. - -FORMULA_FONTSIZE = 10 - -#--------------------------------------------------------------------------- -# configuration options related to the LaTeX output -#--------------------------------------------------------------------------- - -# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will -# generate Latex output. - -GENERATE_LATEX = @DOXYFILE_LATEX@ - -# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `latex' will be used as the default path. - -LATEX_OUTPUT = "@DOXYFILE_LATEX_DIR@" - -# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be -# invoked. If left blank `latex' will be used as the default command name. - -LATEX_CMD_NAME = "@LATEX_COMPILER@" - -# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to -# generate index for LaTeX. If left blank `makeindex' will be used as the -# default command name. - -MAKEINDEX_CMD_NAME = "@MAKEINDEX_COMPILER@" - -# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact -# LaTeX documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_LATEX = NO - -# The PAPER_TYPE tag can be used to set the paper type that is used -# by the printer. Possible values are: a4, a4wide, letter, legal and -# executive. If left blank a4wide will be used. - -PAPER_TYPE = a4wide - -# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX -# packages that should be included in the LaTeX output. - -EXTRA_PACKAGES = amsmath - -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for -# the generated latex document. The header should contain everything until -# the first chapter. If it is left blank doxygen will generate a -# standard header. Notice: only use this tag if you know what you are doing! - -LATEX_HEADER = - -# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated -# is prepared for conversion to pdf (using ps2pdf). The pdf file will -# contain links (just like the HTML output) instead of page references -# This makes the output suitable for online browsing using a pdf viewer. - -PDF_HYPERLINKS = YES - -# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of -# plain latex in the generated Makefile. Set this option to YES to get a -# higher quality PDF documentation. - -USE_PDFLATEX = @DOXYFILE_PDFLATEX@ - -# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. -# command to the generated LaTeX files. This will instruct LaTeX to keep -# running if errors occur, instead of asking the user for help. -# This option is also used when generating formulas in HTML. - -LATEX_BATCHMODE = YES - -# If LATEX_HIDE_INDICES is set to YES then doxygen will not -# include the index chapters (such as File Index, Compound Index, etc.) -# in the output. 
- -LATEX_HIDE_INDICES = NO - -#--------------------------------------------------------------------------- -# configuration options related to the RTF output -#--------------------------------------------------------------------------- - -# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output -# The RTF output is optimized for Word 97 and may not look very pretty with -# other RTF readers or editors. - -GENERATE_RTF = NO - -# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `rtf' will be used as the default path. - -RTF_OUTPUT = rtf - -# If the COMPACT_RTF tag is set to YES Doxygen generates more compact -# RTF documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_RTF = NO - -# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated -# will contain hyperlink fields. The RTF file will -# contain links (just like the HTML output) instead of page references. -# This makes the output suitable for online browsing using WORD or other -# programs which support those fields. -# Note: wordpad (write) and others do not support links. - -RTF_HYPERLINKS = NO - -# Load stylesheet definitions from file. Syntax is similar to doxygen's -# config file, i.e. a series of assignments. You only have to provide -# replacements, missing definitions are set to their default value. - -RTF_STYLESHEET_FILE = - -# Set optional variables used in the generation of an rtf document. -# Syntax is similar to doxygen's config file. - -RTF_EXTENSIONS_FILE = - -#--------------------------------------------------------------------------- -# configuration options related to the man page output -#--------------------------------------------------------------------------- - -# If the GENERATE_MAN tag is set to YES (the default) Doxygen will -# generate man pages - -GENERATE_MAN = NO - -# The MAN_OUTPUT tag is used to specify where the man pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `man' will be used as the default path. - -MAN_OUTPUT = man - -# The MAN_EXTENSION tag determines the extension that is added to -# the generated man pages (default is the subroutine's section .3) - -MAN_EXTENSION = .3 - -# If the MAN_LINKS tag is set to YES and Doxygen generates man output, -# then it will generate one additional man file for each entity -# documented in the real man page(s). These additional files -# only source the real man page, but without them the man command -# would be unable to find the correct page. The default is NO. - -MAN_LINKS = NO - -#--------------------------------------------------------------------------- -# configuration options related to the XML output -#--------------------------------------------------------------------------- - -# If the GENERATE_XML tag is set to YES Doxygen will -# generate an XML file that captures the structure of -# the code including all documentation. - -GENERATE_XML = NO - -# The XML_OUTPUT tag is used to specify where the XML pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `xml' will be used as the default path. - -XML_OUTPUT = xml - -# The XML_SCHEMA tag can be used to specify an XML schema, -# which can be used by a validating XML parser to check the -# syntax of the XML files. 
- -XML_SCHEMA = - -# The XML_DTD tag can be used to specify an XML DTD, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_DTD = - -# If the XML_PROGRAMLISTING tag is set to YES Doxygen will -# dump the program listings (including syntax highlighting -# and cross-referencing information) to the XML output. Note that -# enabling this will significantly increase the size of the XML output. - -XML_PROGRAMLISTING = YES - -#--------------------------------------------------------------------------- -# configuration options for the AutoGen Definitions output -#--------------------------------------------------------------------------- - -# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will -# generate an AutoGen Definitions (see autogen.sf.net) file -# that captures the structure of the code including all -# documentation. Note that this feature is still experimental -# and incomplete at the moment. - -GENERATE_AUTOGEN_DEF = NO - -#--------------------------------------------------------------------------- -# configuration options related to the Perl module output -#--------------------------------------------------------------------------- - -# If the GENERATE_PERLMOD tag is set to YES Doxygen will -# generate a Perl module file that captures the structure of -# the code including all documentation. Note that this -# feature is still experimental and incomplete at the -# moment. - -GENERATE_PERLMOD = NO - -# If the PERLMOD_LATEX tag is set to YES Doxygen will generate -# the necessary Makefile rules, Perl scripts and LaTeX code to be able -# to generate PDF and DVI output from the Perl module output. - -PERLMOD_LATEX = NO - -# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be -# nicely formatted so it can be parsed by a human reader. -# This is useful -# if you want to understand what is going on. -# On the other hand, if this -# tag is set to NO the size of the Perl module output will be much smaller -# and Perl will parse it just the same. - -PERLMOD_PRETTY = YES - -# The names of the make variables in the generated doxyrules.make file -# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. -# This is useful so different doxyrules.make files included by the same -# Makefile don't overwrite each other's variables. - -PERLMOD_MAKEVAR_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the preprocessor -#--------------------------------------------------------------------------- - -# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will -# evaluate all C-preprocessor directives found in the sources and include -# files. - -ENABLE_PREPROCESSING = YES - -# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro -# names in the source code. If set to NO (the default) only conditional -# compilation will be performed. Macro expansion can be done in a controlled -# way by setting EXPAND_ONLY_PREDEF to YES. - -MACRO_EXPANSION = NO - -# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES -# then the macro expansion is limited to the macros specified with the -# PREDEFINED and EXPAND_AS_DEFINED tags. - -EXPAND_ONLY_PREDEF = NO - -# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files -# in the INCLUDE_PATH (see below) will be search if a #include is found. 
- -SEARCH_INCLUDES = YES - -# The INCLUDE_PATH tag can be used to specify one or more directories that -# contain include files that are not input files but should be processed by -# the preprocessor. - -INCLUDE_PATH = - -# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard -# patterns (like *.h and *.hpp) to filter out the header-files in the -# directories. If left blank, the patterns specified with FILE_PATTERNS will -# be used. - -INCLUDE_FILE_PATTERNS = - -# The PREDEFINED tag can be used to specify one or more macro names that -# are defined before the preprocessor is started (similar to the -D option of -# gcc). The argument of the tag is a list of macros of the form: name -# or name=definition (no spaces). If the definition and the = are -# omitted =1 is assumed. To prevent a macro definition from being -# undefined via #undef or recursively expanded use the := operator -# instead of the = operator. - -PREDEFINED = - -# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then -# this tag can be used to specify a list of macro names that should be expanded. -# The macro definition that is found in the sources will be used. -# Use the PREDEFINED tag if you want to use a different macro definition. - -EXPAND_AS_DEFINED = - -# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then -# doxygen's preprocessor will remove all function-like macros that are alone -# on a line, have an all uppercase name, and do not end with a semicolon. Such -# function macros are typically used for boiler-plate code, and will confuse -# the parser if not removed. - -SKIP_FUNCTION_MACROS = YES - -#--------------------------------------------------------------------------- -# Configuration::additions related to external references -#--------------------------------------------------------------------------- - -# The TAGFILES option can be used to specify one or more tagfiles. -# Optionally an initial location of the external documentation -# can be added for each tagfile. The format of a tag file without -# this location is as follows: -# -# TAGFILES = file1 file2 ... -# Adding location for the tag files is done as follows: -# -# TAGFILES = file1=loc1 "file2 = loc2" ... -# where "loc1" and "loc2" can be relative or absolute paths or -# URLs. If a location is present for each tag, the installdox tool -# does not have to be run to correct the links. -# Note that each tag file must have a unique name -# (where the name does NOT include the path) -# If a tag file is not located in the directory in which doxygen -# is run, you must also specify the path to the tagfile here. - -TAGFILES = - -# When a file name is specified after GENERATE_TAGFILE, doxygen will create -# a tag file that is based on the input files it reads. - -GENERATE_TAGFILE = - -# If the ALLEXTERNALS tag is set to YES all external classes will be listed -# in the class index. If set to NO only the inherited external classes -# will be listed. - -ALLEXTERNALS = NO - -# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed -# in the modules index. If set to NO, only the current project's groups will -# be listed. - -EXTERNAL_GROUPS = YES - -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of `which perl'). 
- -PERL_PATH = /usr/bin/perl - -#--------------------------------------------------------------------------- -# Configuration options related to the dot tool -#--------------------------------------------------------------------------- - -# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will -# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base -# or super classes. Setting the tag to NO turns the diagrams off. Note that -# this option is superseded by the HAVE_DOT option below. This is only a -# fallback. It is recommended to install and use dot, since it yields more -# powerful graphs. - -CLASS_DIAGRAMS = YES - -# You can define message sequence charts within doxygen comments using the \msc -# command. Doxygen will then run the mscgen tool (see -# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the -# documentation. The MSCGEN_PATH tag allows you to specify the directory where -# the mscgen tool resides. If left empty the tool is assumed to be found in the -# default search path. - -MSCGEN_PATH = - -# If set to YES, the inheritance and collaboration graphs will hide -# inheritance and usage relations if the target is undocumented -# or is not a class. - -HIDE_UNDOC_RELATIONS = YES - -# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is -# available from the path. This tool is part of Graphviz, a graph visualization -# toolkit from AT&T and Lucent Bell Labs. The other options in this section -# have no effect if this option is set to NO (the default) - -HAVE_DOT = @DOXYFILE_DOT@ - -# By default doxygen will write a font called FreeSans.ttf to the output -# directory and reference it in all dot files that doxygen generates. This -# font does not include all possible unicode characters however, so when you need -# these (or just want a differently looking font) you can specify the font name -# using DOT_FONTNAME. You need need to make sure dot is able to find the font, -# which can be done by putting it in a standard location or by setting the -# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory -# containing the font. - -DOT_FONTNAME = FreeSans - -# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. -# The default size is 10pt. - -DOT_FONTSIZE = 10 - -# By default doxygen will tell dot to use the output directory to look for the -# FreeSans.ttf font (which doxygen will put there itself). If you specify a -# different font using DOT_FONTNAME you can set the path where dot -# can find it using this tag. - -DOT_FONTPATH = - -# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect inheritance relations. Setting this tag to YES will force the -# the CLASS_DIAGRAMS tag to NO. - -CLASS_GRAPH = YES - -# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect implementation dependencies (inheritance, containment, and -# class references variables) of the class with other documented classes. - -COLLABORATION_GRAPH = YES - -# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for groups, showing the direct groups dependencies - -GROUP_GRAPHS = YES - -# If the UML_LOOK tag is set to YES doxygen will generate inheritance and -# collaboration diagrams in a style similar to the OMG's Unified Modeling -# Language. 
- -UML_LOOK = NO - -# If set to YES, the inheritance and collaboration graphs will show the -# relations between templates and their instances. - -TEMPLATE_RELATIONS = NO - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT -# tags are set to YES then doxygen will generate a graph for each documented -# file showing the direct and indirect include dependencies of the file with -# other documented files. - -INCLUDE_GRAPH = YES - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and -# HAVE_DOT tags are set to YES then doxygen will generate a graph for each -# documented header file showing the documented files that directly or -# indirectly include this file. - -INCLUDED_BY_GRAPH = YES - -# If the CALL_GRAPH and HAVE_DOT options are set to YES then -# doxygen will generate a call dependency graph for every global function -# or class method. Note that enabling this option will significantly increase -# the time of a run. So in most cases it will be better to enable call graphs -# for selected functions only using the \callgraph command. - -CALL_GRAPH = NO - -# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then -# doxygen will generate a caller dependency graph for every global function -# or class method. Note that enabling this option will significantly increase -# the time of a run. So in most cases it will be better to enable caller -# graphs for selected functions only using the \callergraph command. - -CALLER_GRAPH = NO - -# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen -# will graphical hierarchy of all classes instead of a textual one. - -GRAPHICAL_HIERARCHY = YES - -# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES -# then doxygen will show the dependencies a directory has on other directories -# in a graphical way. The dependency relations are determined by the #include -# relations between the files in the directories. - -DIRECTORY_GRAPH = YES - -# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images -# generated by dot. Possible values are png, jpg, or gif -# If left blank png will be used. - -DOT_IMAGE_FORMAT = png - -# The tag DOT_PATH can be used to specify the path where the dot tool can be -# found. If left blank, it is assumed the dot tool can be found in the path. - -DOT_PATH = "@DOXYGEN_DOT_PATH@" - -# The DOTFILE_DIRS tag can be used to specify one or more directories that -# contain dot files that are included in the documentation (see the -# \dotfile command). - -DOTFILE_DIRS = - -# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of -# nodes that will be shown in the graph. If the number of nodes in a graph -# becomes larger than this value, doxygen will truncate the graph, which is -# visualized by representing a node as a red box. Note that doxygen if the -# number of direct children of the root node in a graph is already larger than -# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note -# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. - -DOT_GRAPH_MAX_NODES = 50 - -# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the -# graphs generated by dot. A depth value of 3 means that only nodes reachable -# from the root by following a path via at most 3 edges will be shown. Nodes -# that lay further from the root node will be omitted. Note that setting this -# option to 1 or 2 may greatly reduce the computation time needed for large -# code bases. 
Also note that the size of a graph can be further restricted by -# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. - -MAX_DOT_GRAPH_DEPTH = 0 - -# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent -# background. This is disabled by default, because dot on Windows does not -# seem to support this out of the box. Warning: Depending on the platform used, -# enabling this option may lead to badly anti-aliased labels on the edges of -# a graph (i.e. they become hard to read). - -DOT_TRANSPARENT = YES - -# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output -# files in one run (i.e. multiple -o and -T options on the command line). This -# makes dot run faster, but since only newer versions of dot (>1.8.10) -# support this, this feature is disabled by default. - -DOT_MULTI_TARGETS = NO - -# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will -# generate a legend page explaining the meaning of the various boxes and -# arrows in the dot generated graphs. - -GENERATE_LEGEND = YES - -# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will -# remove the intermediate dot files that are used to generate -# the various graphs. - -DOT_CLEANUP = YES - -#--------------------------------------------------------------------------- -# Options related to the search engine -#--------------------------------------------------------------------------- - -# The SEARCHENGINE tag specifies whether or not a search engine should be -# used. If set to NO the values of all tags below this one will be ignored. - -SEARCHENGINE = NO diff --git a/maxflow/galois/README b/maxflow/galois/README deleted file mode 100644 index cb47287..0000000 --- a/maxflow/galois/README +++ /dev/null @@ -1,4 +0,0 @@ -See online documentation at: - http://iss.ices.utexas.edu/?p=projects/galois/doc/current/getting_started - -See build/readme.txt for build instructions diff --git a/maxflow/galois/apps/CMakeLists.txt b/maxflow/galois/apps/CMakeLists.txt deleted file mode 100644 index 9d3a296..0000000 --- a/maxflow/galois/apps/CMakeLists.txt +++ /dev/null @@ -1,23 +0,0 @@ -if(NOT CMAKE_CXX_COMPILER_ID MATCHES "XL") - add_subdirectory(avi) - add_subdirectory(connectedcomponents) - add_subdirectory(des) - add_subdirectory(kruskal) - add_subdirectory(gmetis) -endif() -add_subdirectory(barneshut) -add_subdirectory(betweennesscentrality) -add_subdirectory(bfs) -add_subdirectory(boruvka) -add_subdirectory(clustering) -add_subdirectory(delaunayrefinement) -add_subdirectory(delaunaytriangulation) -add_subdirectory(independentset) -add_subdirectory(matching) -add_subdirectory(pagerank) -add_subdirectory(pta) -add_subdirectory(preflowpush) -add_subdirectory(sssp) -add_subdirectory(surveypropagation) -add_subdirectory(spanningtree) -add_subdirectory(tutorial) diff --git a/maxflow/galois/apps/avi/CMakeLists.txt b/maxflow/galois/apps/avi/CMakeLists.txt deleted file mode 100644 index 1259093..0000000 --- a/maxflow/galois/apps/avi/CMakeLists.txt +++ /dev/null @@ -1,94 +0,0 @@ -file(GLOB Sources - util/*.cpp - libMat/*.cpp - libElm/libGeom/*.cpp - libElm/libQuad/*.cpp - libElm/libShape/*.cpp - libElm/libShapesEvaluated/*.cpp - libElm/libElement/*.cpp - libElm/*.cpp - libElOp/*.cpp - libAVI/*.cpp - libMeshInit/femap/*.cpp - libMeshInit/dgmechanics/*.cpp - libMeshInit/*.cpp - dummy.f90 -) - -find_package(Fortran) -if(Fortran_FOUND) - enable_language(Fortran OPTIONAL) -endif() - -find_package(LAPACK) -find_package(ZLIB) -# TODO(ddn): CMake Boost module on BG/Q cannot be called 
multiple times. Skip -# avi althogether for now. -if(CMAKE_SYSTEM_NAME MATCHES "BlueGeneQ") - set(Boost_FOUND off) -else() - # Needs a slightly higher version than other Galois apps - find_package(Boost 1.50.0 COMPONENTS system iostreams filesystem) -endif() - -if(CMAKE_Fortran_COMPILER_WORKS AND LAPACK_FOUND AND ZLIB_FOUND AND Boost_FOUND) - include_directories(${ZLIB_INCLUDE_DIRS}) - add_library(AVI ${Sources}) - target_link_libraries(AVI ${ZLIB_LIBRARIES}) - target_link_libraries(AVI ${Boost_LIBRARIES}) - set(AVI_FOUND TRUE) -endif() - -app(AVIorderedSerial main/AVIorderedSerial.cpp - REQUIRES AVI_FOUND - EXTLIBS AVI ${LAPACK_LIBRARIES}) - -app(AVIodgOrdered main/AVIodgOrdered.cpp - REQUIRES AVI_FOUND - EXTLIBS AVI ${LAPACK_LIBRARIES}) - -app(AVIodgExplicit main/AVIodgExplicit.cpp - REQUIRES AVI_FOUND - EXTLIBS AVI ${LAPACK_LIBRARIES}) - -app(AVIodgExplicitNoLock main/AVIodgExplicitNoLock.cpp - REQUIRES AVI_FOUND - EXTLIBS AVI ${LAPACK_LIBRARIES}) - -# the files may be removed from the release without causing error in cmake -if(0) - app(AVIodgReadonly exp/AVIodgReadonly.cpp - REQUIRES AVI_FOUND USE_EXP - EXTLIBS AVI ${LAPACK_LIBRARIES}) - - app(AVIodgImplicit exp/AVIodgImplicit.cpp - REQUIRES AVI_FOUND USE_EXP - EXTLIBS AVI ${LAPACK_LIBRARIES}) - - app(AVIodgAutoPriLock exp/AVIodgAutoPriLock.cpp - REQUIRES AVI_FOUND USE_EXP - EXTLIBS AVI ${LAPACK_LIBRARIES}) - - app(AVIodgAutoShare exp/AVIodgAutoShare.cpp - REQUIRES AVI_FOUND USE_EXP - EXTLIBS AVI ${LAPACK_LIBRARIES}) - - app(AVIodgNB exp/AVIodgNB.cpp - REQUIRES AVI_FOUND USE_EXP - EXTLIBS AVI ${LAPACK_LIBRARIES}) -endif() - -include_directories(util) -include_directories(main) -include_directories(libElm) -include_directories(libElm/libQuad) -include_directories(libElm/libGeom) -include_directories(libElm/libShapesEvaluated) -include_directories(libElm/libShape) -include_directories(libElm/libElement) -include_directories(libAVI) -include_directories(libMeshInit) -include_directories(libMeshInit/dgmechanics) -include_directories(libMeshInit/femap) -include_directories(libMat) -include_directories(libElOp) diff --git a/maxflow/galois/apps/avi/dummy.f90 b/maxflow/galois/apps/avi/dummy.f90 deleted file mode 100644 index 3012008..0000000 --- a/maxflow/galois/apps/avi/dummy.f90 +++ /dev/null @@ -1,9 +0,0 @@ -!*************************************************** -! File: Minimal95.F95 -! Language: Fortran 95 -! Description: This program does nothing -!*************************************************** - -PROGRAM Minimal95 - print *, "Hello" -END PROGRAM Minimal95 diff --git a/maxflow/galois/apps/avi/libAVI/AVI.h b/maxflow/galois/apps/avi/libAVI/AVI.h deleted file mode 100644 index 2f902f8..0000000 --- a/maxflow/galois/apps/avi/libAVI/AVI.h +++ /dev/null @@ -1,447 +0,0 @@ -/** - * AVI.h - * DG++ - * - * Created by Adrian Lew on 9/23/08. - * - * Copyright (c) 2008 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef AVI_CLASS -#define AVI_CLASS - -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - - -#include "AuxDefs.h" -#include "ElementalOperation.h" -#include "DiagonalMassForSW.h" -#include "StressWork.h" - -#include "util.h" - -/** - \brief AVI: abstract base class for AVI - - An AVI object consists of: - - 1) A force field \f$f_a(q_1,\ldots,q_N,t)\f$, where \f$q_a\f$ are degrees of freedom, - and \f$f_a\f$ is the force acting on degree of freedom "a".\n - 2) A mass matrix for the degrees of freedom of the potential. \n - 3) A time step \n - 4) The last time at which the force field was updated\n - 5) The last time each degree of freedom was updated\n - - The AVI class assumes that there exists four provided global - arrays, accessed through a LocalToGlobalMap: - - 1) An array Qval with the value of each degree of freedom at its last update, \f$q_a^{i-1}\f$\n - 2) An array Vval with the latest value of the time derivative of the - degrees of freedom at its last update, \f$v_a^{i-1}\f$.\n - 3) An array Vbval with the latest value of the time derivative of the - degree of freedom at the middle of a time step, \f$v_a^{i-1/2}\f$.\n - 4) An array Tval with the last time at which a degree of freedom was updated, \f$t_a^{i-1}\f$. - - Given \f$q_a^{i-1}\f$, \f$v_a^{i-1/2}\f$ and \f$t_a^{i-1}\f$, the class - computes \f$q_a^i\f$, \f$v_a^i\f$ and \f$v_a^{i+1/2}\f$ for each - degree of freedom of the object's force field. It also updates the - values of \f$t_a^i\f$ and the objects latest time. - - The update is as follows: - - If \f$q_a\f$ does not have imposed values \f$q_a(t)\f$ (e.g., Dirichlet bounday conditions), then - - 1) Compute \f$q_a^i = q_a^{i-1} + (t-t_a^i) v_a^{i-1/2}\f$ for each \f$a\f$, where \f$t\f$ is the current time of the object.\n - 2) Solve \f$ M_{ab} \Delta v_b = - \Delta t f_a(q_a^i)\f$ for \f$\Delta v_b\f$, where \f$f_a(t)\f$ is the force on node \f$a\f$ by the force field in the object computed with \f$q_a^i\f$.\n - 3) Compute \f$ v_a^i = v_a^{i-1/2} + \Delta v_b/2\f$\n - 4) Compute \f$ v_a^{i+1/2} = v_a^{i-1/2} + \Delta v_b\f$ - - Else, set \f$q_a^i=q_a(t_a^i)\f$, \f$v_a^i=\dot{q}_a(t_a^i)\f$ and - \f$v_a^{i+1/2}\f$ is defined to any value, it is not guaranteed to - have any meaning. - - The class also has an initialization procedure to compute the - initial values of \f$v_a^{1/2}\f$. - - Following the convention elsewhere in the code, degrees of freedom - are labeled with field number f and degree of freedom number for - that field, b. Even thought it is not the most natural numbering in - the context of ODEs, it simplifies the communication with the rest - of the code. Field and degree of freedom numbers begin from zero. - - The class separates the update process into three different functions: gather, update, assemble. - In this way more flexibility is provided to be able to work with force fields that depend, perhaps, - on sets of internal variables that are not necessarily updated with AVI. 
- - Imposed values need to be provided separately. A virtual procedure is constructed to this end. - - \todo The update routine does not take further arguments that may not need to be updated but - may be needed to compute the force field, such as when dealing with plasticity or a coupled heat - conduction problem. We will need to deal with this eventually. - - */ - -class AVI { -protected: - double timeStamp; - double timeStep; - -public: - AVI (double timeStamp): timeStamp(timeStamp) { } - virtual ~AVI () { } - virtual AVI * clone () const = 0; - // virtual int getElemIndex(void) const = 0; - - //! Returns the current value of the time step for the potential - double getTimeStep () const { - return timeStep; - } - - //! Returns the last time at which the force field was updated - double getTimeStamp () const { - return timeStamp; - } - - //! set the last update time. Returns true upon success. \n - //! - //! @param timeval value of the last update time - bool setTimeStamp (double timeval) { - assert (timeval >= 0.0); - timeStamp = timeval; - return true; - } - - //! Returns the next time at which the force field will be updated - double getNextTimeStamp () const { - return getTimeStamp() + getTimeStep(); - } - - //! increment the time stamp - void incTimeStamp () { - setTimeStamp(getNextTimeStamp()); - } - - //! Returns the field numbers used whose degrees of freedom participate in the force field computation - //! - //! getFields()[i] returns the field number beginning from zero.\n - //! If the degree of freedom \f$q_a\f$ is indexed with [f][b] locally, then it corresponds to field - //! getFields()[f] in the Global arrays. - virtual const std::vector& getFields () const = 0; - - //! Returns the number of degrees of freedom per field used - //! for the computation \n - //! - //! getFieldDof(fieldnum) returns the number of deegrees of freedom - //! in the participating field number "fieldnumber". The argument - //! fieldnumber begins from zero.\n - // - virtual size_t getFieldDof (size_t fieldnumber) const = 0; - - //! Returns the global element index for the AVI object - virtual size_t getGlobalIndex (void) const = 0; - - virtual const DResidue& getOperation () const = 0; - - virtual const Element& getElement () const = 0; - - virtual const ElementGeometry& getGeometry () const = 0; - - //! write the updated time vector into the argument provided - //! value filled in is the one obtained from getNextTimeStamp () - //! @param tnew - virtual void computeLocalTvec (MatDouble& tnew) const = 0; - - //! Initialization routine to set values at the half time step - //! This function is called only once per element and is called after gather - //! and before update and assemble. - //! @param q used to calculate the velocity at the half step. - //! local vector with the last known values of the degrees of freedom, \f$q_a^{i-1}\f$ in - //! the notation above. \n - //! @param v previous values of the time derivatives of velocity - //! @param vb previous values of the time derivatives at the - //! middle of a time step - //! of the degrees of freedom, \f$v_a^{i-1/2}\f$ in the notation above.\n - //! @param ti local vector with the last update time of a the degrees of freedom, - //! \f$t_a^{i-1}\f$ in the notation above.\n - //! - //! @param tnew updated local time vector @see AVI::computeLocalTvec () - //! @param qnew updated value of q vector - //! @param vbinit initialized value of vb vector - //! @param forcefield temporary intermediate vector - //! 
@param funcval temporary intermediate vector - //! @param deltaV temporary intermediate vector - //! For example, q[f][b] = Q[L2G.map(ElementIndex,getFields()[f],b)] - //! - //! This function uses getImposedValues to fill the imposed values arrays. - - virtual bool vbInit ( - const MatDouble& q, - const MatDouble& v, - const MatDouble& vb, - const MatDouble& ti, - const MatDouble& tnew, - MatDouble& qnew, - MatDouble& vbinit, - MatDouble& forcefield, - MatDouble& funcval, - MatDouble& deltaV - ) const = 0; - - //! Computes the value of the force field at a given configuration. Returns true upon success. - //! - //! @param argval values of the degrees of freedom for the force field of the object. - //! argval[f][a] contains the value of the degree of freedom indexed with field "f" - //! and degree of freedom number in that field "a". - //! - //! @param forcefield values of \f$f_b\f$ upon exit. forcefield[f][a] contains the value - //! of \f$f_b\f$ for the degree of freedom indexed with field "f" and degree of - //! freedom number in that field "a". - virtual bool - getForceField (const MatDouble& argval, MatDouble& forcefield) const = 0; - - //! update of the given forcefield. It returns the new values for the degrees of freedom - //! and its time derivatives. These need yet to be assembled. Returns true upon success. - //! - //! The forcefield time is updated. Of course, this does not happen with the last update time of each - //! degree of freedom, which are updated in the assemble part. - //! - //! In the following vectors, index [f][b] indicates the value for the "b"-th degree of freedom - //! of field "f". - //! - //! @param q vector with the last known values of the degrees of freedom, \f$q_a^{i-1}\f$ in - //! the notation above. \n - //! @param v vector with the last known values of the time derivatives of the degrees of freedom, - //! \f$v_a^{i-1}\f$ in the notation above.\n - //! @param vb vector with the last known values of the time derivatives at the middle of a time step - //! of the degrees of freedom, \f$v_a^{i-1/2}\f$ in the notation above.\n - //! @param ti vector with the last update time of a the degrees of freedom, \f$t_a^{i-1}\f$ in - //! the notation above.\n - //! @param tnew vector with the updated time of a the degrees of freedom, \f$t_a^{i}\f$ in - //! @param qnew Upon exit, new values of the degrees of freedom, \f$q_a^{i}\f$ in - //! the notation above.\n - //! @param vnew Upon exit, new values of the time derivatives of the degrees of freedom, - //! \f$v_a^{i}\f$ in the notation above.\n - //! @param vbnew Upon exit, new values of the time derivatives at the middle of a time step - //! of the degrees of freedom, \f$v_a^{i+1/2}\f$ in the notation above.\n - //! - //! @param forcefield temporary intermediate vector - //! @param funcval temporary intermediate vector - //! @param deltaV temporary intermediate vector - //! - - - virtual bool update ( - const MatDouble& q, - const MatDouble& v, - const MatDouble& vb, - const MatDouble& ti, - const MatDouble& tnew, - MatDouble& qnew, - MatDouble& vnew, - MatDouble& vbnew, - MatDouble& forcefield, - MatDouble& funcval, - MatDouble& deltaV - ) const = 0; - - //! Gathers the values needed from the global arrays to perform the force field computation. - //! - //! - //! identify its global degrees of freedom. This information is not embedded in the object. \n - //! @param L2G Local to Global map used to find the values in the Global arrays\n - //! 
@param Qval Global array with the last updated values of the degrees of freedom.\n - //! @param Vval Global array with the last updated values of the time derivatives of the - //! degrees of freedom.\n - //! @param Vbval Global array with the last updated values of the time derivatives of the - //! degrees of freedom at the middle of the time step.\n - //! @param Tval Global array with the times at which the degrees of freedom were last updated.\n - //! @param q Upon exit, local vector with the last known values of the degrees of freedom, \f$q_a^{i-1}\f$ in - //! the notation above. \n - //! @param v Upon exit, local vector with the last known values of the time derivatives of the - //! degrees of freedom, - //! \f$v_a^{i-1}\f$ in the notation above.\n - //! @param vb Upon exit, local vector with the last known values of the time derivatives at the - //! middle of a time step - //! of the degrees of freedom, \f$v_a^{i-1/2}\f$ in the notation above.\n - //! @param ti Upon exit, local vector with the last update time of a the degrees of freedom, - //! \f$t_a^{i-1}\f$ in the notation above.\n - //! - //! For example, q[f][b] = Q[L2G.map(ElementIndex,getFields()[f],b)] - //! - //! This function uses getImposedValues to fill the imposed values arrays. - virtual bool - gather ( const LocalToGlobalMap& L2G, - const VecDouble& Qval, - const VecDouble& Vval, - const VecDouble& Vbval, - const VecDouble& Tval, - MatDouble& q, - MatDouble& v, - MatDouble& vb, - MatDouble& ti) const = 0; - - //! Assembles the updated values in the global array, including the latest time of update of the - //! degrees of freedom in the object. - //! - //! - //! identify its global degrees of freedom. This information is not embedded in the object. \n - //! @param L2G Local to Global map used to find the values in the Global arrays\n - //! @param qnew local vector with the updated values of the degrees of freedom, \f$q_a^{i}\f$ in - //! the notation above. \n - //! @param vnew Upon exit, local vector with the updated values of the time derivatives of the - //! degrees of freedom, - //! \f$v_a^{i}\f$ in the notation above.\n - //! @param vbnew Upon exit, local vector with the updated values of the time derivatives at the - //! middle of a time step - //! of the degrees of freedom, \f$v_a^{i+1/2}\f$ in the notation above.\n - //! @param tnew updated values of time vector - //! @param Qval Global array where to assemble the updated values of the degrees of freedom.\n - //! @param Vval Global array where to assemble the time derivatives of the - //! degrees of freedom.\n - //! @param Vbval Global array where to assemble the time derivatives of the - //! degrees of freedom at the middle of the time step.\n - //! @param Tval Global array where to assemble the times at which the degrees of freedom were last - //! updated.\n - //! @param LUpdate Global array to keep track of which element updated the Dof last. This is used - //! to keep Dofs from being updated out of order due to messaging delays. contains the global - //! elem index of the last element to update each Dof. - //! - //! For example, Q[L2G.map(ElementIndex,getFields()[f],b)]=q[f][b] - //! - virtual bool assemble (const LocalToGlobalMap& L2G, - const MatDouble& qnew, - const MatDouble& vnew, - const MatDouble& vbnew, - const MatDouble& tnew, - VecDouble& Qval, - VecDouble& Vval, - VecDouble& Vbval, - VecDouble& Tval, - VecDouble& LUpdate) const = 0; - -protected: - virtual void setTimeStep (double epsilon = 1.0) = 0; - - //! 
Solves the system of equations with the mass matrix for \f$\Delta v_b\f$. \n - //! This is made a virtual function so that, if the mass matrix happens to be - //! diagonal, it is done efficiently. - //! - //! @param funcval values of \f$\Delta t f_a(q_a^i)\f$. - //! Notice that the multiplication by \f$\Delta t\f$ is already included. - //! funcval[f][b] contains the value of \f$\Delta t f_a(q_a^i)\f$ for the degree of - //! freedom indexed with field "f" and degree of freedom number in that field "b". - //! - //! @param DeltaV values of \f$\Delta v_a\f$ upon exit. DeltaV[f][b] contains the value - //! of \f$\Delta v_a\f$ for the degree of freedom indexed with field "f" and degree of - //! freedom number in that field "b". - - virtual void - computeDeltaV (const MatDouble& funcval, MatDouble& DeltaV) const = 0; - - //! Imposed values of the degrees of freedom and its time derivative - //! - //! @param ElementIndex GlobalElementIndex or index of the force field in the AVI object, used to - //! identify its global degrees of freedom. This information is not embedded in the object. \n - //! @param L2G Local to Global map used to find the values in the Global arrays\n - //! @param field field number for which the imposed values are sought, starting from zero.\n - //! @param dof degree of freedom number within field "field" for which the imposed values are sought, - //! starting from zero.\n - //! @param qvalue upon exit, imposed value of the degree of freedom\n - //! @param vvalue upon exit, imposed value of the time derivative of the degree of freedom\n - //! - //! The values of ElementIndex and L2G are not always needed, but they are included here to provide - //! a general interface. - //! - //! \todo There has to be a cleaner way to deal with the value of the boundary condition other - //! than providing a general interface to code them in the derived classes. - virtual bool getImposedValues (const GlobalElementIndex& ElementIndex, - const LocalToGlobalMap& L2G, size_t field, size_t dof, - double& qvalue, double& vvalue) const = 0; - -public: - //! @return string representation for printing debugging etc - virtual const std::string toString () const { - std::ostringstream ss; - ss << "AVI(id: " << getGlobalIndex() << ", " << getNextTimeStamp() << " )"; - return ss.str (); - } - - //! @return for use with std::ostream - friend std::ostream& operator << (std::ostream& out, const AVI& avi) { - out << avi.toString (); - return out; - } -}; - - -/** - * A comparator class for comparing two AVI objects - * according to their time stamps - */ -struct AVIComparator { -// static const double EPS = 1e-12; - - //! tie break comparison - //! @param left pointer to first AVI object - //! @param right pointer to second AVI object - static inline int compare (const AVI* const left, const AVI* const right) { - int result = DoubleComparator::compare (left->getNextTimeStamp (), right->getNextTimeStamp ()); - - if (result == 0) { - result = left->getGlobalIndex() - right->getGlobalIndex(); - } - - return result; - } - - //! return true if left < right - //! @param left pointer to first AVI object - //! @param right pointer to second AVI object - bool operator () (const AVI* const left, const AVI* const right) const { - return compare (left, right) < 0; - } -}; - -//! since C++ priority_queue is a max heap, this -//! comparator allows using C++ priority_queue as a -//! min heap by inverting the comparison -struct AVIReverseComparator: public AVIComparator { - //! @returns true if left > right - //! 
@param left pointer to first AVI object - //! @param right pointer to second AVI object - bool operator () (const AVI* const left, const AVI* const right) const { - return compare (left, right) > 0; - } -}; - - -#endif diff --git a/maxflow/galois/apps/avi/libAVI/StandardAVI.cpp b/maxflow/galois/apps/avi/libAVI/StandardAVI.cpp deleted file mode 100644 index 6c3b5bb..0000000 --- a/maxflow/galois/apps/avi/libAVI/StandardAVI.cpp +++ /dev/null @@ -1,437 +0,0 @@ -/* - * AVI.h - * DG++ - * - * Created by Mark Potts on 3/25/09. - * - * Copyright (c) 2009 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "StandardAVI.h" - -#include "util.h" - - - -bool StandardAVI::gather ( const LocalToGlobalMap& L2G, const VecDouble& Qval, const VecDouble& Vval, - const VecDouble& Vbval, const VecDouble& Tval, - MatDouble& q, - MatDouble& v, - MatDouble& vb, - MatDouble& ti) const { - - // double *Q, *V, *Vb, *T; -// - // VecGetArray (Qval,& Q); - // VecGetArray (Vval,& V); - // VecGetArray (Vbval,& Vb); - // VecGetArray (Tval,& T); - - - if (q.size () < nfields) - q.resize (nfields); - if (v.size () < nfields) - v.resize (nfields); - if (vb.size () < nfields) - vb.resize (nfields); - if (ti.size () < nfields) - ti.resize (nfields); - - for (size_t f = 0; f < nfields; f++) { - size_t fieldDof = getFieldDof (f); - if (q[f].size () < fieldDof) - q[f].resize (fieldDof); - - if (v[f].size () < fieldDof) - v[f].resize (fieldDof); - - if (vb[f].size () < fieldDof) - vb[f].resize (fieldDof); - - if (ti[f].size () < fieldDof) - ti[f].resize (fieldDof); - - for (size_t a = 0; a < fieldDof; a++) { - size_t index = L2G.map (f, a, this->globalIdx); - q[f][a] = Qval[index]; - v[f][a] = Vval[index]; - vb[f][a] = Vbval[index]; - ti[f][a] = Tval[index]; - } - } - - // VecRestoreArray (Qval,& Q); - // VecRestoreArray (Vval,& V); - // VecRestoreArray (Vbval,& Vb); - // VecRestoreArray (Tval,& T); - - - return (true); -} - -void StandardAVI::computeLocalTvec (MatDouble& tnew) const { - assert (tnew.size () == nfields); - - for (size_t f = 0; f < nfields; f++) { - size_t fieldDof = getFieldDof (f); - assert (tnew[f].size () == fieldDof); - - for (size_t a = 0; a < fieldDof; a++) { - tnew[f][a] = getNextTimeStamp (); - } - } -} - - -bool StandardAVI::assemble (const LocalToGlobalMap& L2G, - const MatDouble& qnew, - const MatDouble& vnew, - const MatDouble& vbnew, - const MatDouble& tnew, - VecDouble& Qval, VecDouble& Vval, - VecDouble& Vbval, VecDouble& Tval, VecDouble& LUpdate) 
const { - - // double * resvals = new double[localsize]; - // double * vvals = new double[localsize]; - // double * vbvals = new double[localsize]; - // double * tvals = new double[localsize]; - - // size_t * indices = new size_t[localsize]; - // double * updates = new double[localsize]; - - - for (size_t f = 0, i = 0; f < nfields; f++) { - for (size_t a = 0; a < getFieldDof (f); a++, i++) { - size_t index = L2G.map (f, a, globalIdx); - Qval[index] = qnew[f][a]; - Vval[index] = vnew[f][a]; - Vbval[index] = vbnew[f][a]; - Tval[index] = tnew[f][a]; - LUpdate[index] = (double) globalIdx; - } - } - - // VecSetValues (*Qval, localsize, indices, resvals, INSERT_VALUES); - // VecSetValues (*Vval, localsize, indices, vvals, INSERT_VALUES); - // VecSetValues (*Vbval, localsize, indices, vbvals, INSERT_VALUES); - // VecSetValues (*Tval, localsize, indices, tvals, INSERT_VALUES); - // VecSetValues (*LUpdate, localsize, indices, updates, INSERT_VALUES); - - // delete[] resvals; - // delete[] vvals; - // delete[] vbvals; - // delete[] tvals; - // delete[] indices; - // delete[] updates; - - return (true); -} - -bool StandardAVI::update (const MatDouble& q, - const MatDouble& v, - const MatDouble& vb, - const MatDouble& ti, - const MatDouble& tnew, - MatDouble& qnew, - MatDouble& vnew, - MatDouble& vbnew, - MatDouble& forcefield, - MatDouble& funcval, - MatDouble& deltaV - ) const { - - for (size_t f = 0; f < nfields; f++) { - for (size_t a = 0; a < getFieldDof (f); a++) { -#if 0 - if(imposedFlags[f][a] == true) { - qnew[f][a]=imposedValues[f][a]; - } - else { - qnew[f][a]=q[f][a] + (getNextTimeStamp() - ti[f][a]) * vb[f][a]; - } -#else - if (imposedTypes[f][a] == ONE) { - double t = ti[f][a]; - qnew[f][a] = ((avi_bc_func)[a]) (f, a, t); - } - else { - qnew[f][a] = q[f][a] + (tnew[f][a] - ti[f][a]) * vb[f][a]; - } -#endif - } - } - - // MatDouble funcval (nfields); - getForceField (qnew, forcefield); - - if (funcval.size () != nfields) { - funcval.resize (nfields); - } - - for (size_t f = 0; f < nfields; f++) { - size_t fieldDof = getFieldDof (f); - - if (funcval[f].size () != fieldDof) { - funcval[f].resize (fieldDof); - } - - for (size_t a = 0; a < fieldDof; a++) { - funcval[f][a] = -(getTimeStep ()) * (forcefield)[f][a]; - } - } - - // MatDouble DeltaV; - computeDeltaV (funcval, deltaV); - - for (size_t f = 0; f < nfields; f++) { - for (size_t a = 0; a < getFieldDof (f); a++) { - if (imposedTypes[f][a] == ZERO) { - vnew[f][a] = vb[f][a] + deltaV[f][a] / 2.0; - vbnew[f][a] = vb[f][a] + deltaV[f][a]; - } - else if (imposedTypes[f][a] == ONE) { - vnew[f][a] = 0.0; - vbnew[f][a] = 0.0; - } - else if (imposedTypes[f][a] == TWO) { - double t = ti[f][a]; - vnew[f][a] = ((avi_bc_func)[a]) (f, a, t); - vbnew[f][a] = ((avi_bc_func)[a]) (f, a, t); - } - } - } - - // XXX (amber) Commented, must make explicit call to incTimeStamp in main loop - // setTimeStamp (getNextTimeStamp ()); - - return (true); -} - -bool StandardAVI::vbInit ( - const MatDouble& q, - const MatDouble& v, - const MatDouble& vb, - const MatDouble& ti, - const MatDouble& tnew, - MatDouble& qnew, - MatDouble& vbinit, - MatDouble& forcefield, - MatDouble& funcval, - MatDouble& deltaV - ) const { - - // MatDouble qnew; - - // qnew.resize ((q).size ()); - - assert (qnew.size () == q.size ()); - - for (size_t f = 0; f < nfields; f++) { - - assert (qnew[f].size () == q[f].size ()); - - for (size_t a = 0; a < getFieldDof (f); a++) { - if (imposedFlags[f][a] == true) { - qnew[f][a] = imposedValues[f][a]; - } - else { - qnew[f][a] = (q)[f][a] + 
(tnew[f][a] - ti[f][a]) / 2.0 * vb[f][a]; - } - } - } - - getForceField ((qnew), forcefield); - -#ifdef DEBUG - std::cerr << "forcefield = "; - for (size_t f = 0; f < nfields; ++f) { - printIter(std::cerr, forcefield[f].begin(), forcefield[f].end()); - } -#endif - - // MatDouble funcval (nfields); - if (funcval.size () != nfields) { - funcval.resize (nfields); - } - - for (size_t f = 0; f < nfields; f++) { - size_t fieldDof = getFieldDof (f); - - if (funcval[f].size () != fieldDof) { - funcval[f].resize (fieldDof); - } - - for (size_t a = 0; a < fieldDof; a++) { - funcval[f][a] = -(getTimeStep ()) * (forcefield)[f][a]; - } - } - - // MatDouble DeltaV; - computeDeltaV (funcval, deltaV); - -#ifdef DEBUG - std::cerr << "funcval = "; - for (size_t f = 0; f < nfields; ++f) { - printIter(std::cerr, funcval[f].begin(), funcval[f].end()); - } - std::cerr << "DeltaV = "; - for (size_t f = 0; f < nfields; ++f) { - printIter(std::cerr, DeltaV[f].begin(), DeltaV[f].end()); - } -#endif - - for (size_t f = 0; f < nfields; f++) { - for (size_t a = 0; a < getFieldDof (f); a++) { - vbinit[f][a] = (vb[f][a] + deltaV[f][a] / 2.0); - } - } - return (true); -} - - -void StandardAVI::computeDeltaV(const MatDouble& funcval, - MatDouble& DeltaV) const { - - if(DeltaV.size()getElement().getGeometry().getConnectivity()[i]); - } - printf("\n"); - for(size_t i = 0; i < MMdiag.size(); i++) { - for(size_t j = 0; j < MMdiag[i].size(); j++) - printf("%e ",MMdiag[i][j]); - printf("\n"); - } - }; - - */ - - - diff --git a/maxflow/galois/apps/avi/libAVI/StandardAVI.h b/maxflow/galois/apps/avi/libAVI/StandardAVI.h deleted file mode 100644 index 6762c75..0000000 --- a/maxflow/galois/apps/avi/libAVI/StandardAVI.h +++ /dev/null @@ -1,349 +0,0 @@ -/** - * StandardAVI.h - * DG++ - * - * Created by Adrian Lew on 9/23/08. - * - * Copyright (c) 2008 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef _STANDARD_AVI_H_ -#define _STANDARD_AVI_H_ - -#include "AVI.h" - -/** \brief StandardAVI class implementation of AVI base class - - */ - -class StandardAVI: public AVI { -public: - //! function used for obtaining value of boundary conditions - typedef double (*BCFunc) (int, int, double); - - //! type of boundary conditions imposed - //! ZERO means no boundary conditions - enum BCImposedType { - ZERO, ONE, TWO - }; - - //! StandardAVI constructor designed to handle integration of an individual element - //! - //! @param L2G local to global map for access to Petsc vectors - //! 
@param MyRes ref to a Stresswork for this element - //! @param MassVec ref to the assembled mass vector for the mesh - //! @param globalIdx is the Element's unique global index - //! @param IFlag is a ref to the Boundary values indicator - //! @param IVal is a ref to the actual values of boundary conditions, if IFlag is not false - //! @param delta is a double that is used as a safety factor in computing the times step for an element - //! @param time allows the AVI object's time to be set to some value - //! - - StandardAVI( - const LocalToGlobalMap& L2G, - const DResidue& MyRes, - const VecDouble& MassVec, - size_t globalIdx, - const MatBool& IFlag, - const MatDouble& IVal, - const double delta, - const double time) - - : AVI (time), - operation (MyRes), - globalIdx (globalIdx), - imposedFlags (IFlag), - imposedValues (IVal), - delta (delta) { - - init(L2G, MassVec); - - - imposedTypes = std::vector< std::vector > (IFlag.size (), std::vector ()); - - for (size_t f = 0; f < IFlag.size (); ++f) { - - for (size_t a = 0; a < IFlag[f].size (); ++f) { - if (!IFlag[f][a]) { - imposedTypes[f].push_back (StandardAVI::ONE); - - } else { - imposedTypes[f].push_back (StandardAVI::ZERO); - } - } - } - } - - //! StandardAVI constructor designed to handle integration of an individual element - //! - //! @param MyRes ref to a Stresswork for this element - //! @param MassVec ref to the assembled mass vector for the mesh - //! @param L2G local to global map for access to Petsc vectors - //! @param globalIdx is the Element's unique global index - //! @param IType is a vector containing types of imposed boundary conditons @see StandardAVI::BCImposedType - //! @param bcfunc_vec is a vector of function pointers used to obtain the value of boundary conditions - //! @param delta is a double that is used as a safety factor in computing the times step for an element - //! @param time allows the AVI object's time to be set to some value - //! - StandardAVI( - const LocalToGlobalMap& L2G, - const DResidue& MyRes, - const VecDouble& MassVec, - size_t globalIdx, - const std::vector >& IType, - const std::vector& bcfunc_vec, - const double delta, - const double time) - - : AVI (time), - operation (MyRes), - globalIdx (globalIdx), - imposedTypes (IType), - delta (delta) { - - init (L2G, MassVec); - - - if(imposedFlags.size() != IType.size()) { - imposedFlags.resize(IType.size()); - } - - if(imposedValues.size() != IType.size()) { - imposedValues.resize(IType.size()); - } - - if(imposedTypes.size() != IType.size()) { - imposedTypes.resize(IType.size()); - } - - for(size_t f = 0;f < IType.size();f++){ - if(imposedFlags[f].size() != IType[f].size()) { - imposedFlags[f].resize(IType[f].size()); - } - - if(imposedValues[f].size() != IType[f].size()) { - imposedValues[f].resize(IType[f].size()); - } - - if(imposedTypes[f].size() != IType[f].size()) { - imposedTypes[f].resize(IType[f].size()); - } - - for(size_t a = 0;a < IType[f].size();a++){ - if(IType[f][a] != StandardAVI::ZERO){ - imposedFlags[f][a] = false; - }else{ - imposedFlags[f][a] = true; - } - imposedValues[f][a] = 0.0; - imposedTypes[f][a] = IType[f][a]; - } - - } - - for(size_t a = 0;a < bcfunc_vec.size();a++){ - avi_bc_func.push_back(bcfunc_vec[a]); - } - } - - - //! 
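  // Illustrative sketch (zeroBC, bcs and numConstrainedDofs are hypothetical names): a
  // BCFunc handed to the constructor above simply maps (field, dof, time) to the imposed
  // value, e.g.
  //
  //   double zeroBC (int /*field*/, int /*dof*/, double /*t*/) { return 0.0; }
  //   std::vector<StandardAVI::BCFunc> bcs (numConstrainedDofs, &zeroBC);  // indexed by local dof number
  //
  // update () then evaluates avi_bc_func[a] (f, a, t) for dofs whose imposed type is ONE or TWO.

  //!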
Copy constructor - StandardAVI (const StandardAVI& that) : - AVI (that), - operation (that.operation), - MMdiag (that.MMdiag), - globalIdx (that.globalIdx), - avi_bc_func (that.avi_bc_func), - imposedTypes (that.imposedTypes), - imposedFlags (that.imposedFlags), - imposedValues (that.imposedValues), - nfields (that.nfields), - delta (that.delta) { - - setTimeStep (); - } - - virtual StandardAVI* clone () const { - return new StandardAVI (*this); - } - - virtual const DResidue& getOperation () const { return operation; } - - size_t getFieldDof (size_t fieldnumber) const { - return operation.getFieldDof (fieldnumber); - } - - const std::vector& getFields () const { - return operation.getFields (); - } - - //! returns the element geometry - const ElementGeometry& getGeometry () const { - return operation.getElement ().getGeometry (); - } - - //! returns the element - const Element& getElement () const { - return operation.getElement (); - } - - //! Updates the force field through the operation Stresswork class - bool getForceField (const MatDouble& argval, MatDouble& forcefield) const { - operation.getVal (argval, forcefield); - return (true); - } - - - size_t getGlobalIndex (void) const { - return (globalIdx); - } - - //! write the updated time vector into the argument provided - //! value filled in is the one obtained from getNextTimeStamp () - virtual void computeLocalTvec (MatDouble& tnew) const; - - virtual bool vbInit ( - const MatDouble& q, - const MatDouble& v, - const MatDouble& vb, - const MatDouble& ti, - const MatDouble& tnew, - MatDouble& qnew, - MatDouble& vbinit, - MatDouble& forcefield, - MatDouble& funcval, - MatDouble& deltaV - ) const; - - - virtual bool update (const MatDouble& q, - const MatDouble& v, - const MatDouble& vb, - const MatDouble& ti, - const MatDouble& tnew, - MatDouble& qnew, - MatDouble& vnew, - MatDouble& vbnew, - MatDouble& forcefield, - MatDouble& funcval, - MatDouble& deltaV - ) const ; - - bool gather ( - const LocalToGlobalMap& L2G, - const VecDouble& Qval, - const VecDouble& Vval, - const VecDouble& Vbval, - const VecDouble& Tval, - MatDouble& q, - MatDouble& v, - MatDouble& vb, - MatDouble& ti - ) const; - - bool assemble (const LocalToGlobalMap& L2G, - const MatDouble& qnew, - const MatDouble& vnew, - const MatDouble& vbnew, - const MatDouble& tnew, - VecDouble& Qval, - VecDouble& Vval, - VecDouble& Vbval, - VecDouble& Tval, - VecDouble& LUpdate) const; - -protected: - //! Sets the time step for the element based upon the element geometry and sound speed. The safety factor, delta, is set - //! during the construction of the AVI object. The optional parameter, epsilon, allows the time step to be adjusted further. - //! @param epsilon: optional parameter which allows the time step to be set to a fraction of its normal value. - virtual void setTimeStep (double epsilon = 1.0) { - timeStep = epsilon * delta * (operation.getElement ().getGeometry ().getInRadius ()) / operation.getMaterial ().getSoundSpeed (); - - } - - virtual void computeDeltaV (const MatDouble& funcval, MatDouble& DeltaV) const; - - virtual bool getImposedValues (const GlobalElementIndex& ElementIndex, - const LocalToGlobalMap& L2G, size_t field, size_t dof, - double& qvalue, double& vvalue) const; - - //! Option to round the time step in order to address round-off error. Not currently used - //! @param min_ts -- the smallest time step in mesh. Every time step will be rounded to a value 3 significant digits - //! 
smaller than the min_ts - void roundTimeStep (double min_ts) { - char val[80], val2[80]; - sprintf (val, "%0.3e", min_ts / 1000.); - double cut_off = strtod (val, NULL); - strncpy (val2, val, 5); - cut_off = cut_off / strtod (val2, NULL); - timeStep = floor (timeStep / cut_off) * cut_off; - } - - -private: - // disabling assignment - StandardAVI& operator = (const StandardAVI& that) { - return *this; - } - - void init(const LocalToGlobalMap& L2G, const VecDouble& MassVec) { - nfields = operation.getFields().size(); - setTimeStep(); - setDiagVals(MassVec, L2G, globalIdx); //set the diagonal values of the Mass Matrix - } - - - //! This function sets the Mass Matrix diagonal values for the element after they have - //! been computed using DiagonalMassForSW and place into the petsc vector MassVec - //! @param MassVec -- input in the form of petsc vector computed using DiagonalMassForSW - //! @param L2G -- localtoglobal map for petsc vectors - //! @param elem_index -- needed for proper indexing into the L2G map contains the element index locally - void setDiagVals (const VecDouble& MassVec, const LocalToGlobalMap& L2G, - const GlobalElementIndex& elem_index); - - //! set the boundary conditions on the local element - //! @param IFlag is set to true for each bc that is imposed - //! \todo --Modify this to reflect changes to constructor - //! @param IVal is set to value for each bc that is imposed - void setBCs (const MatBool&IFlag, - const MatDouble& IVal); - - - - - - const DResidue& operation; - MatDouble MMdiag; - - size_t globalIdx; - // MatDouble forcefield; - - std::vector avi_bc_func; - std::vector > imposedTypes; - MatBool imposedFlags; - MatDouble imposedValues; - - size_t nfields; - double delta; // safety factor in time step computation -}; - -#endif // _STANDARD_AVI_H_ diff --git a/maxflow/galois/apps/avi/libElOp/DiagonalMassForSW.cpp b/maxflow/galois/apps/avi/libElOp/DiagonalMassForSW.cpp deleted file mode 100644 index ce10f74..0000000 --- a/maxflow/galois/apps/avi/libElOp/DiagonalMassForSW.cpp +++ /dev/null @@ -1,61 +0,0 @@ -// Sriramajayam - -#include "DiagonalMassForSW.h" -#include "Material.h" - -bool DiagonalMassForSW::getVal(const MatDouble &argval, MatDouble& funcval) const { - size_t Dim = fieldsUsed.size(); - - // Assume that all fields use the same quadrature rules - size_t nquad = element.getIntegrationWeights(fieldsUsed[0]).size(); - - std::vector nDof(Dim, 0); // Number of dofs in each field - MatDouble IntWeights(Dim); // Integration weights for each field - MatDouble Shape(Dim); // Shape functions for each field. - - for (size_t f = 0; f < Dim; f++) { - nDof[f] = element.getDof(fieldsUsed[f]); - IntWeights[f] = element.getIntegrationWeights(fieldsUsed[f]); - Shape[f] = element.getShapes(fieldsUsed[f]); - } - - // Resize funcval if required. - if (funcval.size() < fieldsUsed.size()) { - funcval.resize(fieldsUsed.size()); - } - - for (size_t f = 0; f < fieldsUsed.size(); f++) { - if (funcval[f].size() < nDof[f]) { - funcval[f].resize(nDof[f], 0.); - } - else { - for (size_t a = 0; a < nDof[f]; a++) { - funcval[f][a] = 0.; - } - } - } - - for (size_t q = 0; q < nquad; q++) { - VecDouble F(SimpleMaterial::MAT_SIZE, 0.); // F = I in the reference config. 
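    // Descriptive note: F is set to the identity just below, so getLocalMaterialDensity()
    // returns the reference density rho_0; the rest of this loop then accumulates the
    // lumped (row-sum) mass vector described in DiagonalMassForSW.h:
    //   funcval[f][a] += IntWeights[f][q] * rho_0 * N_a^f(xi_q)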
- - std::copy(SimpleMaterial::I_MAT, SimpleMaterial::I_MAT + SimpleMaterial::MAT_SIZE, F.begin ()); - -// F[0] = 1.; -// F[4] = 1.; -// F[8] = 1.; - - double Ref_rho = 0.; - if (!material.getLocalMaterialDensity(&F, Ref_rho)) { - std::cerr << "\nDiagonalMassForSW::GetVal()- Could not compute local density.\n"; - return false; - } - - for (size_t f = 0; f < fieldsUsed.size(); f++) { - for (size_t a = 0; a < nDof[f]; a++) { - funcval[f][a] += IntWeights[f][q] * Ref_rho * Shape[f][nDof[f] * q + a]; - } - } - } - - return true; -} diff --git a/maxflow/galois/apps/avi/libElOp/DiagonalMassForSW.h b/maxflow/galois/apps/avi/libElOp/DiagonalMassForSW.h deleted file mode 100644 index a933bfb..0000000 --- a/maxflow/galois/apps/avi/libElOp/DiagonalMassForSW.h +++ /dev/null @@ -1,95 +0,0 @@ -/** - * DiagonalMassForSW.h - * DG++ - * - * Created by Adrian Lew on 10/24/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -// Sriramajayam - -#ifndef DIAGONALMASSMATRIXFORSW -#define DIAGONALMASSMATRIXFORSW - -#include "AuxDefs.h" -#include "ElementalOperation.h" - -//! \brief Class to compute a diagonal mass matrix for StressWork. -/** This class computes a diagonalized form of the (exact) mass matrix - * \f$ M[f][a][b] = \int_E \rho_0 N_a^f N_b^f. \f$ - * - * Since a diagonal mass matrix is often desired, the entries in each - * row of the exact mass-matrix are lumped together on the diagonal - * entry \f$ M[f][a][a] \f$. - * - * A mass-vector is assembled (instead of a matrix) with each entry - * compued as \f$ M[f][a] = \int_E \rho_0 N_a^f \f$ - * where \f$a\f$ runs over all degrees of freedom for - * field \f$ f \f$. - * - * Keeping this in mind, this class inherits the class Residue to - * assemble the mass vector. It has pointers to the element for which - * the mass matrix is to be computed and the material provided over - * the element. It assumes that there are a minimum of two fields over - * the element with an optional third. It implements the member - * function getVal of Residue to compute the elemental contribution to - * the mass-vector. - * - */ - -class DiagonalMassForSW: public BaseResidue { -public: - - //! Constructor - //! \param IElm Pointer to element over which mass is to be compued. - //! \param SM SimpleMaterial over the element. - //! 
\param fieldsUsed vector containing ids of fields being computed starting with 0 - inline DiagonalMassForSW (const Element& IElm, const SimpleMaterial &SM, const std::vector& fieldsUsed) - : BaseResidue (IElm, SM, fieldsUsed) { - assert (fieldsUsed.size() > 0 && fieldsUsed.size () <= 3); -} - - //! Destructor - virtual ~DiagonalMassForSW() { - } - - //! Copy constructor - //! \param DMM DiagonalMassForSW object to be copied. - inline DiagonalMassForSW(const DiagonalMassForSW & DMM) : BaseResidue (DMM) { - - } - - //! Cloning mechanism - virtual DiagonalMassForSW * clone() const { - return new DiagonalMassForSW(*this); - } - - - //! Computes the elemental contribution to the mass-vector. - //! \param argval See Residue. It is a dummy argument since integrations are done over the reference. - //! \param funcval See Residue. - bool getVal(const MatDouble &argval, MatDouble& funcval) const; - -}; - -#endif diff --git a/maxflow/galois/apps/avi/libElOp/ElementalOperation.cpp b/maxflow/galois/apps/avi/libElOp/ElementalOperation.cpp deleted file mode 100644 index 0287de6..0000000 --- a/maxflow/galois/apps/avi/libElOp/ElementalOperation.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/* - * ElementalOperation.cpp - * DG++ - * - * Created by Adrian Lew on 10/25/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include -#include -#include - -#include "util.h" -#include "ElementalOperation.h" - -bool DResidue::consistencyTest(const DResidue & DRes, const std::vector & DofPerField, const MatDouble &argval) { - size_t NFields = DRes.getFields().size(); - - MatDouble largval(argval); - MatDouble funcval; - MatDouble funcvalplus; - MatDouble funcvalminus; - - FourDVecDouble dfuncval; - FourDVecDouble dfuncvalnum; - - const double EPS = 1.e-6; - - dfuncvalnum.resize(NFields); - for (size_t f = 0; f < NFields; f++) { - dfuncvalnum[f].resize(DofPerField[f]); - for (size_t a = 0; a < DofPerField[f]; a++) { - dfuncvalnum[f][a].resize(NFields); - for (size_t g = 0; g < NFields; g++) { - dfuncvalnum[f][a][g].resize(DofPerField[g]); - } - } - } - - double maxval = 0; - for (size_t f = 0; f < NFields; f++) { - for (size_t a = 0; a < DofPerField[f]; a++) { - if (maxval > fabs(argval[f][a])) { - maxval = fabs(argval[f][a]); - } - } - } - - maxval += 1; - - for (size_t f = 0; f < NFields; f++) { - for (size_t a = 0; a < DofPerField[f]; a++) { - double ival = largval[f][a]; - - largval[f][a] = ival + EPS * maxval; - DRes.getVal(largval, funcvalplus); - - largval[f][a] = ival - EPS * maxval; - DRes.getVal(largval, funcvalminus); - - largval[f][a] = ival; - - for (size_t g = 0; g < NFields; g++) { - for (size_t b = 0; b < DofPerField[g]; b++) { - dfuncvalnum[g][b][f][a] = (funcvalplus[g][b] - funcvalminus[g][b]) / (2 * EPS * maxval); - } - } - - - } - } - - DRes.getDVal(largval, funcval, dfuncval); - - double error = 0; - double norm = 0; - for (size_t f = 0; f < dfuncval.size(); f++) { - for (size_t a = 0; a < dfuncval[f].size(); a++) { - for (size_t g = 0; g < dfuncval[f][a].size(); g++) { - for (size_t b = 0; b < dfuncval[f][a][g].size(); b++) { - error += pow(dfuncval[f][a][g][b] - dfuncvalnum[f][a][g][b], 2); - norm += pow(dfuncval[f][a][g][b], 2); - } - } - } - } - - error = sqrt(error); - norm = sqrt(norm); - - if (error / norm > EPS * 100) { - std::cerr << "DResidue::ConsistencyTest. DResidue not consistent\n"; - std::cerr << "norm: " << norm << " error: " << error << "\n"; - return false; - } - return true; -} - -enum AssembleMode { - RESIDUE, - DRESIDUE, -}; - -template -static bool _Assemble(std::vector &DResArray, const LocalToGlobalMap & L2G - , const VecDouble & Dofs, VecDouble& ResVec, MatDouble& DResMat, const AssembleMode& mode) { - - // VecZeroEntries(*ResVec); - std::fill (ResVec.begin(), ResVec.end (), 0.0 ); - - if (mode == DRESIDUE) { - // MatZeroEntries(DResMat); - for (MatDouble::iterator i = DResMat.begin (); i != DResMat.end (); ++i) { - std::fill (i->begin(), i->end (), 0.0); - } - } - - MatDouble argval; - MatDouble funcval; - FourDVecDouble dfuncval; - - // double * GDofs; - // VecGetArray(Dofs, &GDofs); - const VecDouble& GDofs = Dofs; - - for (size_t e = 0; e < DResArray.size(); e++) { - const std::vector & DPF = DResArray[e]->getFields(); - size_t localsize = 0; - - if (argval.size() < DPF.size()) { - argval.resize(DPF.size()); - } - - for (size_t f = 0; f < DPF.size(); f++) { - if (argval[f].size() < DResArray[e]->getFieldDof(f)) { - argval[f].resize(DResArray[e]->getFieldDof(f)); - } - - localsize += DResArray[e]->getFieldDof(f); - - for (size_t a = 0; a < DResArray[e]->getFieldDof(f); a++) { - argval[f][a] = GDofs[L2G.map(f, a, e)]; - } - } - - if (mode == DRESIDUE) { - // I am using a dynamic_cast to prevent writing two versions of essentially the - // same code, one for residue and another for dresidue. 
However, I think that there is - // a flaw in the abstraction, since I cannot apparently do it with polymorphism here. - - DResidue * dr = dynamic_cast (DResArray[e]); - if (dr) { - if (!dr->getDVal(argval, funcval, dfuncval)) { - std::cerr << "ElementalOperation.cpp::Assemble Error in residual computation\n"; - return false; - } - } else { - std::cerr << "ElementalOperation.cpp::Assemble Error. Attempted to compute" - " derivatives of a non-dresidue type\n"; - return false; - } - } else if (!DResArray[e]->getVal(argval, funcval)) { - std::cerr << "ElementalOperation.cpp::Assemble Error in residual computation\n"; - return false; - } - -#ifdef DEBUG - std::cout << "Assemble:: element " << e << std::endl; - for (size_t f = 0; f < DPF.size (); ++f) { - printIter (std::cout, funcval[f].begin(), funcval[f].end()); - } -#endif - - double * resvals = new double[localsize]; - size_t * indices = new size_t[localsize]; - double * dresvals; - - if (mode == DRESIDUE) { - dresvals = new double[localsize * localsize]; - } - - for (size_t f = 0, i = 0, j = 0; f < DPF.size(); f++) { - for (size_t a = 0; a < DResArray[e]->getFieldDof(f); a++, i++) { - resvals[i] = funcval[f][a]; - indices[i] = L2G.map(f, a, e); - - if (mode == DRESIDUE) - for (size_t g = 0; g < DPF.size(); g++) - for (size_t b = 0; b < DResArray[e]->getFieldDof(g); b++, j++) - dresvals[j] = dfuncval[f][a][g][b]; - } - } - - // signature (Vec, size_of_indices, size_t indices[], double[] vals, Mode) - // VecSetValues(*ResVec, localsize, indices, resvals, ADD_VALUES); - for (size_t i = 0; i < localsize; ++i) { - ResVec[indices[i]] += resvals[i]; - } - - if (mode == DRESIDUE) { - // signature (Mat, nrows_of_indices, size_t row_indices[], ncols_of_indices, size_t col_indices[], - // double vals[], Mode) - // algo - // for i in 0..nrows { - // for j in 0..ncols { - // Mat[row_indices[i]][col_indices[j] += or = vals[ncols * i + j]; - - // MatSetValues(*DResMat, localsize, indices, localsize, indices, dresvals, ADD_VALUES); - for (size_t i = 0; i < localsize; ++i) { - for (size_t j = 0; j < localsize; ++j) { - DResMat[ indices[i] ][ indices[j] ] += dresvals [localsize * i + j]; - } - } - } - - delete[] resvals; - delete[] indices; - - if (mode == DRESIDUE) { - delete[] dresvals; - } - } - - // VecRestoreArray(Dofs, &GDofs); - - return true; -} -bool Residue::assemble(std::vector &ResArray, const LocalToGlobalMap & L2G, const VecDouble & Dofs, VecDouble& ResVec) { - MatDouble d; - return _Assemble (ResArray, L2G, Dofs, ResVec, d, RESIDUE); -} - -bool DResidue::assemble(std::vector &DResArray, const LocalToGlobalMap & L2G, const VecDouble & Dofs, VecDouble& ResVec, MatDouble& DResMat) { - return _Assemble (DResArray, L2G, Dofs, ResVec, DResMat, DRESIDUE); -} - diff --git a/maxflow/galois/apps/avi/libElOp/ElementalOperation.h b/maxflow/galois/apps/avi/libElOp/ElementalOperation.h deleted file mode 100644 index 1b34c3c..0000000 --- a/maxflow/galois/apps/avi/libElOp/ElementalOperation.h +++ /dev/null @@ -1,318 +0,0 @@ -/** - * ElementalOperation.h - * DG++ - * - * Created by Adrian Lew on 10/25/06. 
- * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef ELEMENTALOPERATION -#define ELEMENTALOPERATION - -#include -#include - -#include - -#include "AuxDefs.h" -#include "Element.h" -#include "Material.h" - - -/** - \brief Computes a residue on an element - - A Residue object computes the values of a vector function of some or all the - fields in the element, with the distinguishing feature that there is one - component of the function per degree of freedom of the participating fields. - - A Residue object contains two concepts:\n - 1) A procedure to compute the value of the function\n - 2) A pointer to the element over which to compute it\n - - Each residue acts then as a container of a pointer to an element object, - and an operation to perform on that object. In this way the same element can be - used for several operation in a single computation (The alternative of adding - more layers of inheritance to the element classes makes the last possibility very - clumsy). - - Additionally, operation that need higher-levels of specialization, such as special classes - of elements, can perform type-checking in their own implementation. - - The class residue is in fact very similar to a general container, - in the sense that the object it points to does not need to be an - element but can be any object that permits the computation of the - function and can use the (field, dof) notation to label the inputs and - outputs. - - The precise fields to be utilized in the computation of the - operation may vary from element type to element type, hence these - will generally be specified. - - More precisely, a residual is a function - \f[ - F^f_a(u^0_0,u^0_1, \ldots, u^0_{n_0-1}, u^0_1, \ldots, u^{K}_{n_{K-1}-1}), - \f] - where \f$u^f_a\f$ is the a-th degree of freedom of the f-th participating field in - the function. A total of K of the element fields participate as arguments in the - function. The f-th participating field has a total of \f$n_f\f$ degrees of freedom. - The coefficient of the force "f" runs from 0 to K-1, and "a" ia the degree of freedom - index that ranges from 0 to \f$n_f\f$. - - We need to specify specify which field in the element will represent the f-th - participating field in the function F. For instance, the field number 2 in the element - can be used as the first argument of F, i.e., as participating field number 0. 
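
  For example (an illustrative sketch; res, l2g, dofVec and elemIdx are assumed names,
  not defined by this interface):

  \code
  // Gather the arguments u^f_a from a global dof vector, then evaluate F^f_a.
  MatDouble argval (res.getFields ().size ());
  for (size_t f = 0; f < argval.size (); ++f) {
    argval[f].resize (res.getFieldDof (f));
    for (size_t a = 0; a < argval[f].size (); ++a) {
      argval[f][a] = dofVec[l2g.map (f, a, elemIdx)];  // value of u^f_a
    }
  }

  MatDouble funcval;             // resized and zeroed inside getVal
  res.getVal (argval, funcval);  // funcval[f][a] = F^f_a (u)
  \endcode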
- - \todo This class does not accept the input of additional parameters that may - be needed for the evaluation of T that may not be solved for. - - \todo The assembly procedure should probably be changed -*/ -class Residue -{ - public: - Residue() {} - virtual ~Residue() {} - Residue(const Residue &NewEl) {} - virtual Residue * clone() const = 0; - - //! Returns the fields used for the computation of the residue\n - //! - //! getFields()[i] returns the field number beginning from zero.\n - //! The variable \f$u^f_a\f$ is then computed with field getFields()[f] - //! in the element. - virtual const std::vector & getFields() const = 0; - - - //! Returns the number of degrees of freedom per field used - //! for the computation of the residue\n - //! - //! getFieldDof(fieldnum) returns the number of deegrees of freedom - //! in the participating fieldo number "fieldnumber". The argument - //! fieldnumber begins from zero.\n - //! The number of different values of "a" in \f$u^{f}_a\f$ is - //! then computed with field getFieldDof(f) - virtual size_t getFieldDof(size_t fieldnumber) const = 0; - - - //! Returns the value of the residue given the values of the fields. - //! - //! @param argval vector of vector containing the values of the degrees of - //! freedom for each field.\n - //! argval[f][a] contains the value of degree of freedom "a" for participating - //! field "f". - //! - //! @param funcval It returns a vector< vector > with the values of each - //! component of the residual function. We have that \f$F^f_a\f$=funcval[f][a]. - //! The vector funcval is resized and zeroed in getVal. - //! - //! - //! The function returns true if successful, false otherwise. - virtual bool getVal(const MatDouble &argval, MatDouble& funcval ) const = 0; - - virtual const Element& getElement () const = 0; - - virtual const SimpleMaterial& getMaterial () const = 0; - - - //! \brief assemble Residual Vector - //! - //! Assembles the contributions from an Array of residual objects ResArray - //! into ResVec. The contribution from each ResArray, ResArray[e], is mapped - //! into ResVec with a LocalToGlobalMap. - //! - //! - //! @param ResArray Array of residue objects - //! @param L2G LocalToGlobalMap - //! @param Dofs PetscVector with values of degrees of freedom - //! @param ResVec Pointer to a PetscVector where to assemble the residues. - //! ResVec is zeroed in assemble - //! - //! This is precisely what's done:\n - //! 1) assemble input gathered as \n - //! argval[f][a] = Dofs[L2G(f,a)]\n - //! 2) Computation of the local residue funcval as - //! ResArray[i]->getVal(argval, funcval)\n - //! 3) assemble output gathered as\n - //! ResVec[L2G(f,a)] += funcval[f][a]\n - //! - //! Behavior:\n - //! Successful assembly returns true, unsuccessful false - //! - //! \warning: The residue object that computes the contributions of element - //! "e" is required to have position "e" in ResArray as well, so that - //! the LocalToGlobalMap object is used consistently - //! - //! \todo A defect of this implementation is that all fields in the element enter as - //! arguments for Residue and its derivative. It would be good to have the flexibility to extract - //! a subset of the degrees of freedom of the element as the argument to Residue and its - //! derivative. In this way it is possible to act with different elemental operation on - //! different degrees of freedom naturally, i.e., without having to artificially return a Residue - //! that has presumed contributions from all degrees of freedom. 
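 //!
 //! Illustrative call (a sketch assuming resArray, l2g, dofs and numGlobalDofs are set up
 //! by the caller, with resArray[e] holding the Residue of element e):
 //! \code
 //! std::vector<Residue*> resArray;           // one Residue per element, filled elsewhere
 //! VecDouble resVec (numGlobalDofs, 0.0);    // zeroed again inside assemble
 //! Residue::assemble (resArray, l2g, dofs, resVec);
 //! // resVec[l2g.map (f, a, e)] now holds the accumulated residues
 //! \endcode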
- - - static bool assemble(std::vector &ResArray, - const LocalToGlobalMap & L2G, - const VecDouble & Dofs, - VecDouble& ResVec); -}; - - -/** - * Base class for common functionality - */ - -class BaseResidue: public Residue { - -protected: - const Element& element; - const SimpleMaterial& material; - const std::vector& fieldsUsed; - - - BaseResidue (const Element& element, const SimpleMaterial& material, const std::vector& fieldsUsed) - : element (element), material (material), fieldsUsed (fieldsUsed) { - } - - BaseResidue (const BaseResidue& that) : element (that.element), material (that.material) - , fieldsUsed (that.fieldsUsed) { - - } - -public: - virtual const Element& getElement () const { - return element; - } - - virtual const std::vector& getFields () const { - return fieldsUsed; - } - - virtual const SimpleMaterial& getMaterial () const { - return material; - } - - virtual size_t getFieldDof (size_t fieldNum) const { - return element.getDof (fieldsUsed[fieldNum]); - } - -}; - - - - -/** - \brief Computes a residue and its derivative on an element - - See Residue class for an explanation. - - This class just adds a function getDVal that contains a vector - to return the derivative - -*/ - -class DResidue: public BaseResidue { - public: - DResidue (const Element& element, const SimpleMaterial& material, const std::vector& fieldsUsed) - : BaseResidue(element, material, fieldsUsed) {} - - DResidue(const DResidue &NewEl): BaseResidue(NewEl) {} - - virtual DResidue * clone() const = 0; - - - //! Returns the value of the residue and its derivative given the values of the fields. - //! - //! @param argval vector of vector containing the values of the degrees of - //! freedom for each field.\n - //! argval[f][a] contains the value of degree of freedom "a" for participating - //! field "f". - //! - //! @param funcval It returns a vector< vector > with the values of each - //! component of the residual function. We have that \f$F^f_a\f$=funcval[f][a]. - //! The vector funcval is resized and zeroed in getVal. - //! - //! @param dfuncval It returns a vector< vector< vector< vector > > > - //! with the values of each - //! component of the derivative of the residual function. - //! We have that \f$\frac{\partial F^f_a}{\partial u^g_b}\f$=dfuncval[f][a][g][b]. - //! The vector dfuncval is resized and zeroed in getVal. - //! - //! The function returns true if successful, false otherwise. - virtual bool getDVal(const MatDouble& argval, MatDouble& funcval, - FourDVecDouble& dfuncval) const = 0; - - - //! Consistency test for DResidues. - static bool consistencyTest(const DResidue & DRes, - const std::vector & DofPerField, - const MatDouble &argval); - - //! \brief assemble Residual Vector and it Derivative - //! - //! Assembles the contributions from an Array of dresidual objects DResArray - //! into DResVec. The contribution from each DResArray, DResArray[e], is - //! mapped into DResVec with a LocalToGlobalMap. - //! - //! @param DResArray Array of dresidue objects - //! @param L2G LocalToGlobalMap - //! @param Dofs PetscVector with values of degrees of freedom - //! @param ResVec Pointer to a PetscVector where to assemble the dresidues. - //! ResVec is zeroed in assemble - //! @param DResMat Pointer to a PetscVector where to assemble the dresidues. - //! DResMat is zeroed in assemble - //! - //! This is precisely what's done:\n - //! 1) assemble input gathered as \n - //! argval[f][a] = Dofs[L2G(f,a)]\n - //! 2) Computation of the local residue funcval and its derivative dfucnval as - //! 
DResArray[i]->getVal(argval, funcval, dfuncval)\n - //! 3) assemble output gathered as\n - //! ResVec[L2G(f,a)] += funcval[f][a]\n - //! DResMat[L2G(f,a)][L2G(g,b)] += dfuncval[f][a][g][b] - //! - //! Behavior:\n - //! Successful assembly returns true, unsuccessful false - //! - //! \warning: The residue object that computes the contributions of element - //! "e" is required to have position "e" in DResArray as well, so that - //! the LocalToGlobalMap object is used consistently - //! - //! \todo This structure has to be revised. In the implementation of both - //! assemble functions I had to use a dynamic_cast to prevent writing two - //! versions of essentially the same code, one for residue and another for - // DResidue. This should have been possible through polymorphism. - //! There must be a flaw in the abstraction. - - - static bool assemble(std::vector &DResArray, - const LocalToGlobalMap & L2G, - const VecDouble & Dofs, - VecDouble& ResVec, - MatDouble& DResMat); -}; - - - -#endif - diff --git a/maxflow/galois/apps/avi/libElOp/StressWork.cpp b/maxflow/galois/apps/avi/libElOp/StressWork.cpp deleted file mode 100644 index e93ccf0..0000000 --- a/maxflow/galois/apps/avi/libElOp/StressWork.cpp +++ /dev/null @@ -1,179 +0,0 @@ -/* - * StressWork.cpp - * DG++ - * - * Created by Adrian Lew on 10/25/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#include - -#include "StressWork.h" - -StressWork::PerCPUtmpVecTy StressWork::perCPUtmpVec; - - - - -bool StressWork::getDValIntern (const MatDouble &argval, MatDouble& funcval, FourDVecDouble& dfuncval - , const GetValMode& mode) const { - const size_t Dim = fieldsUsed.size(); - - // XXX: (amber) replaced 3 with NDM, 9 with MAT_SIZE and so on ... 
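  // (Editorial note, inferred from the index arithmetic below rather than from
  // separate documentation.) The flat array layouts used in this routine are:
  //   DShape[f][q*nDof[f]*nDiv[f] + a*nDiv[f] + J]
  //     = derivative in direction J of shape function a of field f at quad point q;
  //   F and P hold NDM x NDM tensors stored row-major in MAT_SIZE entries
  //     (F starts from the identity I_MAT and accumulates the displacement
  //     gradient, so after the loop it holds the deformation gradient);
  //   A holds the fourth-order tangent A_{fJgL} in MAT_SIZE*MAT_SIZE entries,
  //     indexed as A[f*NDM*MAT_SIZE + J*MAT_SIZE + g*NDM + L].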
- const size_t MAT_SIZE = SimpleMaterial::MAT_SIZE; - const size_t NDM = SimpleMaterial::NDM; - - // We should have the same quadrature points in all fields used - size_t nquad = element.getIntegrationWeights(fieldsUsed[0]).size(); - - - - StressWorkTmpVec& tmpVec = *StressWork::perCPUtmpVec.getLocal (); - tmpVec.adjustSizes (Dim); - - std::vector& nDof = tmpVec.nDof; - std::vector& nDiv = tmpVec.nDiv; - - MatDouble& DShape = tmpVec.DShape; - MatDouble& IntWeights = tmpVec.IntWeights; - - VecDouble& A = tmpVec.A; - VecDouble& F = tmpVec.F; - VecDouble& P = tmpVec.P; - - - for (size_t f = 0; f < Dim; ++f) { - nDof[f] = element.getDof(fieldsUsed[f]); - nDiv[f] = element.getNumDerivatives(fieldsUsed[f]); - - // DShape[f] = element.getDShapes(fieldsUsed[f]); - // do copy instead of assignment to pervent calls to allocator - StressWork::copyVecDouble (element.getDShapes (fieldsUsed[f]), DShape[f]); - - // IntWeights[f] = element.getIntegrationWeights(fieldsUsed[f]); - // do copy instead of assignment to pervent calls to allocator - StressWork::copyVecDouble (element.getIntegrationWeights (fieldsUsed[f]), IntWeights[f]); - } - - if (funcval.size() < Dim) { - funcval.resize(Dim); - } - - for (size_t f = 0; f < Dim; f++) { - if (funcval[f].size() < nDof[f]) { - funcval[f].resize(nDof[f], 0.); - } else { - std::fill(funcval[f].begin(), funcval[f].end(), 0.0); -// for (size_t a = 0; a < nDof[f]; ++a) { -// funcval[f][a] = 0.; -// } - } - - } - - - - if (mode == DVAL) { - - if (dfuncval.size() < Dim) { - dfuncval.resize(Dim); - } - - for (size_t f = 0; f < Dim; ++f) { - if (dfuncval[f].size() < nDof[f]) { - dfuncval[f].resize(nDof[f]); - } - - for (size_t a = 0; a < nDof[f]; ++a) { - if (dfuncval[f][a].size() < Dim) { - dfuncval[f][a].resize(Dim); - } - - for (size_t g = 0; g < Dim; ++g) { - if (dfuncval[f][a][g].size() < nDof[g]) { - dfuncval[f][a][g].resize(nDof[g], 0.); - } else { - for (size_t b = 0; b < nDof[g]; ++b) { - dfuncval[f][a][g][b] = 0.; - } - } - - } // end for - - - } - } - } - - for (size_t q = 0; q < nquad; ++q) { - // Compute gradients - - // F[0] = F[4] = F[8] = 1.; - // F[1] = F[2] = F[3] = F[5] = F[6] = F[7] = 0.; - std::copy (SimpleMaterial::I_MAT, SimpleMaterial::I_MAT + MAT_SIZE, F.begin ()); - - for (size_t f = 0; f < Dim; ++f) { - for (size_t a = 0; a < nDof[f]; ++a) { - for (size_t J = 0; J < nDiv[f]; ++J) { - // double t = argval[f][a] * DShape[f][q * nDof[f] * nDiv[f] + a * nDiv[f] + J]; - double t = DShape[f][q * nDof[f] * nDiv[f] + a * nDiv[f] + J] * argval[f][a]; - F[f * NDM + J] += t; - } - } - } - - if (!material.getConstitutiveResponse(F, P, A, SimpleMaterial::SKIP_TANGENTS)) { - std::cerr << "StressWork.cpp: Error in the constitutive response\n"; - return false; - } - - for (size_t f = 0; f < Dim; ++f) { - for (size_t a = 0; a < nDof[f]; ++a) { - for (size_t J = 0; J < nDiv[f]; ++J) { - funcval[f][a] += IntWeights[f][q] * P[f * NDM + J] * DShape[f][q * nDof[f] * nDiv[f] + a * nDiv[f] + J]; - } - } - } - - - if (mode == DVAL) { - for (size_t f = 0; f < Dim; ++f) { - for (size_t a = 0; a < nDof[f]; ++a) { - for (size_t g = 0; g < Dim; ++g) { - for (size_t b = 0; b < nDof[g]; ++b) { - for (register size_t J = 0; J < nDiv[f]; ++J) { - for (register size_t L = 0; L < nDiv[g]; ++L) { - dfuncval[f][a][g][b] += IntWeights[f][q] * A[f * NDM * MAT_SIZE + J * MAT_SIZE + g * NDM + L] * DShape[f][q * nDof[f] * nDiv[f] + a * nDiv[f] + J] - * DShape[g][q * nDof[g] * nDiv[g] + b * nDiv[g] + L]; - } - } - } - } - } - } - } - - - } - - return true; -} diff --git 
a/maxflow/galois/apps/avi/libElOp/StressWork.h b/maxflow/galois/apps/avi/libElOp/StressWork.h deleted file mode 100644 index b4b1c93..0000000 --- a/maxflow/galois/apps/avi/libElOp/StressWork.h +++ /dev/null @@ -1,190 +0,0 @@ -/** - * StressWork.h - * DG++ - * - * Created by Adrian Lew on 10/25/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef STRESSWORK -#define STRESSWORK - -#include -#include - -#include - -#include "Galois/Runtime/PerThreadStorage.h" - -#include "ElementalOperation.h" -#include "AuxDefs.h" - -/** - \brief Computes the virtual work of the stress tensor, and its derivative - - The virtual work of the stress tensor \f${\bf P}\f$ is defined as - \f[ - \int_{E} P_{iJ} v_{i,J}\ d\Omega, - \f] - where \f${\bf v}\f$ is a virtual displacement field. This operation works - for two and three-dimensional problems. In two-dimensional - problems plane strain is assumed, i.e., the displacements and virtual - displacements have the form \f$(v_1(x_1,x_2), v_2(x_1,x_2), 0)\f$. - - StressWork works only on SolidElements, since it needs a SimpleMaterial - to compute the stress tensor. - - StressWork computes the residue - \f[ - R[f][a] = \int_{E} P_{fJ} N_{a,J}\ d\Omega, - \f] - where \f$N_{a,f}\f$ is the derivative of shape function associated to - degree of freedom \f$a\f$ in direction \f$f\f$. - - The derivative of this residue is - \f[ - DR[f][a][g][b] = \int_{E} A_{fJgL} N_{a,J} N_{b,L}\ d\Omega, - \f] - where \f$A\f$ are the elastic moduli \f$\partial{\bf P}/\partial {\bf F}\f$. - */ -class StressWork: public DResidue { -protected: - enum GetValMode { - VAL, - DVAL, - }; - - //! \warning argval should contain displacements, not deformation mapping - bool getDValIntern(const MatDouble &argval, MatDouble& funcval, FourDVecDouble& dfuncval, const GetValMode& mode) const; - -private: - - /** - * Contains the temporary vectors used by @see getDValIntern - * Instead of creating and destroying new vectors on every call, - * which happens at least once per iteration, we reuse vectors - * from this struct. 
There's one instance per thread of this struct - */ - struct StressWorkTmpVec { - static const size_t MAT_SIZE = SimpleMaterial::MAT_SIZE; - - std::vector nDof; - std::vector nDiv; - - MatDouble DShape; - MatDouble IntWeights; - - VecDouble A; - VecDouble F; - VecDouble P; - - StressWorkTmpVec () - : A (MAT_SIZE * MAT_SIZE, 0.0), - F (MAT_SIZE, 0.0), - P (MAT_SIZE, 0.0) {} - - void adjustSizes (size_t Dim) { - if (nDof.size () != Dim) { nDof.resize (Dim); } - - if (nDiv.size () != Dim) { nDiv.resize (Dim); } - - if (DShape.size () != Dim) { DShape.resize (Dim); } - - if (IntWeights.size () != Dim) { IntWeights.resize (Dim); } - - if (A.size () != MAT_SIZE * MAT_SIZE) { A.resize (MAT_SIZE * MAT_SIZE, 0.0); } - - if (F.size () != MAT_SIZE) { F.resize (MAT_SIZE, 0.0); } - - if (P.size () != MAT_SIZE) { P.resize (MAT_SIZE, 0.0); } - } - - - - }; - - /** - * Per thread storage for temporary vectors used in @see getDValIntern - */ - typedef Galois::Runtime::PerThreadStorage PerCPUtmpVecTy; - - static PerCPUtmpVecTy perCPUtmpVec; - -public: - //! Construct a StressWork object with fields "field1, field2 and field3" as - //! the three dimensional displacement fields. - //! @param IElm pointer to the element over which the value will be computed. - //! The Input object is non-const, since these can be modified during the - //! operation. The object pointed to is not destroyed when the operation is. - //! @param SM SimpleMaterial object used to compute the stress and moduli. It is - //! only referenced, not copied. - //! @param fieldsUsed vector containing ids of fields being computed starting with 0 - //! Cartesian component of the displacement field. If not provided, it is - //! assumed that it is a plane strain case. - StressWork(const Element& IElm, const SimpleMaterial &SM, const std::vector& fieldsUsed) - : DResidue (IElm, SM, fieldsUsed) { - - assert (fieldsUsed.size() > 0 && fieldsUsed.size () <= 3); - - } - - virtual ~StressWork() {} - - - StressWork(const StressWork & SW) : DResidue (SW) {} - - - virtual StressWork * clone() const { - return new StressWork(*this); - } - - - VecDouble getIntegrationWeights(size_t fieldnumber) const { - return BaseResidue::element.getIntegrationWeights(fieldnumber); - } - - //! \warning argval should contain displacements, not deformation mapping - bool getDVal(const MatDouble &argval, MatDouble& funcval, FourDVecDouble& dfuncval) const { - return getDValIntern (argval, funcval, dfuncval, DVAL); - } - - //! 
\warning argval should contain displacements, not deformation mapping - bool getVal(const MatDouble &argval, MatDouble& funcval) const { - FourDVecDouble d; - return getDValIntern(argval, funcval, d, VAL); - } - - -private: - - static void copyVecDouble (const VecDouble& vin, VecDouble& vout) { - if (vout.size () != vin.size ()) { - vout.resize (vin.size ()); - } - - std::copy (vin.begin (), vin.end (), vout.begin ()); - } - -}; - -#endif diff --git a/maxflow/galois/apps/avi/libElOp/test/testAssemble-modelOutput b/maxflow/galois/apps/avi/libElOp/test/testAssemble-modelOutput deleted file mode 100644 index f9fbc9b..0000000 --- a/maxflow/galois/apps/avi/libElOp/test/testAssemble-modelOutput +++ /dev/null @@ -1,24 +0,0 @@ -0.154738 --0.299013 --0.393311 --0.0738921 --0.1821 --0.1821 -0.437839 -0.366179 --0.017166 -0.188826 -1.7526200541774453e+00 -5.4367685082125014e-02 -5.9310201907772753e-02 6.8684748269348916e-01 -1.2476775373517976e+00 -4.1322314049586772e-01 -4.4563231491787492e-01 -2.1925665711549644e-01 0.0000000000000000e+00 0.0000000000000000e+00 --5.4367685082125014e-02 1.5980445359033753e+00 7.1324227564380849e-01 0.0000000000000000e+00 -3.3445439685592970e-01 -4.9999999999999989e-01 -3.2442019370575381e-01 -1.0980445359033753e+00 0.0000000000000000e+00 0.0000000000000000e+00 --5.9310201907772739e-02 7.1324227564380849e-01 3.3167846176174178e+00 7.2607463366897407e-01 -4.9999999999999989e-01 -3.3445439685592970e-01 -2.3179806351820593e+00 -7.6763391024997962e-01 -4.3949378052758548e-01 -3.3722860220687306e-01 -6.8684748269348916e-01 0.0000000000000000e+00 7.2607463366897396e-01 3.6189670977545658e+00 -4.1322314049586772e-01 -1.2476775373517976e+00 -7.4396662724380636e-01 -1.0726074633668974e+00 -2.5573234862278899e-01 -1.2986820970358712e+00 --1.2476775373517976e+00 -3.3445439685592970e-01 -4.9999999999999989e-01 -4.1322314049586772e-01 1.7476775373517976e+00 7.4767753735179743e-01 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 --4.1322314049586772e-01 -4.9999999999999989e-01 -3.3445439685592970e-01 -1.2476775373517976e+00 7.4767753735179743e-01 1.7476775373517976e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 --4.4563231491787497e-01 -3.2442019370575381e-01 -2.3179806351820593e+00 -7.4396662724380636e-01 0.0000000000000000e+00 0.0000000000000000e+00 2.8301697915195905e+00 6.6460137732303115e-01 -6.6556841419655927e-02 4.0378544362652902e-01 --2.1925665711549644e-01 -1.0980445359033753e+00 -7.6763391024997962e-01 -1.0726074633668974e+00 0.0000000000000000e+00 0.0000000000000000e+00 6.6460137732303115e-01 2.1040951578506166e+00 3.2228919004244494e-01 6.6556841419655927e-02 -0.0000000000000000e+00 0.0000000000000000e+00 -4.3949378052758548e-01 -2.5573234862278899e-01 0.0000000000000000e+00 0.0000000000000000e+00 -6.6556841419655927e-02 3.2228919004244494e-01 5.0605062194724137e-01 -6.6556841419655927e-02 -0.0000000000000000e+00 0.0000000000000000e+00 -3.3722860220687306e-01 -1.2986820970358712e+00 0.0000000000000000e+00 0.0000000000000000e+00 4.0378544362652902e-01 6.6556841419655927e-02 -6.6556841419655927e-02 1.2321252556162152e+00 - - -Consistency Test -Consistency test successful - norm = 9.30356 error = 4.36321e-10 diff --git a/maxflow/galois/apps/avi/libElOp/test/testAssemble.cpp b/maxflow/galois/apps/avi/libElOp/test/testAssemble.cpp deleted file mode 100644 index 75c9590..0000000 --- a/maxflow/galois/apps/avi/libElOp/test/testAssemble.cpp +++ /dev/null @@ -1,191 +0,0 @@ -/* - * 
testAssemble.cpp - * DG++ - * - * Created by Adrian Lew on 10/27/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "Triangle.h" -#include "ElementalOperation.h" -#include "StressWork.h" -#include "P12DElement.h" -#include "petscvec.h" -#include "petscmat.h" - -static char help[] = "test"; - -int main(int argc, char **argv) -{ - PetscInitialize(&argc,&argv,(char*) 0,help); - - double Vertices[] = {1,0,0,1,0,0,1,1,0,2}; - std::vector Vertices0(Vertices, Vertices+10); - Triangle<2>::SetGlobalCoordinatesArray(Vertices0); - - std::vector< Element * > LocalElements; - std::vector< DResidue * > LocalOperations(3); - - double conn[] = {1,2,3,1,4,2,2,4,5}; - - int dim = 10; - - sleep(1); - - NeoHookean NH(1,1); - - for(int e=0; e<3; e++) - { - LocalElements.push_back(new P12D<2>::Bulk(conn[3*e],conn[3*e+1],conn[3*e+2])); - LocalOperations[e] = new StressWork(LocalElements[e], NH, 0,1); - } - StandardP12DMap L2G(LocalElements); - - - Vec Dofs, resVec; - Mat dresMat; - - VecCreate(PETSC_COMM_WORLD, &Dofs); - VecSetSizes(Dofs,PETSC_DECIDE,dim); - VecSetFromOptions(Dofs); - VecDuplicate(Dofs,&resVec); - - MatCreateSeqDense(PETSC_COMM_SELF,dim,dim,PETSC_NULL,&dresMat); - MatSetOption(dresMat,MAT_SYMMETRIC); - - double dofvalues[] = {0.1,0.,0.,0.1,0.,0.,0.1,0.2,-0.1,0.2}; - int indices[] = {0,1,2,3,4,5,6,7,8,9}; - VecSetValues(Dofs,dim,indices,dofvalues,INSERT_VALUES); - VecAssemblyBegin(Dofs); - VecAssemblyEnd(Dofs); - - DResidue::assemble(LocalOperations, L2G, Dofs, &resVec, &dresMat); - - VecAssemblyBegin(resVec); - VecAssemblyEnd(resVec); - MatAssemblyBegin(dresMat,MAT_FINAL_ASSEMBLY); - MatAssemblyEnd(dresMat,MAT_FINAL_ASSEMBLY); - - VecView(resVec,PETSC_VIEWER_STDOUT_SELF); - MatView(dresMat,PETSC_VIEWER_STDOUT_SELF); - - std::cout << "\n\nConsistency Test\n"; - - Vec resVecPlus; - Vec resVecMinus; - VecDuplicate(Dofs,&resVecPlus); - VecDuplicate(Dofs,&resVecMinus); - - Mat dresMatNum; - MatDuplicate(dresMat, MAT_DO_NOT_COPY_VALUES, &dresMatNum); - - double EPS = 1.e-6; - for(int i=0; i coordinates; - coordinates.assign(coord, coord+6); - - Segment<2>::SetGlobalCoordinatesArray(coordinates); - Triangle<2>::SetGlobalCoordinatesArray(coordinates); - - Element * Elm = new P12D<2>::Bulk(1,2,3); - - // Create simple material. - IsotropicLinearElastic ILE(1.0, 1.0, 1.0); // lambda, mu and ref_rho. - - // Create Residue object to compute diagonal mass vector. 
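  // (Editorial note) For a linear triangle with constant reference density,
  // mass lumping assigns each of the three nodes one third of the element
  // mass, so every entry of the diagonal mass vector is expected to equal
  //   m = (1/3) * rho_ref * Area,
  // which is the reference value computed and printed further below in this test.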
- DiagonalMassForSW MassVec(Elm, ILE, 0, 1); - - // Test class DiagonalMassForSW - std::cout<<"\nNumber of fields: "< > argval(2); - argval[0].resize(3,0.); - argval[1].resize(3,0.); - - std::vector< std::vector > funcval; - if(!MassVec.getVal(argval, &funcval)) - { - std::cerr<<"\nCould not compute mass vector. Test failed.\n"; - exit(1); - } - for(unsigned int f=0; fgetIntegrationWeights(0).size(); q++) - Area += Elm->getIntegrationWeights(0)[q]; - - double mv = (1./3.)*MassVec.GetSimpleMaterial().getDensityInReference()*Area*1.0; - std::cout<<"\nBoth mass vectors should read: (" - <getFields().size() - <<" should be 2."; - std::cout<<"\nLocal fields: "<getFields()[0]<<","<getFields()[1] - <<" should be 0,1."; - std::cout<<"\nNumber of dofs per field: "<getFieldDof(0)<<","<getFieldDof(1) - <<" should be 3,3."; - std::cout<<"\nMaterial given over element: "<GetSimpleMaterial().getMaterialName() - <<" should be IsotropicLinearElastic."; - std::cout<<"\nDensity of material in reference config.: "<GetSimpleMaterial().getDensityInReference() - <<" should be 1."; - delete MassVecClone; - } - - std::cout<<"\n\nTesing finished.\n"; -} diff --git a/maxflow/galois/apps/avi/libElOp/test/testStressWork.cpp b/maxflow/galois/apps/avi/libElOp/test/testStressWork.cpp deleted file mode 100644 index 696e038..0000000 --- a/maxflow/galois/apps/avi/libElOp/test/testStressWork.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * testStressWork.cpp - * DG++ - * - * Created by Adrian Lew on 10/25/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "Triangle.h" -#include "ElementalOperation.h" -#include "StressWork.h" -#include "P12DElement.h" - -int main() -{ - double Vertices[] = {1,0,0,1,0,0,1,1}; - std::vector Vertices0(Vertices, Vertices+8); - - Triangle<2>::SetGlobalCoordinatesArray(Vertices0); - - P12D<2>::Bulk TestElement(1,2,3); - NeoHookean NH(1,1); - - - StressWork MyResidue(&TestElement, NH, 0,1); - - std::vector< std::vector > argval(2); - argval[0].resize(3); - argval[1].resize(3); - - argval[0][0] = 0.1; - argval[0][1] = 0.; - argval[0][2] = -0.1; - - argval[1][0] = 0.; - argval[1][1] = 0.1; - argval[1][2] = -0.1; - - argval[0][0] = 0.; - argval[0][1] = 0.; - argval[0][2] = -0.; - - argval[1][0] = 0.; - argval[1][1] = 0.; - argval[1][2] = -0.; - - std::vector DofPerField(2); - - DofPerField[0] = TestElement.getDof( MyResidue.getFields()[0] ); - DofPerField[1] = TestElement.getDof( MyResidue.getFields()[1] ); - - - if(MyResidue.consistencyTest(MyResidue, DofPerField, argval)) - std::cout << "DResidue::Consistency test successful\n"; - else - std::cout << "DResidue::Consistency test not successful\n"; -} diff --git a/maxflow/galois/apps/avi/libElm/libElement/Element.h b/maxflow/galois/apps/avi/libElm/libElement/Element.h deleted file mode 100644 index 5eb1fe5..0000000 --- a/maxflow/galois/apps/avi/libElm/libElement/Element.h +++ /dev/null @@ -1,318 +0,0 @@ -/** - * Element.h: Basic Element over which fields are defined. - * It contains an element geometry, shape functions etc. - * - * DG++ - * - * Created by Adrian Lew on 9/2/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef ELEMENT -#define ELEMENT - -#include -#include "AuxDefs.h" -#include "ElementGeometry.h" -#include "BasisFunctions.h" - - -// TODO: add P1nDBoundaryTrace and P1nDMap - -/** - \brief Element: abstract base class for any element - - An Element is a convex polytope with possibly-multiple discrete - functional spaces, one for each field, with support on it. - - An element has:\n - 1) A geometry. The connectivity of vertices that define the convex hull.\n - 2) A group of scalar functional spaces. Each functional - space is defined by a set of basis functions with support - on the element. 
Each functional space has an associated - number of degrees of freedom to it: the components of any function - in the space in the chosen basis.\n - - A field is a scalar-valued function defined over the element.\n - Each field may have a different underlying functional space.\n - Each field may have a different quadrature rule. Different quadrature rules - will also be handled with different elements, or through inheritance. The - consistency of the quadrature rule when different fields are integrated together - is in principle not checked by the element hierarchy. - \n - Clearly, isoparametric elements will not necessarily be convex, - but Element class can still be used. - - As a convention, fields are numbered starting from 0. - - \todo It would be nice to have vectors and tensors as fields, where for each - component one assigns a single set of shape functions and quadrature points. - The current abstraction is flexible in the sense that it does not enforce - vector or tensor fields to have the same quadrature and shapes. - - \todo Need to explain what the expected order of the vectors getShapes() and - getDShapes() is. - - \todo Need to explain that either getShapes or getDShapes may return an empty vector, - signaling that those values are not available. -*/ - -class Element -{ - public: - - inline Element(){} - inline virtual ~Element(){} - inline Element(const Element &){} - virtual Element * clone() const = 0; - - // Accessors/Mutators: - - //! Number of different fields - virtual size_t getNumFields() const = 0; - - //! Number of degrees of freedom of one of the fields. - virtual size_t getDof(size_t field) const = 0; - - //! Number of derivatives per shape function for one of the fields - virtual size_t getNumDerivatives(size_t field) const = 0; - - //! Shape functions at quadrature points of one of the fields - virtual const std::vector &getShapes(size_t field) const = 0; - - //! Shape function derivatives at quadrature points of one of the fields - virtual const std::vector &getDShapes(size_t field) const = 0; - - //! Integration weights of a given field - virtual const std::vector &getIntegrationWeights(size_t field) const = 0; - - //! Integration point coordinates of a given field - virtual const std::vector &getIntegrationPtCoords(size_t field) const = 0; - - //! Value of shape function "shapenumber" of field "field" - //! at quadrature point "quad" - virtual double getShape(size_t field, size_t quad, size_t shapenumber) const = 0; - - //! Value of derivative of shape function "shapenumber" of field "field" - //! at quadrature point "quad" in direction "dir" - virtual double getDShape(size_t field, size_t quad, size_t shapenumber, size_t dir) - const = 0; - - //! Access to Element Geometry - virtual const ElementGeometry & getGeometry() const = 0; - -}; - - - - -/** - \brief Element_: Abstract implementation of an Element class - - Element_ constructs a finite element type. The ElementGeometry is - specified in derived classes. The idea is to use this class to derive - different finite element types. - - - Element_ is defined by:\n - 1) Access to a (ElementGeometry &)EG that defines the geometry of - the element through a map a reference domain \f$\hat{\Omega}\f$ to - the real element shape (or an approximation, such as in - isoparametric elements)\n - 2) An array (BasisFunctions *) LocalShapes all constructed over EG. - 3) A virtual function to be defined by derived classses getFieldShapes. 
The - value of LocalShapes[getFieldShapes(FieldNumber)] returns the shape - functions of field FieldNumber. This map is needed since the same - BasisFunctions object can be used for several fields. - -*/ - - - -class Element_: public Element { -private: - void copy (const Element_& that) { - for (size_t i = 0; i < that.LocalShapes.size (); i++) { - LocalShapes.push_back ((that.LocalShapes[i])->clone ()); - } - } - - void destroy () { - for (size_t i = 0; i < LocalShapes.size (); i++) { - delete LocalShapes[i]; - LocalShapes[i] = NULL; - } - } - -public: - inline Element_ () : Element() { - } - - inline virtual ~Element_ () { - destroy (); - } - - Element_ (const Element_ &OldElement_) : - Element (OldElement_) { - copy (OldElement_); - } - - Element_& operator = (const Element_& that) { - if (this != &that) { - destroy (); - copy (that); - } - return (*this); - } - - virtual Element_ * clone () const = 0; - - inline size_t getDof (size_t field) const { - return LocalShapes[getFieldShapes (field)]->getBasisDimension (); - } - inline size_t getNumDerivatives (size_t field) const { - return LocalShapes[getFieldShapes (field)]-> getNumberOfDerivativesPerFunction (); - } - inline const std::vector &getShapes (size_t field) const { - return LocalShapes[getFieldShapes (field)]->getShapes (); - } - inline const std::vector &getDShapes (size_t field) const { - return LocalShapes[getFieldShapes (field)]->getDShapes (); - } - inline const std::vector &getIntegrationWeights (size_t field) const { - return LocalShapes[getFieldShapes (field)]->getIntegrationWeights (); - } - inline const std::vector &getIntegrationPtCoords (size_t field) const { - return LocalShapes[getFieldShapes (field)]->getQuadraturePointCoordinates (); - } - - inline double getShape (size_t field, size_t quad, size_t shapenumber) const { - return getShapes (field)[quad * getDof (field) + shapenumber]; - } - - inline double getDShape (size_t field, size_t quad, size_t shapenumber, size_t dir) const { - return getDShapes (field)[quad * getDof (field) * getNumDerivatives (field) + shapenumber - * getNumDerivatives (field) + dir]; - } - -protected: - //! addBasisFunctions adds a BasisFunctions pointer at the end of LocalShapes - //! The i-th added BasisFunctions pointer is referenced when getFieldShapes - //! returns the integer i-1 - inline void addBasisFunctions (const BasisFunctions &BasisFunctionsPointer) { - LocalShapes.push_back (BasisFunctionsPointer.clone ()); - // XXX: amber: clone has no effect (see ShapesEvaluated__ copy constructor) and it seems - // that clone is not needed - // LocalShapes.push_back( (const_cast (&BasisFunctionsPointer)) ); - } - - //! getFieldShapes returns the position in LocalShapes in which - //! the shape functions for field Field are. - virtual size_t getFieldShapes (size_t Field) const = 0; - - //! returns the length of LocalShapes - inline size_t getNumShapes () const { - return LocalShapes.size (); - } - -private: - std::vector LocalShapes; -}; - -/** - \brief - SpecificElementFamily: classes that contain all Element types that form - a family. For example, bulk and boundary interpolation. - */ - -class SpecificElementFamily { -}; - -/** - \brief LocalToGlobalMap class: map the local degrees of freedom - of each Element to the global ones. - - The Local to Global map is strongly dependent on how the program - that utilizes the Element objects is organized. The objective of - this class is then to define the interface that the derived - objects should have. 
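  (Illustrative note) As a concrete example, a node-based map with NF fields per
  node could place degree of freedom "field" of 0-based node n at global index
  n*NF + field; StandardP1nDMap in P1nDElement.h is essentially such a map.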
- - There will generally not be a single LocalToGlobalMap object per - Element, but rather only one LocalToGlobalMap for all of - them. Hence, the interface requires a way to specify which - element is being mapped. - - Convention:\n - Fields and Dofs start to be numbered at 0 - */ -class LocalToGlobalMap { -public: - inline LocalToGlobalMap () { - } - inline virtual ~LocalToGlobalMap () { - } - inline LocalToGlobalMap (const LocalToGlobalMap &) { - } - virtual LocalToGlobalMap * clone () const = 0; - - // //! @param ElementMapped: GlobalElementIndex of the Element to be mapped. - // //! This sets - // //! the default Element object whose field degrees of freedom are - // //! mapped. - // // XXX: commented out (amber) - // // virtual void set (const GlobalElementIndex &ElementMapped) = 0; - - // //! @param field field number in the element, 0\f$ \le \f$ field \f$\le\f$ Nfields-1\n - // //! @param dof number of degree of freedom in that field, - // //! \f$ 0 \le \f$ dof \f$\le\f$ Ndofs-1 \n - // //! map returns the GlobalDofIndex associated to degree of freedom "dof" of field "field" - // //! in the default Element object. The latter is - // //! set with the function set(const Element &). - // // XXX: commented out (amber) - // // virtual const GlobalDofIndex map (size_t field, size_t dof) const = 0; - - //! @param field field number in the element, 0\f$ \le \f$ field \f$\le\f$ Nfields-1\n - //! @param dof number of degree of freedom in that field, \f$ 0 \le \f$ dof \f$\le\f$ Ndofs-1 \n - //! @param ElementMapped: GlobalElementIndex of the Element whose degrees - //! of freedom are being mapped\n - //! map returns the GlobalDofIndex associated to degree of freedom "dof" - //! of field "field" - //! in element MappedElement. - virtual GlobalDofIndex map (size_t field, size_t dof, const GlobalElementIndex & ElementMapped) const = 0; - - //! Total number of elements that can be mapped. Usually, total number of - //! elements in the mesh. - virtual size_t getNumElements () const = 0; - - //! Number of fields in an element mapped - virtual size_t getNumFields (const GlobalElementIndex & ElementMapped) const = 0; - - //! Number of dofs in an element mapped in a given field - virtual size_t getNumDof (const GlobalElementIndex & ElementMapped, size_t field) const = 0; - - //! Total number of dof in the entire map - virtual size_t getTotalNumDof () const = 0; -}; - -#endif - diff --git a/maxflow/galois/apps/avi/libElm/libElement/ElementBoundaryTrace.h b/maxflow/galois/apps/avi/libElm/libElement/ElementBoundaryTrace.h deleted file mode 100644 index e9dce2d..0000000 --- a/maxflow/galois/apps/avi/libElm/libElement/ElementBoundaryTrace.h +++ /dev/null @@ -1,213 +0,0 @@ -/** - * ElementBoundaryTraces.h - * DG++ - * - * Created by Adrian Lew on 10/12/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef ELEMENTBOUNDARYTRACES -#define ELEMENTBOUNDARYTRACES - -#include -#include "AuxDefs.h" -#include "Element.h" - -/** - \brief ElementBoundaryTraces: values of the trace of some - or all the fields in an Element over some or all faces of the - polytope. - - An ElementBoundaryTraces object contains\n - 1) The outward normal to the faces for which values are desired.\n - 2) The trace at these faces of some or all of the fields in the - element. These traces are provided as Element objects, one for - each face. - - The number of faces or fields in each face depends on the - particular ElementBoundaryTraces object build. The exact - number of fields whose trace is computed for each face will be - determined by the Element object in each face. - - ElementBoundaryTraces objects are designed to work jointly - with Element objects, so for example, it has no access to the - ElementGeometry. The outward normal to the faces are included here - because these are often used in integrands over faces. - - Each polytope has a convention to label ALL of its faces. By convention, - these labels are consecutive integers starting at 0 to the total number of faces in the - polytope minus one. -*/ - -class ElementBoundaryTraces -{ - public: - ElementBoundaryTraces() {} - virtual ~ElementBoundaryTraces() {} - ElementBoundaryTraces(const ElementBoundaryTraces &) {} - virtual ElementBoundaryTraces * clone() const = 0; - - //! Number of faces for which traces are provided - virtual size_t getNumTraceFaces() const = 0; - - //! Returns the face number in the polytope whose traces are provided. - //! Each polytope has a convention to label ALL of its faces. Traces are - //! provided for a subset of these faces. A total number of getNumTraceFaces() - //! faces have their traces in this object. getTraceFaceIds()[FaceIndex], - //! with FaceIndex between - //! 0 and getNumTraceFaces()-1, provides the face number in the polytope - //! face element accesses with getTrace(FaceIndex). - //! - //! The value returned starts at 0 for the first face and so on. - virtual const std::vector & getTraceFaceIds() const = 0; - - - //! Returns the Trace number where the trace for face FaceIndex is stored. - //! If no trace is provided for that face returns a -1. - //! - //! It is always true that FaceNumberToTrace[ getTraceFaceIds()[i] ] = i; - //! for 0<= i <= getNumTraceFaces()-1 - virtual size_t getTraceNumberOfFace(size_t FaceIndex) const = 0; - - //! Returns a constant reference to the Element that contains - //! the traces of the face getTraceFacesNumbers()[FaceIndex]. \n - //! FaceIndex ranges from 0 - //! to the getNumTraceFaces()-1. - virtual const Element & getTrace(size_t FaceIndex) const = 0; - - //! Returns getTrace(FaceIndex). Done for simplicity of the interface. - //! FaceIndex ranges from 0 - //! to the getNumTraceFaces()-1. - inline const Element & operator[](size_t FaceIndex) - { return getTrace(FaceIndex); } - - //! Returns the outward normal to face getTraceFaceIds(FaceIndex) - //! FaceIndex ranges from 0 - //! 
to the getNumTraceFaces()-1. - virtual const std::vector & getNormal(size_t FaceIndex) const = 0; - - //! map between the degrees of freedom of field in a trace - //! and those in the original element - //! - //! @param FaceIndex starting from 0 - //! @param field field number to map, starting from 0 - //! @param dof degree of freedom number on the trace of field "field" - //! - //! The function returns the degree of freedom number in the original - //! element - virtual size_t dofMap(size_t FaceIndex, size_t field, size_t dof) const = 0; -}; - - - - -/** - \brief ElementBoundaryTraces_: implementation of - ElementBoundaryTraces - - An ElementBoundaryTraces_ allows derived classes to add - Element_ objects, one per face whose trace is desired. - - The class is abstract since the getNormal function is yet to be - defined by the specific derived ElementBoundaryTraces - classes. - - The faces added with addFace are copied into the object, not - referenced. - -*/ -class ElementBoundaryTraces_: public ElementBoundaryTraces { -private: - void copy (const ElementBoundaryTraces_& that) { - FaceNumbers = that.FaceNumbers; - - for (size_t i = 0; i < that.FaceElements.size (); i++) { - FaceElements.push_back (that.FaceElements[i]->clone ()); - } - } - - void destroy () { - for (size_t i = 0; i < FaceElements.size (); i++) { - delete FaceElements[i]; - FaceElements[i] = NULL; - } - } - -public: - ElementBoundaryTraces_ () { - } - - virtual ~ElementBoundaryTraces_ () { - destroy(); - } - - ElementBoundaryTraces_ (const ElementBoundaryTraces_ & that) : - ElementBoundaryTraces (that) { - - copy (that); - } - - ElementBoundaryTraces_& operator = (const ElementBoundaryTraces_& that) { - if (this != &that) { - destroy (); - copy (that); - } - return (*this); - } - - virtual ElementBoundaryTraces_ * clone () const = 0; - - size_t getNumTraceFaces () const { - return FaceElements.size (); - } - - const std::vector & getTraceFaceIds () const { - return FaceNumbers; - } - - inline size_t getTraceNumberOfFace (size_t FaceIndex) const { - for (size_t i = 0; i < FaceNumbers.size (); i++) { - if (FaceNumbers[i] == FaceIndex) { - return i; - } - } - return -1; - } - - virtual const Element & getTrace (size_t FaceIndex) const { - return *FaceElements[FaceIndex]; - } - -protected: - void addFace (const Element_ * NewFace, const size_t FaceNumber) { - FaceElements.push_back (NewFace->clone ()); - FaceNumbers.push_back (FaceNumber); - } - -private: - std::vector FaceElements; - std::vector FaceNumbers; -}; - -#endif - diff --git a/maxflow/galois/apps/avi/libElm/libElement/P12DElement.h b/maxflow/galois/apps/avi/libElm/libElement/P12DElement.h deleted file mode 100644 index 189f0e4..0000000 --- a/maxflow/galois/apps/avi/libElm/libElement/P12DElement.h +++ /dev/null @@ -1,260 +0,0 @@ -/** - * P12DElement.h: 2D Element with linear shape functions - * - * DG++ - * - * Created by Adrian Lew on 9/22/06. 
- * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef P12DELEMENT -#define P12DELEMENT - -#include "Element.h" -#include "P1nDElement.h" - -#include "ElementGeometry.h" -#include "Triangle.h" -#include "ElementBoundaryTrace.h" -#include "ShapesEvaluated.h" - -#include -#include -#include - -/** - \brief Two-dimensional linear triangles with NF different fields - */ - -template -class P12DElement: public P1nDElement { -public: - P12DElement (const Triangle<2>& _elemGeom) : - P1nDElement (_elemGeom) { - ShapesP12D::Bulk modelShape (_elemGeom); - Element_::addBasisFunctions (modelShape); - } - - P12DElement (const P12DElement &that) : - P1nDElement (that) { - - } - - virtual P12DElement* clone () const { - return new P12DElement (*this); - } -}; - -/** - \brief P12DTrace: traces of P12DElements - - These trace elements are templated according to the number of Linear - fields and the Triangle face P12DTrace::FaceLabel on which the - trace is computed. - - Traces of P12D elements are somewhat special, since for each side one of the - bulk basis functions is identically zero, effectively leaving only two degrees - of freedom per face. To reduce the amount of memory allocated per element is then - convenient to consider Trace elements with only two degrees of freedom per field - per face, i.e., use the ShapesP12D::Faces ShapesEvaluated objects. - - However, ocassionally it may be necessary to have the boundary elements have the same - number of degrees of freedom as the bulk elements, as it is generally the case for - arbitrary bases. Hence, during the construction of these elements it is possible - to decide which type of shape functions to use. This is accomplished by specifying - the type ShapeType. - - */ - -template -class P12DTrace: public P1nDTrace { -public: - - //! @param BaseElement Element whose trace is desired - //! @param FaceName Face on which to take the trace - //! @param Type ShapeType of face, i.e., with two or three dof - P12DTrace (const P12DElement & BaseElement, const typename P1nDTrace::FaceLabel& FaceName, - const typename P1nDTrace::ShapeType& Type); - - virtual ~P12DTrace () { - } - - P12DTrace (const P12DTrace &that) : - P1nDTrace (that) { - } - - virtual P12DTrace * clone () const { - return new P12DTrace (*this); - } -private: - //! check if the faceLabel and shapeType are consistent with - //! 2D element trace - //! @param faceLabel - //! 
@param shapeType - void checkArgs (const typename P1nDTrace::FaceLabel& faceLabel, - const typename P1nDTrace::ShapeType& shapeType) { - // Triangle has only 3 faces - assert (faceLabel != P1nDTrace::FaceFour); - - // valid ShapeTypes are TwoDofs and ThreeDofs - assert (shapeType == P1nDTrace::TwoDofs || shapeType == P1nDTrace::ThreeDofs); - } - -}; - -template -P12DTrace::P12DTrace (const P12DElement & BaseElement, - const typename P1nDTrace::FaceLabel& FaceName, const typename P1nDTrace::ShapeType& Type) : - P1nDTrace (BaseElement) { - - checkArgs (FaceName, Type); - - const ElementGeometry& TriGeom = Element::getGeometry (); - assert (dynamic_cast* > (&TriGeom) != NULL); - - ElementGeometry* faceGeom = TriGeom.getFaceGeometry(FaceName); - assert (dynamic_cast* > (faceGeom) != NULL); - - if (Type == P1nDTrace::TwoDofs) { - ShapesP12D::Faces modelShape (*faceGeom); - Element_::addBasisFunctions (modelShape); - - } else { - // Type==ThreeDofs - switch (FaceName) { - case P1nDTrace::FaceOne: { - ShapesP12D::FaceOne ModelShape (*faceGeom); - Element_::addBasisFunctions (ModelShape); - break; - } - - case P1nDTrace::FaceTwo: { - ShapesP12D::FaceTwo ModelShape (*faceGeom); - Element_::addBasisFunctions (ModelShape); - break; - } - - case P1nDTrace::FaceThree: { - ShapesP12D::FaceThree ModelShape (*faceGeom); - Element_::addBasisFunctions (ModelShape); - break; - } - } - } - - delete faceGeom; faceGeom = NULL; -} - -/** - \brief P12DElementBoundaryTraces: group of traces of P12DElements. - - It contains P12DTrace Elements. It is possible to specify which faces to build traces for. - - getTrace(i) returns the i-th face for which traces were built. The order of these faces is always increasing - in face number. Example: if only faces one and three have traces, then getTrace(0) returns face one's trace, - and getTrace(1) face three's trace. - - It does not make a copy or keep a reference of the BaseElement. - - */ - -template -class P12DElementBoundaryTraces: public P1nDBoundaryTraces { -public: - //! @param BaseElement Element for which traces are to be build - //! @param flabels a vector of face labels (size is 3 for triangles) - //! @param shType type of trace element to use. See P12DTrace - P12DElementBoundaryTraces (const P12DElement &BaseElement, - const std::vector::FaceLabel>& flabels, - const typename P1nDTrace::ShapeType& shType): - P1nDBoundaryTraces (BaseElement, flabels, shType) { - - } - - virtual ~P12DElementBoundaryTraces () { - } - - P12DElementBoundaryTraces (const P12DElementBoundaryTraces & OldElem) : - P1nDBoundaryTraces (OldElem) { - } - - P12DElementBoundaryTraces * clone () const { - return new P12DElementBoundaryTraces (*this); - } - - - size_t dofMap (size_t FaceIndex, size_t field, size_t dof) const; - -}; - -// Class Implementation -template size_t P12DElementBoundaryTraces::dofMap ( - size_t FaceIndex, size_t field, size_t dof) const { - size_t val; - if (ElementBoundaryTraces::getTrace (FaceIndex).getDof (field) == 3) { - val = dof; - } else { - // getTrace(FaceIndex).getDof(field)=2 - switch (ElementBoundaryTraces::getTraceFaceIds ()[FaceIndex]) { - case 0: - val = dof; - break; - - case 1: - val = dof + 1; - break; - - case 2: - val = (dof == 0 ? 2 : 0); - break; - - default: - std::cerr << "P12DElementBoundaryTraces.DofMap Error\n"; - exit (1); - } - } - - return val; -} - -/** - \brief P12D family of elements over triangles with NF linearly - interpolated fields. - */ - -template class P12D: public SpecificElementFamily { -public: - //! 
Linear element over a triangle - typedef P12DElement Bulk; - - //! Linear elements over segments - typedef P12DTrace Face; - - //! Traces on the boundary for P12DElement - typedef P12DElementBoundaryTraces Traces; -}; - - - -#endif - diff --git a/maxflow/galois/apps/avi/libElm/libElement/P13DElement.h b/maxflow/galois/apps/avi/libElm/libElement/P13DElement.h deleted file mode 100644 index a3edfab..0000000 --- a/maxflow/galois/apps/avi/libElm/libElement/P13DElement.h +++ /dev/null @@ -1,266 +0,0 @@ -/** - * P13DElement.h: A 3D element with linear shape functions - * - * DG++ - * - * Created by Ramsharan Rangarajan - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef P13DELEMENT -#define P13DELEMENT - -#include "P1nDElement.h" -#include "P12DElement.h" -#include "ShapesEvaluatedP13D.h" -#include "Tetrahedron.h" -#include "ElementGeometry.h" - - -#include -#include -#include - -//! \brief three-dimensional linear tetrahedra with NF different fields. -template -class P13DElement: public P1nDElement { -public: - //! \param _elemGeom A reference to element geometry - P13DElement (const Tetrahedron& _elemGeom): P1nDElement (_elemGeom) { - ShapesP13D::Bulk modelShape (_elemGeom); - Element_::addBasisFunctions (modelShape); - } - - - //! Copy constructor - P13DElement (const P13DElement& that): P1nDElement (that) { - - } - - //! Cloning mechanism - virtual P13DElement* clone () const { - return new P13DElement (*this); - } - -}; - - -// Class P13DTrace: -/** \brief Traces of P13DElement. - - These trace elements are templated according to the number of - linear fields and the Tetrahedron face - P13DTrace::FaceLabel on which the trace is computed. - - Traces of P13DElements are somewhat special, since for each side - one of the bulk basis functions is identically zero, effectively - leaving three dofs per face. To reduce the amount of memory - allocated per element is then convenient to consider trace elements - with only 3 dofs per field per face, i.e., use the - ShapesP13D::Faces ShapesEvaluated objects. - - However, ocassionally, it may be necessary to have the boundary - elements have the same nummber of dofs as the bulk elements, as it - is generally the case for arbitrary bases. Hence during their - construction, it is possible to specify what type of shape - functions to use. This is accomplished by specifying the type - ShapeType. 
- - \warning As opposed to P13DElement(s), there is no local copy of - the geometry of the element here, but only a reference to - it. Hence, the destruction of the base geometry before the - destruction of the element will render the implementation - inconsistent. - - \todo As was commented in P12DTrace, TetGeom may have to be a - pointer and not a reference, since this is a reference to a copy of - the original geometry. - */ -template -class P13DTrace: public P1nDTrace { -public: - - //! \param BaseElement Element whose trace is desired. - //! \param FaceName Face on which to take the trace. - //! \param Type ShapeType of the face, i.e., with three or four dofs. - P13DTrace (const P13DElement &BaseElement, const typename P1nDTrace::FaceLabel& FaceName, - const typename P1nDTrace::ShapeType& Type); - - virtual ~P13DTrace () { - } - - //! Copy constructor - P13DTrace (const P13DTrace &OldElement_) : - P1nDTrace (OldElement_) { - } - - //! Cloning mechanism - virtual P13DTrace* clone () const { - return new P13DTrace (*this); - } - -private: - //! checks if arguments are consistend with 3D element - //! @param flabel - //! @param shType - void checkArgs (const typename P1nDTrace::FaceLabel& flabel, const typename P1nDTrace::ShapeType& shType) { - assert (shType != P1nDTrace::TwoDofs); - } - -}; - -// Implementation of class P13DTrace: - -template -P13DTrace::P13DTrace (const P13DElement &BaseElement, - const typename P1nDTrace::FaceLabel& FaceName, const typename P1nDTrace::ShapeType& Type) : - P1nDTrace (BaseElement) { - - checkArgs (FaceName, Type); - - const ElementGeometry& tetGeom = Element::getGeometry (); - assert (dynamic_cast (&tetGeom) != NULL); - - ElementGeometry* faceGeom = tetGeom.getFaceGeometry(FaceName); - assert (dynamic_cast* > (faceGeom) != NULL); - - - if (Type == P1nDTrace::ThreeDofs) { - ShapesP13D::Faces ModelShape (*faceGeom); - Element_::addBasisFunctions (ModelShape); - } else { - //Type == FourDofs - switch (FaceName) { - case P1nDTrace::FaceOne: { - ShapesP13D::FaceOne ModelShape(*faceGeom); - Element_::addBasisFunctions(ModelShape); - break; - } - - case P1nDTrace::FaceTwo: { - ShapesP13D::FaceTwo ModelShape(*faceGeom); - Element_::addBasisFunctions(ModelShape); - break; - } - - case P1nDTrace::FaceThree: { - ShapesP13D::FaceThree ModelShape(*faceGeom); - Element_::addBasisFunctions(ModelShape); - break; - } - - case P1nDTrace::FaceFour: { - ShapesP13D::FaceFour ModelShape(*faceGeom); - Element_::addBasisFunctions(ModelShape); - break; - } - - } - } - - delete faceGeom; faceGeom = NULL; -} - -// Class for ElementBoundaryTraces: - -/** - \brief Group of traces for P13DElement. - - It contains P13DTrace elements. It is possible to specify which faces to build traces for. - - getTrace(i) returns the i-th face for which traces are built. The order of these faces is always increasing - in number. - It does not make a copy o keep reference of the BaseElement. - */ - -template -class P13DElementBoundaryTraces: public P1nDBoundaryTraces { -public: - - //! \param BaseElement Element for which to build traces. - //! \param faceLabels is a vector telling which faces to build - //! \param Type type of trace element to use. - P13DElementBoundaryTraces (const P13DElement &BaseElement, - const std::vector::FaceLabel>& faceLabels, - typename P13DTrace::ShapeType Type): - P1nDBoundaryTraces (BaseElement, faceLabels, Type) { - - } - - virtual ~P13DElementBoundaryTraces () { - } - - //! 
Copy constructor - P13DElementBoundaryTraces (const P13DElementBoundaryTraces &OldElem) : - P1nDBoundaryTraces (OldElem) { - } - - //! Cloning mechanism - P13DElementBoundaryTraces * clone () const { - return new P13DElementBoundaryTraces (*this); - } - - //! map dofs between dofs of field in a trace and those in the original element. - //! \param FaceIndex starting from 0. - //! \param field field number to map, starting from 0. - //! \param dof degree of freedom number on the trace of field "field" - //! The function returns the degree of freedom number in the original element. - size_t dofMap (size_t FaceIndex, size_t field, size_t dof) const; -}; - -// Implementation of class P13DElementBoundaryTraces -template -size_t P13DElementBoundaryTraces::dofMap (size_t FaceIndex, size_t field, size_t dof) const { - size_t val; - - if (ElementBoundaryTraces::getTrace (FaceIndex).getDof (field) == 4) { - val = dof; - } else { // Three dofs per face. - - const size_t* FaceNodes = Tetrahedron::FaceNodes; - size_t facenum = ElementBoundaryTraces::getTraceFaceIds ()[FaceIndex]; - val = FaceNodes[3 * facenum + dof]; - } - return val; -} - -//! \brief Family of elements over tetrahedra with NF linearly interpolated fields. -template -class P13D: public SpecificElementFamily { - -public: - //! Linear over the element. - typedef P13DElement Bulk; - - //! Linear over triangles. - typedef P13DTrace Face; - - //! Traces on the boundary of P13DElement - typedef P13DElementBoundaryTraces Traces; - -}; - - - -#endif - diff --git a/maxflow/galois/apps/avi/libElm/libElement/P1nDElement.h b/maxflow/galois/apps/avi/libElm/libElement/P1nDElement.h deleted file mode 100644 index 26ab759..0000000 --- a/maxflow/galois/apps/avi/libElm/libElement/P1nDElement.h +++ /dev/null @@ -1,240 +0,0 @@ -/** - * P1nDElement.h: Common base class for 2D/3D elements with linear shape functions - * - * DG++ - * - * Created by Adrian Lew on 9/2/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef P1NDELEMENT_H_ -#define P1NDELEMENT_H_ - -#include -#include - -#include "AuxDefs.h" -#include "Element.h" -#include "ElementBoundaryTrace.h" -#include "ElementGeometry.h" - -template -class P1nDElement: public Element_ { -private: - const ElementGeometry& elemGeom; - -public: - //! constructor - //! @param _elemGeom element geometry - P1nDElement (const ElementGeometry& _elemGeom) : Element_(), elemGeom(_elemGeom) { - - } - - //! copy constructor - //! 
@param that - P1nDElement (const P1nDElement& that) : Element_(that), elemGeom (that.elemGeom) { - - } - - //! @see Element::getNumFields - virtual size_t getNumFields () const { - return NF; - } - - //! @see Element::getGeometry - virtual const ElementGeometry& getGeometry () const { - return elemGeom; - } - -protected: - //! @see Element_::getFieldShapes - size_t getFieldShapes (size_t field) const { - return 0; - } -}; - -/** - * Common base class for 2D/3D linear traces - */ -template -class P1nDTrace: public P1nDElement { -public: - //! Range of FaceIndices available to enumerate faces\n - //! When providing a FaceLabel as an argument there is automatic - //! control of its range - enum FaceLabel { - FaceOne=0, FaceTwo=1, FaceThree=2, FaceFour=3 - }; - - //! TwoDofs indicates Segment<2> boundary elements, with two dofs per field \n - //! ThreeDofs indicates Triangle<2> boundary elements, with three dofs per field. The - //! shape functions in P12DElement are just evaluated at quadrature points on each face\n - //! FourDofs is a Tetrahedron - enum ShapeType { - TwoDofs, ThreeDofs, FourDofs - }; - - P1nDTrace (const P1nDElement& baseElem): - P1nDElement(baseElem) { - - } - - P1nDTrace (const P1nDTrace& that): - P1nDElement (that) { - - } - - -}; - - -/** - * Common base class for boundary traces - * @see ElementBoundaryTraces - */ - -template -class P1nDBoundaryTraces: public ElementBoundaryTraces_ { -private: - MatDouble normals; - -public: - typedef typename P1nDTrace::FaceLabel FaceLabel; - typedef typename P1nDTrace::ShapeType ShapeType; - - P1nDBoundaryTraces (const P1nDElement& baseElem, - const std::vector& faceLabels, const ShapeType& shapeType) : - ElementBoundaryTraces_ () { - - assert (faceLabels.size() == baseElem.getGeometry().getNumFaces ()); - - for (size_t i = 0; i < faceLabels.size (); ++i) { - const P1nDTrace* fTrace = makeTrace (baseElem, faceLabels[i], shapeType); - addFace (fTrace, i); - } - - normals.resize (getNumTraceFaces ()); - - for (size_t i = 0; i < getNumTraceFaces (); ++i) { - baseElem.getGeometry ().computeNormal (getTraceFaceIds ()[i], normals[i]); - } - - } - - P1nDBoundaryTraces (const P1nDBoundaryTraces& that) : - ElementBoundaryTraces_ (that), normals (that.normals) { - - } - - const std::vector & getNormal (size_t FaceIndex) const { - return normals[FaceIndex]; - } - -protected: - virtual const P1nDTrace* makeTrace (const P1nDElement& baseElem, - const FaceLabel& flabel, const ShapeType& shType) const = 0; - -}; - - -/** - \brief StandardP1nDMap class: standard local to global map for 2D/3D elements with - linear shape functions - - StandardP1nDMap assumes that\n - 1) The GlobalNodalIndex of a node is an size_t\n - 2) All degrees of freedom are associated with nodes, and their values for each - node ordered consecutively according to the field number. 
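For readers skimming ahead to the map() implementation: the numbering convention described in this comment (and spelled out in the next sentence) reduces, in the 0-based form the code actually uses, to global_dof = NF*node + field. A minimal standalone sketch of that arithmetic follows; the connectivity array here is hypothetical and only illustrates the lookup.

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  const std::size_t NF = 2;                        // two fields per node
  std::vector<std::size_t> conn = {4, 7, 9};       // hypothetical element connectivity
  // StandardP1nDMap::map (below) returns NF * conn[dof] + field for 0-based node ids.
  for (std::size_t field = 0; field < NF; ++field)
    for (std::size_t dof = 0; dof < conn.size(); ++dof)
      std::printf("field %zu, local dof %zu -> global dof %zu\n",
                  field, dof, NF * conn[dof] + field);
  return 0;
}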
- - Consequently, the GlobalDofIndex of the degrees of freedom of node N with NField - fields are given by - - (N-1)*NF + field-1, where \f$ 1 \le \f$ fields \f$ \le \f$ NF - */ - -class StandardP1nDMap: public LocalToGlobalMap { -private: - const std::vector& elementArray; - -public: - StandardP1nDMap (const std::vector& _elementArray) - : LocalToGlobalMap (), elementArray (_elementArray) { - - } - - StandardP1nDMap (const StandardP1nDMap& that) : - LocalToGlobalMap (that), elementArray (that.elementArray) { - - } - - virtual StandardP1nDMap* clone () const { - return new StandardP1nDMap (*this); - } - - inline GlobalDofIndex map (size_t field, size_t dof, const GlobalElementIndex& ElementMapped) const { - const Element* elem = elementArray[ElementMapped]; - // we subtract 1 from node ids in 1-based node numbering - // return elem->getNumFields () * (elem-> getGeometry ().getConnectivity ()[dof] - 1) + field; - // no need to subtract 1 with 0-based node numbering - return elem->getNumFields () * (elem-> getGeometry ().getConnectivity ()[dof]) + field; - } - - - inline size_t getNumElements () const { - return elementArray.size (); - } - - inline size_t getNumFields (const GlobalElementIndex & ElementMapped) const { - return elementArray[ElementMapped]->getNumFields (); - } - inline size_t getNumDof (const GlobalElementIndex & ElementMapped, size_t field) const { - return elementArray[ElementMapped]->getDof (field); - } - - size_t getTotalNumDof () const { - GlobalNodalIndex MaxNodeNumber = 0; - - for (size_t e = 0; e < elementArray.size (); e++) { - const std::vector& conn = elementArray[e]->getGeometry ().getConnectivity (); - - for (size_t a = 0; a < conn.size (); ++a) { - if (conn[a] > MaxNodeNumber) { - MaxNodeNumber = conn[a]; - } - } - } - - // return maxNode * elementArray.get (0).numFields (); - // add 1 here since nodes are number 0 .. numNodes-1 in 0-based node numbering - return static_cast (MaxNodeNumber + 1) * elementArray[0]->getNumFields (); - } - -protected: - //! 
Access to ElementArray for derived classes - const std::vector & getElementArray () const { - return elementArray; - } - -}; - -#endif /* P1NDELEMENT_H_ */ diff --git a/maxflow/galois/apps/avi/libElm/libElement/test/testP12DElement-modelOutput b/maxflow/galois/apps/avi/libElm/libElement/test/testP12DElement-modelOutput deleted file mode 100644 index 1114aa2..0000000 --- a/maxflow/galois/apps/avi/libElm/libElement/test/testP12DElement-modelOutput +++ /dev/null @@ -1,59 +0,0 @@ -Number of fields: 2 should be 2 -Number of dof field(0): 3 should be 3 -Number of dof field(1): 3 should be 3 -Shape function values at quad points field(0): -0.666667 0.166667 0.166667 0.166667 0.666667 0.166667 0.166667 0.166667 0.666667 -Shape function values at quad points field(1): -0.666667 0.166667 0.166667 0.166667 0.666667 0.166667 0.166667 0.166667 0.666667 -Shape function derivatives values at quad points field(0): -1 0 0 1 -1 -1 1 0 0 1 -1 -1 1 0 0 1 -1 -1 -Shape function derivatives values at quad points field(1): -1 0 0 1 -1 -1 1 0 0 1 -1 -1 1 0 0 1 -1 -1 -Integration weight values at quad points field(0): -0.166667 0.166667 0.166667 -Integration weight values at quad points field(1): -0.166667 0.166667 0.166667 -Quad points coordinates for field(0): -0.666667 0.166667 0.166667 0.666667 0.166667 0.166667 -Quad points coordinates for field(1): -0.666667 0.166667 0.166667 0.666667 0.166667 0.166667 -Test Copy Constructor -Number of fields: 2 should be 2 -Number of dof field(0): 3 should be 3 -Number of dof field(1): 3 should be 3 -Shape function values at quad points field(0): -0.666667 0.166667 0.166667 0.166667 0.666667 0.166667 0.166667 0.166667 0.666667 -Shape function values at quad points field(1): -0.666667 0.166667 0.166667 0.166667 0.666667 0.166667 0.166667 0.166667 0.666667 -Shape function derivatives values at quad points field(0): -1 0 0 1 -1 -1 1 0 0 1 -1 -1 1 0 0 1 -1 -1 -Shape function derivatives values at quad points field(1): -1 0 0 1 -1 -1 1 0 0 1 -1 -1 1 0 0 1 -1 -1 -Integration weight values at quad points field(0): -0.166667 0.166667 0.166667 -Integration weight values at quad points field(1): -0.166667 0.166667 0.166667 -Quad points coordinates for field(0): -0.666667 0.166667 0.166667 0.666667 0.166667 0.166667 -Quad points coordinates for field(1): -0.666667 0.166667 0.166667 0.666667 0.166667 0.166667 -Cloned element before destruction. 
Test cloning mechanism -Number of fields: 2 should be 2 -Number of dof field(0): 3 should be 3 -Number of dof field(1): 3 should be 3 -Shape function values at quad points field(0): -0.666667 0.166667 0.166667 0.166667 0.666667 0.166667 0.166667 0.166667 0.666667 -Shape function values at quad points field(1): -0.666667 0.166667 0.166667 0.166667 0.666667 0.166667 0.166667 0.166667 0.666667 -Shape function derivatives values at quad points field(0): -1 0 0 1 -1 -1 1 0 0 1 -1 -1 1 0 0 1 -1 -1 -Shape function derivatives values at quad points field(1): -1 0 0 1 -1 -1 1 0 0 1 -1 -1 1 0 0 1 -1 -1 -Integration weight values at quad points field(0): -0.166667 0.166667 0.166667 -Integration weight values at quad points field(1): -0.166667 0.166667 0.166667 -Quad points coordinates for field(0): -0.666667 0.166667 0.166667 0.666667 0.166667 0.166667 -Quad points coordinates for field(1): -0.666667 0.166667 0.166667 0.666667 0.166667 0.166667 diff --git a/maxflow/galois/apps/avi/libElm/libElement/test/testP12DElement.cpp b/maxflow/galois/apps/avi/libElm/libElement/test/testP12DElement.cpp deleted file mode 100644 index fa0c089..0000000 --- a/maxflow/galois/apps/avi/libElm/libElement/test/testP12DElement.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/* - * testP12DElement.cpp - * DG++ - * - * Created by Adrian Lew on 9/22/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
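A note on the repeated 0.666667/0.166667 entries in the expected output above: with the vertex coordinates this test uses, the real triangle coincides with the parametric one, so the three linear shape functions are simply x, y and 1 - x - y evaluated at the quadrature points (2/3, 1/6), (1/6, 2/3), (1/6, 1/6) reported on the "Quad points coordinates" lines, each with weight 1/6. A minimal sketch reproducing those rows (the shape-function/coordinate association is the one used by Triangle::map later in this diff):

#include <cstdio>

int main() {
  const double qp[3][2] = {{2/3., 1/6.}, {1/6., 2/3.}, {1/6., 1/6.}};  // quadrature points above
  for (int q = 0; q < 3; ++q) {
    const double x = qp[q][0], y = qp[q][1];
    // Linear shape functions on the reference triangle: N0 = x, N1 = y, N2 = 1 - x - y.
    std::printf("%g %g %g\n", x, y, 1.0 - x - y);   // 0.666667 0.166667 0.166667, etc.
  }
  return 0;
}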
- */ - - -#include "P12DElement.h" -#include - - -int main() -{ - double Vertices[] = {1,0,0,1,0,0}; - std::vector Vertices0(Vertices, Vertices+6); - - Triangle<2>::SetGlobalCoordinatesArray(Vertices0); - - P12DElement<2> TestElement(1,2,3); - Element * VirtualElement; - - sleep(2); - - std::cout << "Number of fields: " << TestElement.GetFields() << " should be 2\n"; - std::cout << "Number of dof field(0): " << TestElement.getDof(0) << " should be 3\n"; - std::cout << "Number of dof field(1): " << TestElement.getDof(1) << " should be 3\n"; - for(int a=0; a<2; a++) - { - std::cout << "Shape function values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;q CopyElement(TestElement); - std::cout << "Test Copy Constructor\n"; - - std::cout << "Number of fields: " << CopyElement.GetFields() << " should be 2\n"; - std::cout << "Number of dof field(0): " << CopyElement.getDof(0) << " should be 3\n"; - std::cout << "Number of dof field(1): " << CopyElement.getDof(1) << " should be 3\n"; - for(int a=0; a<2; a++) - { - std::cout << "Shape function values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;qgetNumFields() << " should be 2\n"; - std::cout << "Number of dof field(0): " << VirtualElement->getDof(0) << " should be 3\n"; - std::cout << "Number of dof field(1): " << VirtualElement->getDof(1) << " should be 3\n"; - for(int a=0; a<2; a++) - { - std::cout << "Shape function values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;qgetShapes(a).size();q++) - std::cout << VirtualElement->getShapes(a)[q] << " "; - std::cout << "\n"; - } - - for(int a=0; a<2; a++) - { - std::cout << "Shape function derivatives values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;qgetDShapes(a).size();q++) - std::cout << VirtualElement->getDShapes(a)[q] << " "; - std::cout << "\n"; - } - - for(int a=0; a<2; a++) - { - std::cout << "Integration weight values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;qgetIntegrationWeights(a).size();q++) - std::cout << VirtualElement->getIntegrationWeights(a)[q] << " "; - std::cout << "\n"; - } - - for(int a=0; a<2; a++) - { - std::cout << "Quad points coordinates for field("<< a<< "):\n"; - for(unsigned int q=0;qgetIntegrationPtCoords(a).size();q++) - std::cout << VirtualElement->getIntegrationPtCoords(a)[q] << " "; - std::cout << "\n"; - } - - delete VirtualElement; -} - diff --git a/maxflow/galois/apps/avi/libElm/libElement/test/testP12DElementBoundaryTraces-modelOutput b/maxflow/galois/apps/avi/libElm/libElement/test/testP12DElementBoundaryTraces-modelOutput deleted file mode 100644 index ea83960..0000000 --- a/maxflow/galois/apps/avi/libElm/libElement/test/testP12DElementBoundaryTraces-modelOutput +++ /dev/null @@ -1,74 +0,0 @@ -Number of traces: 2 should be 2 -Face number : 0 -Normal components -0.707107 0.707107 -Face number : 2 -Normal components -0 -1 -Face number : 0 -Shape functions values for the first field -0.788675 0.211325 0.211325 0.788675 -Integration point coordinates -0.788675 0.211325 0.211325 0.788675 -Face number : 2 -Shape functions values for the first field -0.788675 0.211325 0.211325 0.788675 -Integration point coordinates -0.211325 0 0.788675 0 - -Test copy constructor -Number of traces: 2 should be 2 -Face number : 0 -Normal components -0.707107 0.707107 -Face number : 2 -Normal components -0 -1 -Face number : 0 -Shape functions values for the first field -0.788675 0.211325 0.211325 0.788675 -Integration point coordinates -0.788675 0.211325 0.211325 0.788675 -Face number : 2 -Shape functions 
values for the first field -0.788675 0.211325 0.211325 0.788675 -Integration point coordinates -0.211325 0 0.788675 0 - -Test Cloning -Number of traces: 2 should be 2 -Face number : 0 -Normal components -0.707107 0.707107 -Face number : 2 -Normal components -0 -1 -Face number : 0 -Shape functions values for the first field -0.788675 0.211325 0.211325 0.788675 -Integration point coordinates -0.788675 0.211325 0.211325 0.788675 -Face number : 2 -Shape functions values for the first field -0.788675 0.211325 0.211325 0.788675 -Integration point coordinates -0.211325 0 0.788675 0 - - Test ThreeDofs traces -Number of traces: 2 should be 2 -Face number : 0 -Normal components -0.707107 0.707107 -Face number : 2 -Normal components -0 -1 -Face number : 0 -Shape functions values for the first field -0.788675 0.211325 0 0.211325 0.788675 0 -Integration point coordinates -0.788675 0.211325 0.211325 0.788675 -Face number : 2 -Shape functions values for the first field -0.211325 0 0.788675 0.788675 0 0.211325 -Integration point coordinates -0.211325 0 0.788675 0 diff --git a/maxflow/galois/apps/avi/libElm/libElement/test/testP12DElementBoundaryTraces.cpp b/maxflow/galois/apps/avi/libElm/libElement/test/testP12DElementBoundaryTraces.cpp deleted file mode 100644 index 80082b9..0000000 --- a/maxflow/galois/apps/avi/libElm/libElement/test/testP12DElementBoundaryTraces.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* - * testP12DElementBoundaryTraces.cpp - * DG++ - * - * Created by Adrian Lew on 10/12/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - - -#include "P12DElement.h" -#include - - -int main() -{ - double Vertices[] = {1,0,0,1,0,0}; - std::vector Vertices0(Vertices, Vertices+6); - - Triangle<2>::SetGlobalCoordinatesArray(Vertices0); - Segment<2>::SetGlobalCoordinatesArray(Vertices0); - ElementBoundaryTraces * TestElementBoundaryClone; - - sleep(2); - - P12DElement<2> TestElement(1,2,3); - - - { - P12DElementBoundaryTraces<2> - TestElementBoundary(TestElement, true, false, true, P12DTrace<2>::TwoDofs); - - std::cout << "Number of traces: " << TestElementBoundary.getNumTraceFaces() << " should be 2\n"; - - for(unsigned int a=0; a TestElementBoundaryCopy(TestElementBoundary); - - std::cout << "Number of traces: " << TestElementBoundaryCopy.getNumTraceFaces() << " should be 2\n"; - - for(unsigned int a=0; agetNumTraceFaces() << " should be 2\n"; - - for(unsigned int a=0; agetNumTraceFaces(); a++) - { - int facenumber = TestElementBoundaryClone->getTraceFaceIds()[a]; - std::cout << "Face number : " << facenumber << std::endl; - - std::cout << "Normal components\n"; - for(unsigned int q=0;qgetNormal(a).size();q++) - std::cout << TestElementBoundaryClone->getNormal(a)[q] << " "; - std::cout << "\n"; - } - - for(unsigned int a=0; agetNumTraceFaces(); a++) - { - int facenumber = TestElementBoundaryClone->getTraceFaceIds()[a]; - std::cout << "Face number : " << facenumber << std::endl; - - std::cout << "Shape functions values for the first field\n"; - const Element & face = (*TestElementBoundaryClone)[a]; - - for(unsigned int q=0;q - TestElementBoundary(TestElement, true, false, true, P12DTrace<2>::ThreeDofs); - - std::cout << "Number of traces: " << TestElementBoundary.getNumTraceFaces() << " should be 2\n"; - - for(unsigned int a=0; a - - -int main() -{ - double Vertices[] = {1,0,0,1,0,0}; - std::vector Vertices0(Vertices, Vertices+6); - - Triangle<2>::SetGlobalCoordinatesArray(Vertices0); - Segment<2>::SetGlobalCoordinatesArray(Vertices0); - - sleep(2); - - P12DElement<2> TestElement(1,2,3); - - P12DTrace<2> TestTraceOne(TestElement,P12DTrace<2>::FaceOne,P12DTrace<2>::TwoDofs); - P12DTrace<2> TestTraceTwo(TestElement,P12DTrace<2>::FaceTwo,P12DTrace<2>::TwoDofs); - P12DTrace<2> TestTraceThree(TestElement,P12DTrace<2>::FaceThree,P12DTrace<2>::TwoDofs); - - P12DTrace<2> * Faces[] = { &TestTraceOne, &TestTraceTwo, - &TestTraceThree}; - - for(int i=0; i<3; i++) - { - std::cout << "\nTesting Face: "<< i+1 << "\n"; - std::cout << "Number of fields: " << Faces[i]->getFields() << " should be 2\n"; - std::cout << "Number of dof field(0): " << Faces[i]->getDof(0) << " should be 2\n"; - std::cout << "Number of dof field(1): " << Faces[i]->getDof(1) << " should be 2\n"; - - for(int a=0; a<2; a++) - { - std::cout << "Shape function values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;qgetShapes(a).size();q++) - std::cout << Faces[i]->getShapes(a)[q] << " "; - std::cout << "\n"; - } - - - for(int a=0; a<2; a++) - { - std::cout << "Integration weight values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;qgetIntegrationWeights(a).size();q++) - std::cout << Faces[i]->getIntegrationWeights(a)[q] << " "; - std::cout << "\n"; - } - - for(int a=0; a<2; a++) - { - std::cout << "Quad points coordinates for field("<< a<< "):\n"; - for(unsigned int q=0;qgetIntegrationPtCoords(a).size();q++) - std::cout << Faces[i]->getIntegrationPtCoords(a)[q] << " "; - std::cout << "\n"; - } - std::cout << "No shape function derivatives tested\n"; - } - - P12DTrace<2> *VirtualTraceOne; - - { - P12DTrace<2> CopyTraceOne(TestTraceOne); - 
std::cout << "\nTest Copy Constructor for Face 1\n"; - - std::cout << "Number of fields: " << CopyTraceOne.getFields() << " should be 2\n"; - std::cout << "Number of dof field(0): " << CopyTraceOne.getDof(0) << " should be 2\n"; - std::cout << "Number of dof field(1): " << CopyTraceOne.getDof(1) << " should be 2\n"; - - for(int a=0; a<2; a++) - { - std::cout << "Shape function values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;qgetFields() << " should be 2\n"; - std::cout << "Number of dof field(0): " << VirtualTraceOne->getDof(0) << " should be 2\n"; - std::cout << "Number of dof field(1): " << VirtualTraceOne->getDof(1) << " should be 2\n"; - - for(int a=0; a<2; a++) - { - std::cout << "Shape function values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;qgetShapes(a).size();q++) - std::cout << VirtualTraceOne->getShapes(a)[q] << " "; - std::cout << "\n"; - } - - - for(int a=0; a<2; a++) - { - std::cout << "Integration weight values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;qgetIntegrationWeights(a).size();q++) - std::cout << VirtualTraceOne->getIntegrationWeights(a)[q] << " "; - std::cout << "\n"; - } - - for(int a=0; a<2; a++) - { - std::cout << "Quad points coordinates for field("<< a<< "):\n"; - for(unsigned int q=0;qgetIntegrationPtCoords(a).size();q++) - std::cout << VirtualTraceOne->getIntegrationPtCoords(a)[q] << " "; - std::cout << "\n"; - } - std::cout << "No shape function derivatives tested\n\n"; - - - delete VirtualTraceOne; - - std::cout << "Test different ShapeType\n"; - P12DTrace<2> TestTraceOneType(TestElement,P12DTrace<2>::FaceOne,P12DTrace<2>::ThreeDofs); - P12DTrace<2> TestTraceTwoType(TestElement,P12DTrace<2>::FaceTwo,P12DTrace<2>::ThreeDofs); - P12DTrace<2> TestTraceThreeType(TestElement,P12DTrace<2>::FaceThree,P12DTrace<2>::ThreeDofs); - - Faces[0] = &TestTraceOneType; - Faces[1] = &TestTraceTwoType; - Faces[2] = &TestTraceThreeType; - - for(int i=0; i<3; i++) - { - std::cout << "\nTesting Face: "<< i+1 << "\n"; - std::cout << "Number of fields: " << Faces[i]->getFields() << " should be 2\n"; - std::cout << "Number of dof field(0): " << Faces[i]->getDof(0) << " should be 3\n"; - std::cout << "Number of dof field(1): " << Faces[i]->getDof(1) << " should be 3\n"; - - for(int a=0; a<2; a++) - { - std::cout << "Shape function values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;qgetShapes(a).size();q++) - std::cout << Faces[i]->getShapes(a)[q] << " "; - std::cout << "\n"; - } - - - for(int a=0; a<2; a++) - { - std::cout << "Integration weight values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;qgetIntegrationWeights(a).size();q++) - std::cout << Faces[i]->getIntegrationWeights(a)[q] << " "; - std::cout << "\n"; - } - - for(int a=0; a<2; a++) - { - std::cout << "Quad points coordinates for field("<< a<< "):\n"; - for(unsigned int q=0;qgetIntegrationPtCoords(a).size();q++) - std::cout << Faces[i]->getIntegrationPtCoords(a)[q] << " "; - std::cout << "\n"; - } - std::cout << "No shape function derivatives tested\n"; - } - -} - diff --git a/maxflow/galois/apps/avi/libElm/libElement/test/testP13DElement.cpp b/maxflow/galois/apps/avi/libElm/libElement/test/testP13DElement.cpp deleted file mode 100644 index 428ddd7..0000000 --- a/maxflow/galois/apps/avi/libElm/libElement/test/testP13DElement.cpp +++ /dev/null @@ -1,150 +0,0 @@ -// Sriramajayam - -// Purpose: To check P13DElement. 
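For orientation: the shape-function values this test prints come from the linear (barycentric) functions of the tetrahedron. Following the vertex/parametric-coordinate association used by Tetrahedron::map later in this diff (coordinates attached to vertices 0, 1 and 3), they are N0 = X0, N1 = X1, N2 = 1 - X0 - X1 - X2, N3 = X2, and they sum to one everywhere. A minimal sketch of that evaluation, assuming ShapesP13D evaluates exactly these functions; the sample point is the one testTetrahedron.cpp uses:

#include <cstdio>

int main() {
  const double X[3] = {0.25, 0.25, 0.25};   // interior point, as in testTetrahedron.cpp
  // Barycentric shape functions in local-node order 0..3 (see Tetrahedron::map).
  const double N[4] = {X[0], X[1], 1.0 - X[0] - X[1] - X[2], X[2]};
  std::printf("N = %g %g %g %g, sum = %g\n",
              N[0], N[1], N[2], N[3], N[0] + N[1] + N[2] + N[3]);
  return 0;
}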
- - -#include "P13DElement.h" -#include -#include - -int main() -{ - double Vertices[] = {1,0,0, - 0,1,0, - 0,0,0, - 0,0,1}; - std::vector Vertices0(Vertices, Vertices+12); - - Tetrahedron::SetGlobalCoordinatesArray(Vertices0); - - P13DElement<2> TestElement(1,2,3,4); - - - Element * VirtualElement; - - sleep(2); - - std::cout << "Number of fields: " << TestElement.getFields() << " should be 2\n"; - std::cout << "Number of dof field(0): " << TestElement.getDof(0) << " should be 4\n"; - std::cout << "Number of dof field(1): " << TestElement.getDof(1) << " should be 4\n"; - for(int a=0; a<2; a++) - { - std::cout << "Shape function values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;q CopyElement(TestElement); - std::cout << "Test Copy Constructor\n"; - - std::cout<<"Number of fields: " << CopyElement.getFields() << " should be 2\n"; - std::cout<<"Number of dof field(0): " <getNumFields() << " should be 2\n"; - std::cout <<"Number of dof field(0): "<getDof(0)<<" should be 4\n"; - std::cout <<"Number of dof field(1): "<getDof(1)<<" should be 4\n"; - - for(int a=0; a<2; a++) - { - std::cout << "Shape function values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;qgetShapes(a).size();q++) - std::cout << VirtualElement->getShapes(a)[q] << " "; - std::cout << "\n"; - } - - for(int a=0; a<2; a++) - { - std::cout << "Shape function derivatives values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;qgetDShapes(a).size();q++) - std::cout << VirtualElement->getDShapes(a)[q] << " "; - std::cout << "\n"; - } - - for(int a=0; a<2; a++) - { - std::cout << "Integration weight values at quad points field("<< a<< "):\n"; - for(unsigned int q=0;qgetIntegrationWeights(a).size();q++) - std::cout << VirtualElement->getIntegrationWeights(a)[q] << " "; - std::cout << "\n"; - } - - for(int a=0; a<2; a++) - { - std::cout << "Quad points cooridnates for field("<< a<< "):\n"; - for(unsigned int q=0;qgetIntegrationPtCoords(a).size();q++) - std::cout << VirtualElement->getIntegrationPtCoords(a)[q] << " "; - std::cout << "\n"; - } - - delete VirtualElement; - - -} - diff --git a/maxflow/galois/apps/avi/libElm/libElement/test/testP13DElementBoundaryTraces.cpp b/maxflow/galois/apps/avi/libElm/libElement/test/testP13DElementBoundaryTraces.cpp deleted file mode 100644 index 5ad8704..0000000 --- a/maxflow/galois/apps/avi/libElm/libElement/test/testP13DElementBoundaryTraces.cpp +++ /dev/null @@ -1,67 +0,0 @@ -/* Sriramajayam */ - -// testP13DElementBoundaryTraces.cpp. 
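The mapping this test exercises indirectly is P13DElementBoundaryTraces::dofMap (earlier in this diff): for a three-dof triangular trace it sends trace dof d on face f to element dof Tetrahedron::FaceNodes[3*f + d], while for a four-dof trace the dof index passes through unchanged. A minimal sketch of that lookup; the table below is written out from the face ordering documented in Tetrahedron.h (Face 1: 2-1-0, Face 2: 2-0-3, Face 3: 2-3-1, Face 4: 0-1-3), since the actual FaceNodes definition is not part of this hunk:

#include <cstdio>

int main() {
  // Face-to-element local node table, spelled out from the documented face ordering.
  const int FaceNodes[4][3] = {{2, 1, 0}, {2, 0, 3}, {2, 3, 1}, {0, 1, 3}};
  for (int face = 0; face < 4; ++face)
    for (int dof = 0; dof < 3; ++dof)   // three-dof (triangular) trace
      std::printf("face %d, trace dof %d -> element dof %d\n",
                  face, dof, FaceNodes[face][dof]);
  return 0;
}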
- -#include "P13DElement.h" -#include - -int main() -{ - double V0[] = {1,0,0, - 0,1,0, - 0,0,0, - 0,0,1}; - - std::vector Vertices(V0, V0+12); - - Tetrahedron::SetGlobalCoordinatesArray(Vertices); - Triangle<3>::SetGlobalCoordinatesArray(Vertices); - Segment<3>::SetGlobalCoordinatesArray(Vertices); - - P13DElement<2> Elm(1,2,3,4); - - P13DElementBoundaryTraces<2> - EBT(Elm, true, false, true, true, P13DTrace<2>::ThreeDofs); - - std::cout<<"\n Number of Traces: "< - -int main() -{ - double V0[] = {1,0,0, - 0,1,0, - 0,0,0, - 0,0,1}; - - std::vector Vertices(V0, V0+12); - - Tetrahedron::SetGlobalCoordinatesArray(Vertices); - Triangle<3>::SetGlobalCoordinatesArray(Vertices); - Segment<3>::SetGlobalCoordinatesArray(Vertices); - - P13DElement<2> Elm(1,2,3,4); - - P13DTrace<2> Trace1(Elm, P13DTrace<2>::FaceOne, P13DTrace<2>::ThreeDofs); - P13DTrace<2> Trace2(Elm, P13DTrace<2>::FaceTwo, P13DTrace<2>::ThreeDofs); - P13DTrace<2> Trace3(Elm, P13DTrace<2>::FaceThree, P13DTrace<2>::ThreeDofs); - P13DTrace<2> Trace4(Elm, P13DTrace<2>::FaceFour, P13DTrace<2>::ThreeDofs); - - P13DTrace<2> * Faces[] = { &Trace1, &Trace2, &Trace3, &Trace4}; - - for(int i=1; i<2; i++) // Change to test different/all traces. - { - std::cout<<"\n Testing Face: "<GetFields()<<" should be 2\n"; - std::cout<<"\nNumber of dof field(0): "<getDof(0)<<" should be 3\n"; - std::cout<<"\nNumber of dof field(1): "<getDof(1)<<" should be 3\n"; - - - // Printing Shape functions at quadrature points. - for(int f=0; f<2; f++) - { - std::cout <<"\n Shape Function values at quad point for field "<< f<< ":\n"; - for(unsigned int q=0; qgetShapes(f).size(); q++) - std::cout << Faces[i]->getShapes(f)[q] <<" "; - - std::cout << "\n"; - } - - // Printing integration weights at quad points: - for(int f=0; f<2; f++) - { - std::cout <<"\n Integration weights at quad point for field "<< f<< ":\n"; - for(unsigned int q=0; qgetIntegrationWeights(f).size(); q++) - std::cout << Faces[i]->getIntegrationWeights(f)[q] <<" "; - - std::cout << "\n"; - } - - // Printing integration quad points: - for(int f=0; f<2; f++) - { - std::cout <<"\n Quad point coordinates for field "<< f<< ":\n"; - for(unsigned int q=0; qgetIntegrationPtCoords(f).size(); q++) - std::cout << Faces[i]->getIntegrationPtCoords(f)[q] <<" "; - - std::cout << "\n"; - } - - std::cout<<"\n Shape function derivatives not tested. \n"; - } - - std::cout<< "\n Test Successful. \n\n"; - -} - - - - diff --git a/maxflow/galois/apps/avi/libElm/libGeom/ElementGeometry.h b/maxflow/galois/apps/avi/libElm/libGeom/ElementGeometry.h deleted file mode 100644 index f8aeeb6..0000000 --- a/maxflow/galois/apps/avi/libElm/libGeom/ElementGeometry.h +++ /dev/null @@ -1,275 +0,0 @@ -/** - * ElementGeometry.h: Geometry of an element. e.g. a triangle or tetrahedron - * DG++ - * - * Created by Adrian Lew on 9/4/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef ELEMENTGEOMETRY -#define ELEMENTGEOMETRY - -#include "AuxDefs.h" -#include -#include -#include - -#include -/** - \brief ElementGeometry: Defines the geometry of the polytope over which the - interpolation takes place - - ElementGeometry consists of:\n - 1) A set of vertices that define a convex hull, the domain of the polytope\n - 2) A map from a parametric, reference polytope to the real - polytope. This map is one-to-one, and may have domain and range - in Euclidean spaces of different dimensions. In this way it is - possible to map the parametric configuration of a planar - triangle into three-dimensional real space as needed for plates - and shells.\n - 3) A name for the polytope, for identification purposes whenever needed.\n - - The idea of the class is to avoid having copies of the vertices coordinates in the object, - only the connectivity. -*/ - -class ElementGeometry -{ - public: - inline ElementGeometry(){} - - inline virtual ~ElementGeometry(){} - - inline ElementGeometry(const ElementGeometry &){} - - virtual ElementGeometry * clone() const = 0; - - //! @return number of vertices - virtual size_t getNumVertices() const = 0; - - //!@return ref to Vertices of the polytope. - virtual const std::vector & getConnectivity() const = 0; - - //! @return Name of type of polytope. - virtual const std::string getPolytopeName() const = 0; - - //! @return spatial dimension e.g. 2 for 2D - virtual size_t getSpatialDimension () const = 0; - - //! Number of dimensions in parametric configuration - virtual size_t getParametricDimension() const = 0; - - //! Number of dimensions in the real configuration - virtual size_t getEmbeddingDimension() const = 0; - - //! map from parametric to real configuration - //! @param X parametric coordinates - //! @param Y returned real coordinates - virtual void map(const double * X, double *Y) const = 0; - - //! Derivative of map from parametric to real configuration - //! @param X parametric coordinates. - //! @param Jac returns absolute value of the Jacobian of the map. - //! @param DY returns derivative of the map. - //! Here DY[a*getEmbeddingDimension()+i] - //! contains the derivative in the a-th direction - //! of the i-th coordinate. - virtual void dMap(const double * X, double *DY, double &Jac) const = 0; - - //! Consistency test for map and its derivative - //! @param X parametric coordinates at which to test - //! @param Pert size of the perturbation with which to compute numerical - //! derivatives (X->X+Pert) - virtual bool consistencyTest(const double * X, const double Pert) const = 0; - - //! Number of faces the polytope has - virtual size_t getNumFaces() const = 0; - - //! Creates and returns a new ElementGeometry object corresponding - //! to face "e" in the polytope. The object has to be destroyed - //! with delete by the recipient. - //! - //! @param e face number, starting from 0 - //! - //! Returns a null pointer if "e" is out of range - virtual ElementGeometry * getFaceGeometry(size_t e) const = 0; - - //! 
Computes the Inner radius of the ElementGeometry object - //! - //! This is defined as the radius of the largest sphere that can be fit inside the polytope. - virtual double getInRadius() const = 0; - - //! Computes the Outer radius of the ElementGeometry object - //! - //! This is defined as the radius of the smallest sphere that contains the object. - virtual double getOutRadius() const = 0; - - //! Compute external normal for a face - //! - //! @param e: face number for which the normal is desired - //! @param vNormal: output of the three Cartesian components of the normal vector - virtual void computeNormal (size_t e, std::vector& vNormal) const = 0; - - - /** - * Returns the value of dimension 'i' of local node 'a' of the eleement - * - * @param a local index of the node in [0..numNodes) - * @param i local index of dimension (x or y or z) in [0..Dim) - */ - virtual double getCoordinate (size_t a, size_t i) const = 0; - - /** - * Computes the center of the element (the way center is defined may be - * different for different elements) - * - * @param center output vector containing the coordinates of the center - */ - virtual void computeCenter (std::vector& center) const = 0; - -}; - -/** - * Base class with common functionality - */ -template -class AbstractGeom : public ElementGeometry { -private: - const std::vector& globalCoordVec; - std::vector connectivity; - -protected: - static const size_t SP_DIM = SPD; - /** - * @return ref to the vector that contains global coordinates for all mesh nodes - */ - const std::vector& getGlobalCoordVec () const { return globalCoordVec; } - -public: - /** - * @param globalCoordVec is a reference to the vector containing coordinates of all nodes - * Coordinates of node i in N dimensional space are in locations [N*i, N*(i+1)) - * @param connectivity is a vector containing ids of nodes of this element in the mesh - */ - - AbstractGeom (const std::vector& globalCoordVec, const std::vector& connectivity) - :ElementGeometry (), globalCoordVec(globalCoordVec), connectivity (connectivity) { - - } - - AbstractGeom (const AbstractGeom& that) - : ElementGeometry (that), globalCoordVec (that.globalCoordVec), connectivity (that.connectivity) { - - } - - virtual size_t getSpatialDimension () const { - return SP_DIM; - } - - virtual const std::vector& getConnectivity () const { - return connectivity; - } - - virtual bool consistencyTest (const double* X, const double Pert) const { - double *DYNum = new double[getParametricDimension() * getEmbeddingDimension()]; - double *DY = new double[getParametricDimension() * getEmbeddingDimension()]; - double *Xpert = new double[getParametricDimension()]; - double *Yplus = new double[getEmbeddingDimension()]; - double *Yminus = new double[getEmbeddingDimension()]; - double Jac; - - if (Pert <= 0) - std::cerr << "ElementGeometry::ConsistencyTest - Pert cannot be less or equal than zero\n"; - - for (size_t a = 0; a < getParametricDimension(); a++) - Xpert[a] = X[a]; - - dMap(X, DY, Jac); - - for (size_t a = 0; a < getParametricDimension(); a++) { - Xpert[a] = X[a] + Pert; - map(Xpert, Yplus); - - Xpert[a] = X[a] - Pert; - map(Xpert, Yminus); - - Xpert[a] = X[a]; - - for (size_t i = 0; i < getEmbeddingDimension(); i++) - DYNum[a * getEmbeddingDimension() + i] = (Yplus[i] - Yminus[i]) / (2 * Pert); - } - - double error = 0; - double normX = 0; - double normDYNum = 0; - double normDY = 0; - - for (size_t a = 0; a < getParametricDimension(); a++) { - normX += X[a] * X[a]; - - for (size_t i = 0; i < getEmbeddingDimension(); i++) { 
- error += pow(DY[a * getEmbeddingDimension() + i] - DYNum[a * getEmbeddingDimension() + i], 2.); - normDY += pow(DY[a * getEmbeddingDimension() + i], 2.); - normDYNum += pow(DYNum[a * getEmbeddingDimension() + i], 2.); - } - } - error = sqrt(error); - normX = sqrt(normX); - normDY = sqrt(normDY); - normDYNum = sqrt(normDYNum); - - delete[] Yplus; - delete[] Yminus; - delete[] Xpert; - delete[] DYNum; - delete[] DY; - - if (error * (normX + Pert) < (normDY < normDYNum ? normDYNum : normDY) * Pert * 10) - return true; - else - return false; - } - - - - - /** - * @param a node id - * @param i dimension id - * @return value of dimension 'i' of coordinates of node 'a' - */ - virtual double getCoordinate (size_t a, size_t i) const { - // 0-based numbering of nodes in the mesh - size_t index = getConnectivity ()[a] * getSpatialDimension() + i; - return globalCoordVec[index]; - } - - virtual void computeCenter (std::vector& center) const { - std::cerr << "computeCenter not implemented" << std::endl; - abort (); - } - - -}; - -#endif diff --git a/maxflow/galois/apps/avi/libElm/libGeom/Segment.h b/maxflow/galois/apps/avi/libElm/libGeom/Segment.h deleted file mode 100644 index 96e87de..0000000 --- a/maxflow/galois/apps/avi/libElm/libGeom/Segment.h +++ /dev/null @@ -1,162 +0,0 @@ -/** - * Segment.h: a line segment - * DG++ - * - * Created by Adrian Lew on 10/7/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef SEGMENT -#define SEGMENT - -#include "AuxDefs.h" -#include "ElementGeometry.h" - -#include -#include -#include - -/** - \brief Segment: Geometry of straight segments - - A Segment is:\n - 1) A set of indices that describe the connectivity of the segment, - properly oriented. The coordinates - are not stored in the element but wherever the application decides\n - 2) An affine map from a one-dimensional segment (parametric configuration) - with length 1 to the convex - hull of the two vertices. Segments embedded in two- and three-dimensional space - are hence easily handled. \n - - The parametric configuration is the segment (0,1).\n - The parametric coordinate used is the distance to 0. 
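To make the map concrete: Segment::map below is the affine interpolation Y(X) = X*P0 + (1 - X)*P1 between the two vertices, and dMap returns the segment length as the (constant) Jacobian. A minimal standalone sketch of both, using the 2D endpoints that testSegment.cpp (later in this diff) feeds to Segment<2>:

#include <cmath>
#include <cstdio>

int main() {
  const double P0[2] = {0.0, 0.0}, P1[2] = {0.5, 0.3};  // endpoints from testSegment.cpp
  const double X = 0.25;                                // parametric coordinate in (0,1)
  double Y[2], DY[2], g11 = 0.0;
  for (int i = 0; i < 2; ++i) {
    Y[i]  = X * P0[i] + (1.0 - X) * P1[i];              // same affine map as Segment::map
    DY[i] = P0[i] - P1[i];                              // same derivative as Segment::dMap
    g11  += DY[i] * DY[i];
  }
  const double Jac = std::sqrt(g11);                    // Jacobian = length of the segment
  std::printf("Y = (%g, %g), Jac = %g\n", Y[0], Y[1], Jac);
  return 0;
}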
- - \warning Neither map nor dMap check for bounds of - their array arguments - -*/ - - -template -class Segment:public AbstractGeom -{ - public: - Segment (const std::vector globalCoordVec, const std::vector& connectivity) - :AbstractGeom (globalCoordVec, connectivity) { - assert (connectivity.size () == 2); - } - - - inline virtual ~Segment(){} - - Segment(const Segment & that) : AbstractGeom (that) { - } - - virtual Segment* clone() const { - return new Segment(*this); - } - - inline size_t getNumVertices() const { return 2; } - - inline const std::string getPolytopeName() const { return "SEGMENT"; } - - inline size_t getParametricDimension() const { return 1; } - - inline size_t getEmbeddingDimension() const { return SPD; } - - //! @param X first parametric coordinate - //! @param Y Output the result of the map - void map(const double * X, double *Y) const; - //! @param X first parametric coordinate - //! @param DY Output the derivative of the map - //! @param Jac Output the jacobian of the map - void dMap(const double * X, double *DY, double &Jac) const; - inline size_t getNumFaces() const { return 2; } - - //! \warning not implemented - ElementGeometry * getFaceGeometry(size_t e) const - { std::cerr << "Segment::getFaceGeometry. " - "Not implemented!\n\n"; return 0; } - - double getInRadius(void) const{ - double l; - l = 0.0; - for(size_t i=0; i::getCoordinate(1,i) - AbstractGeom::getCoordinate(0,i))* - (AbstractGeom::getCoordinate(1,i) - AbstractGeom::getCoordinate(0,i)) ; - } - - return(0.5*sqrt(l)); - } - - double getOutRadius(void) const{ - return(getInRadius()); - } - - virtual void computeNormal (size_t e, std::vector& vNormal) const { - std::cerr << "Segment::computeNormal not implemented yet" << std::endl; - abort (); - } - - - -}; - - - - - - -// Class implementation - - -template -void Segment::map(const double * X, double *Y) const -{ - for(size_t i=0; i::getCoordinate(0,i) + (1-X[0])*AbstractGeom::getCoordinate(1,i); - - return; -} - - - - -template -void Segment::dMap(const double * X, double *DY, double &Jac) const -{ - for(size_t i=0; i::getCoordinate(0,i) - AbstractGeom::getCoordinate(1,i); - - double g11=0; - - for(size_t i=0; i -#include - -#include "ElementGeometry.h" -#include "Triangle.h" - -/** - \brief Geometry of 3D tetrahedra. - - A tetrahedron is: - 1) A set of indices that describe the connectivity of the tetrahedran, properly oriented. - - 2) An affine map from a three-dimensional tetrahedron (parametric configuration) with - volume 1/6 to the - convex hull of 4 vertices. - - The parametric configuration of the tetrahedron is 0(1,0,0), 1(0,1,0), 2(0,0,0), 3(0,0,1). - The parametric coordinates are the ones associated with vertices 0,1 and 3. - The faces (for the purpose of quadrature points) are ordered as: - 1) Face 1: 2-1-0, - 2) Face 2: 2-0-3, - 3) Face 3: 2-3-1, - 4) Face 4: 0-1-3. - The convention used in numbering these faces is that the resulting normal is - always outward. - -*/ - -#define TET_SPD 3 - -class Tetrahedron: public AbstractGeom -{ -public: - static const double ParamCoord[]; - static const size_t FaceNodes[]; - - -public: - - Tetrahedron (const std::vector& globalCoordVec, const std::vector& connectivity) - :AbstractGeom (globalCoordVec, connectivity) { - assert (connectivity.size () == 4); - } - - - Tetrahedron(const Tetrahedron & that) : AbstractGeom(that) { - } - - virtual Tetrahedron* clone() const { - return new Tetrahedron(*this); - } - - - //! Returns the number of vertices. 
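One property worth making explicit, since it is only implicit in dMap below: the map is affine and the parametric tetrahedron has volume 1/6, so the (absolute value of the) Jacobian that dMap reports is the scalar triple product of the three edge vectors emanating from vertex 2, i.e. six times the volume of the real tetrahedron. A minimal standalone check using the vertex coordinates from testTetrahedron.cpp later in this diff; the expected values (Jac = 8, volume = 4/3) are derived here, not quoted from a model output:

#include <cstdio>

int main() {
  // Vertices from testTetrahedron.cpp: (2,0,0), (0,2,0), (0,0,0), (0,0,2).
  const double P[4][3] = {{2, 0, 0}, {0, 2, 0}, {0, 0, 0}, {0, 0, 2}};
  double a[3], b[3], c[3];
  for (int i = 0; i < 3; ++i) {          // edge vectors from vertex 2, as in Tetrahedron::dMap
    a[i] = P[0][i] - P[2][i];
    b[i] = P[1][i] - P[2][i];
    c[i] = P[3][i] - P[2][i];
  }
  // Scalar triple product a . (b x c) = Jacobian of the affine map = 6 * volume.
  const double Jac = a[0] * (b[1] * c[2] - b[2] * c[1])
                   + a[1] * (b[2] * c[0] - b[0] * c[2])
                   + a[2] * (b[0] * c[1] - b[1] * c[0]);
  std::printf("Jac = %g, volume = %g\n", Jac, Jac / 6.0);
  return 0;
}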
- inline size_t getNumVertices() const { return 4; } - - - //! Returns the name of the geometry. It clarifies the meaning of the connectivity array. - inline const std::string getPolytopeName() const { return "TETRAHEDRON"; } - - //! Returns the number of dimensions in the parametric configuartion. - inline size_t getParametricDimension() const { return 3; } - - //! Returns the number of dimensions in the real configuration. - inline size_t getEmbeddingDimension() const { return 3; } - - //! Number of faces the polytope has. - inline size_t getNumFaces() const { return 4; } - - //! map from parametric to real configuration. - //! \param X parametric coordinates. - //! \param Y returned real coordinates. - void map(const double *X, double *Y) const { - const size_t sd = AbstractGeom::getSpatialDimension (); - - for(size_t i=0; i::getCoordinate(0,i) + - X[1]*AbstractGeom::getCoordinate(1,i) + - X[2]*AbstractGeom::getCoordinate(3,i) + - (1.0-X[0]-X[1]-X[2])*AbstractGeom::getCoordinate(2,i); - } - - //! Derivative of the map from the parametric to the real configuration. - //! \param X parametric cooridnates - //! \param Jac returns Jacobian of the map. - //! \param DY returnd derivative of the map. - //! Here DY[a*getEmbeddingDimension()+i] contains the derivative of the a-th direction of the i-th coordinate. - void dMap(const double *X, double *DY, double &Jac) const { - const size_t sd = AbstractGeom::getSpatialDimension (); // spatial_dimension. - - for(size_t i=0; i::getCoordinate(0,i)-AbstractGeom::getCoordinate(2,i); - DY[sd*1+i] = AbstractGeom::getCoordinate(1,i)-AbstractGeom::getCoordinate(2,i); - DY[sd*2+i] = AbstractGeom::getCoordinate(3,i)-AbstractGeom::getCoordinate(2,i); - } - - Jac = 0.; - for(size_t i=0; i * getFaceGeometry(size_t e) const { - if(e<=3) { - std::vector conn(FaceNodes + 3*e, FaceNodes + 3*e + 2); - - return new Triangle (AbstractGeom::getGlobalCoordVec (), conn); - - // return new Triangle<3>(AbstractGeom::getConnectivity()[FaceNodes[3*e+0]], - // AbstractGeom::getConnectivity()[FaceNodes[3*e+1]], - // AbstractGeom::getConnectivity()[FaceNodes[3*e+2]]); - } - GALOIS_DIE("Tetrahedron::getFaceGeometry() : Request for invalid face."); - return NULL; - } - - //! get the inradius - double getInRadius(void) const { - double a[3],b[3],c[3],o,t[3],d[3],t1[3],t2[3], t3[3]; - for(size_t i=0;i<3;i++) { - o = AbstractGeom::getCoordinate(0,i); - a[i]=AbstractGeom::getCoordinate(1,i) - o; - b[i]=AbstractGeom::getCoordinate(2,i) - o; - c[i]=AbstractGeom::getCoordinate(3,i) - o; - } - cross(b,c,t); - cross(c,a,d); - d[0]+=t[0]; - d[1]+=t[1]; - d[2]+=t[2]; - cross(a,b,t); - d[0]+=t[0]; - d[1]+=t[1]; - d[2]+=t[2]; - cross(b,c,t); - cross(b,c,t1); - cross(c,a,t2); - cross(a,b,t3); - double rv = (dot(a,t)/(mag(t1)+mag(t2)+mag(t3)+ mag(d))); - - return(rv); - } - - //! 
get the outradius -- radius of the circumscribed sphere - double getOutRadius(void) const { - double x[4],y[4],z[4],r2[4],ones[4]; - double M11, M12, M13, M14, M15; - double **a; - a = new double*[4]; - - for(size_t i = 0; i < 4; i++) { - x[i] = AbstractGeom::getCoordinate(i,0); - y[i] = AbstractGeom::getCoordinate(i,1); - z[i] = AbstractGeom::getCoordinate(i,2); - r2[i] = x[i]*x[i] + y[i]*y[i] + z[i]*z[i]; - ones[i] = 1.0; - } - a[0] = x; - a[1] = y; - a[2] = z; - a[3] = ones; - M11 = determinant(a,4); - a[0] = r2; - M12 = determinant(a,4); - a[1] = x; - M13 = determinant(a,4); - a[2] = y; - M14 = determinant(a,4); - a[3] = z; - M15 = determinant(a,4); - - double x0,y0,z0; - x0 = 0.5 * M12/M11; - y0 = -0.5 * M13/M11; - z0 = 0.5 * M14/M11; - - delete[] a; - return(sqrt(x0*x0 + y0*y0 + z0*z0 - M15/M11)); - } - - - //! Compute external normal for a face - //! - //! @param e: face number for which the normal is desired - //! @param vNormal: output of the three Cartesian components of the normal vector - virtual void computeNormal (size_t e, std::vector& vNormal) const { - const size_t sd = AbstractGeom::SP_DIM; - - size_t n0, n1, n2; // Local node numbers of face 'e' - - n0 = FaceNodes[3*e]; n1 = FaceNodes[3*e+1]; n2 = FaceNodes[3*e+2]; - - // Finding the coordinates of each node of face 'e': - double p0[sd], p1[sd], p2[sd]; - - map(&ParamCoord[3*n0], p0); - map(&ParamCoord[3*n1], p1); - map(&ParamCoord[3*n2], p2); - - double L01[sd]; - double L02[sd]; - for(size_t k=0; k - -#include -#include - - - -/** - \brief Triangle: Geometry of planar triangles - - A Triangle is:\n - 1) A set of indices that describe the connectivity of the triangle, - properly oriented. The coordinates - are not stored in the element but wherever the application decides\n - 2) An affine map from a two-dimensional triangle (parametric configuration) - with area 1/2 to the convex - hull of the three vertices. Triangles embedded in three-dimensional space - are hence easily handled. \n - - The parametric configuration is the triangle (0,0),(1,0),(0,1).\n - The two parametric coordinates used are the ones aligned with the two axes in 2D. - - For a triangle with connectivity (1,2,3) the faces are ordered as - (1,2),(2,3),(3,1). - - Rationale for a templated class: prevent having multiple copies of - the dimension of SPD in each one of the multiple - elements in a mesh. A static variable for it would have only - allowed to use one type of triangles in a program. - - \warning Neither map nor dMap check for bounds of - their array arguments -*/ - - -template -class Triangle: public AbstractGeom { - public: - //! 
Connectivity in Triangle GlobalCoordinatesArray - Triangle (const std::vector& globalCoordVec, const std::vector& connectivity) - :AbstractGeom (globalCoordVec, connectivity) { - assert (connectivity.size () == 3); - } - - - inline virtual ~Triangle(){} - - Triangle(const Triangle & that) : AbstractGeom(that) { - } - - virtual Triangle* clone() const { - return new Triangle(*this); - } - - - inline size_t getNumVertices() const { return 3; } - - inline const std::string getPolytopeName() const { return "TRIANGLE"; } - - inline size_t getParametricDimension() const { return 2; } - - inline size_t getEmbeddingDimension() const { return SPD; } - - void map(const double * X, double *Y) const; - - void dMap(const double * X, double *DY, double &Jac) const; - - inline size_t getNumFaces() const { return 3; } - - virtual double getInRadius(void) const; - - virtual double getOutRadius(void) const; - - virtual Segment * getFaceGeometry(size_t e) const; - - virtual void computeNormal (size_t e, std::vector& vNormal) const; - - virtual void computeCenter (std::vector& center) const; - -private: - static size_t SegmentNodes[]; - - static double ParamCoord[]; - - static double midpoint (double x1, double x2) { return (x1 + x2) / 2; } -}; - - -// Class implementation - -template -size_t Triangle::SegmentNodes[] = {0,1,1,2,2,0}; - -template -double Triangle::ParamCoord[] = {1,0,0,1,0,0}; - - -template -void Triangle::map(const double * X, double *Y) const -{ - for(size_t i=0; i::getCoordinate(0,i) + X[1]*AbstractGeom::getCoordinate(1,i) + (1-X[0]-X[1])*AbstractGeom::getCoordinate(2,i); - - return; -} - - - - -template -void Triangle::dMap(const double * X, double *DY, double &Jac) const -{ - for(size_t i=0; i::getCoordinate(0,i) - AbstractGeom::getCoordinate(2,i); - DY[SPD+i] = AbstractGeom::getCoordinate(1,i) - AbstractGeom::getCoordinate(2,i); - } - - double g11=0; - double g22=0; - double g12=0; - - for(size_t i=0; i -Segment * Triangle::getFaceGeometry(size_t e) const -{ - std::vector conn(2); - switch(e) - { - case 0: - conn[0] = AbstractGeom::getConnectivity ()[0]; - conn[1] = AbstractGeom::getConnectivity ()[1]; - break; - - case 1: - conn[0] = AbstractGeom::getConnectivity ()[1]; - conn[1] = AbstractGeom::getConnectivity ()[2]; - break; - - case 2: - conn[0] = AbstractGeom::getConnectivity ()[2]; - conn[1] = AbstractGeom::getConnectivity ()[0]; - break; - - default: - return 0; - } - - return new Segment (AbstractGeom::getGlobalCoordVec (), conn); -} - -template -double Triangle:: getInRadius(void) const { - double a,b,c,s; - a = b = c = s = 0.0; - for(size_t i=0; i::getCoordinate(1,i) - AbstractGeom::getCoordinate(0,i))* - (AbstractGeom::getCoordinate(1,i) - AbstractGeom::getCoordinate(0,i)) ; - b += (AbstractGeom::getCoordinate(2,i) - AbstractGeom::getCoordinate(1,i))* - (AbstractGeom::getCoordinate(2,i) - AbstractGeom::getCoordinate(1,i)) ; - c += (AbstractGeom::getCoordinate(0,i) - AbstractGeom::getCoordinate(2,i))* - (AbstractGeom::getCoordinate(0,i) - AbstractGeom::getCoordinate(2,i)) ; - } - a = sqrt(a); - b = sqrt(b); - c = sqrt(c); - s = (a + b + c)/2.0; - return(2.0*sqrt(s*(s-a)*(s-b)*(s-c))/(a+b+c)); -} - - -template -double Triangle:: getOutRadius(void) const { - double a,b,c; - a = b = c = 0.0; - for(size_t i=0; i::getCoordinate(1,i) - AbstractGeom::getCoordinate(0,i))* - (AbstractGeom::getCoordinate(1,i) - AbstractGeom::getCoordinate(0,i)) ; - b += (AbstractGeom::getCoordinate(2,i) - AbstractGeom::getCoordinate(1,i))* - (AbstractGeom::getCoordinate(2,i) - 
AbstractGeom::getCoordinate(1,i)) ; - c += (AbstractGeom::getCoordinate(0,i) - AbstractGeom::getCoordinate(2,i))* - (AbstractGeom::getCoordinate(0,i) - AbstractGeom::getCoordinate(2,i)) ; - } - a = sqrt(a); - b = sqrt(b); - c = sqrt(c); - return(a*b*c/sqrt((a+b+c)*(b+c-a)*(c+a-b)*(a+b-c))); -} - -template -void Triangle::computeNormal(size_t e, std::vector &VNormal) const { - double NodalCoord[4]; - - size_t n[2]; - double v[2]; - - n[0] = SegmentNodes[e*2]; - n[1] = SegmentNodes[e*2+1]; - - Triangle::map(&Triangle::ParamCoord[2*n[0]], NodalCoord ); - Triangle::map(&Triangle::ParamCoord[2*n[1]], NodalCoord+2); - - v[0] = NodalCoord[2]-NodalCoord[0]; - v[1] = NodalCoord[3]-NodalCoord[1]; - - double norm = sqrt(v[0]*v[0]+v[1]*v[1]); - - if(norm<=0) { - std::cerr << - "The normal cannot be computed. Two vertices of a polytope seem to coincide\n"; - } - - VNormal.push_back( v[1]/norm); - VNormal.push_back(-v[0]/norm); -} - -/** - * computes the center of the in-circle of a triangle - * by computing the point of intersection of bisectors of the - * sides, which are perpendicular to the sides - */ -template -void Triangle::computeCenter (std::vector& center) const { - - double x1 = AbstractGeom::getCoordinate (0, 0); // node 0, x coord - double y1 = AbstractGeom::getCoordinate (0, 1); // node 0, y coord - - double x2 = AbstractGeom::getCoordinate (1, 0); // node 0, y coord - double y2 = AbstractGeom::getCoordinate (1, 1); // node 0, y coord - - - double x3 = AbstractGeom::getCoordinate (2, 0); // node 0, y coord - double y3 = AbstractGeom::getCoordinate (2, 1); // node 0, y coord - - - // check if the slope of some side will come out to inf - // and swap with third side - if (fabs(x2 - x1) < TOLERANCE) { // almost zero - std::swap (x2, x3); - std::swap (y2, y3); - } - - if (fabs(x3 - x2) < TOLERANCE) { - std::swap (x1, x2); - std::swap (y1, y2); - } - - - // mid points of the sides - double xb1 = midpoint(x1, x2); - double yb1 = midpoint(y1, y2); - - double xb2 = midpoint(x2, x3); - double yb2 = midpoint(y2, y3); - - double xb3 = midpoint(x3, x1); - double yb3 = midpoint(y3, y1); - - // slopes of all sides - double m1 = (y2 - y1) / (x2 - x1); - double m2 = (y3 - y2) / (x3 - x2); - double m3 = (y3 - y1) / (x3 - x1); - - // solve simultaneous equations for first 2 bisectors - double cy = (xb2 - xb1 + m2 * yb2 - m1 * yb1) / (m2 - m1); - double cx = (m2 * xb1 - m1 * xb2 + m2 * m1 * yb1 - m2 * m1 * yb2) / (m2 - m1); - - // check against the third bisector - if (fabs(x3-x1) > 0) { // checks if m3 == inf - assert(fabs((cx + m3 * cy) - (xb3 + m3 * yb3)) < 1e-9); - } - - // output the computed values - center[0] = cx; - center[1] = cy; -} - -#endif diff --git a/maxflow/galois/apps/avi/libElm/libGeom/test/testSegment.cpp b/maxflow/galois/apps/avi/libElm/libGeom/test/testSegment.cpp deleted file mode 100644 index 4c0eaf8..0000000 --- a/maxflow/galois/apps/avi/libElm/libGeom/test/testSegment.cpp +++ /dev/null @@ -1,118 +0,0 @@ -/* - * testSegment.cpp - * DG++ - * - * Created by Adrian Lew on 10/8/06. 
- * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -#include -#include -#include - -#include "Segment.h" - - -int main() -{ - std::vector dummycoordinates(4); - - // Fill-in the dummy global array - dummycoordinates[0] = 0; - dummycoordinates[1] = 0; - - dummycoordinates[2] = 0.5; - dummycoordinates[3] = 0.3; - - - std::vector conn(2); - conn[0] = 0; - conn[1] = 1; - Segment<2> MySegment(dummycoordinates, conn); - - std::cout << "Number of vertices: " << MySegment.getNumVertices() << " should be 2\n"; - std::cout << "ParametricDimension: " << MySegment.getParametricDimension() << " should be 1\n"; - std::cout << "EmbeddingDimension: " << MySegment.getEmbeddingDimension() << " should be 2\n"; - - srand(time(NULL)); - - double X[2]; - X[0] = double(rand())/double(RAND_MAX); //It may be outside the segment - - if(MySegment.consistencyTest(X,1.e-6)) - std::cout << "Consistency test successful" << "\n"; - else - std::cout << "Consistency test failed" << "\n"; - - // Test virtual mechanism and copy and clone constructors - ElementGeometry *MyElmGeo = &MySegment; - - std::cout << "Testing virtual mechanism: "; - std::cout << "Polytope name: " << MyElmGeo->getPolytopeName() << " should be SEGMENT\n"; - - const std::vector &Conn = MyElmGeo->getConnectivity(); - std::cout << "Connectivity: " << Conn[0] << " " << Conn[1] << " should be 1 2\n"; - - ElementGeometry *MyElmGeoCloned = MySegment.clone(); - std::cout << "Testing cloning mechanism: "; - std::cout << "Polytope name: " << MyElmGeoCloned->getPolytopeName() << " should be SEGMENT\n"; - const std::vector &Conn2 = MyElmGeoCloned->getConnectivity(); - std::cout << "Connectivity: " << Conn2[0] << " " << Conn2[1] << " should be 1 2\n"; - - - - std::cout << "Test Segment in 3D\n"; - - std::vector dummycoordinates3(6); - - // Fill-in the dummy global array - dummycoordinates3[0] = 0; - dummycoordinates3[1] = 0; - dummycoordinates3[2] = 0; - - dummycoordinates3[3] = 0.5; - dummycoordinates3[4] = 0.3; - dummycoordinates3[5] = 1; - - - Segment<3> MySegment3(dummycoordinates3, conn); - - std::cout << "Number of vertices: " << MySegment3.getNumVertices() << " should be 2\n"; - std::cout << "ParametricDimension: " << MySegment3.getParametricDimension() << " should be 1\n"; - std::cout << "EmbeddingDimension: " << MySegment3.getEmbeddingDimension() << " should be 3\n"; - - srand(time(NULL)); - - X[0] = double(rand())/double(RAND_MAX); // It may be outside the segment - - 
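// Note on the calls to consistencyTest in this test: AbstractGeom::consistencyTest
// (ElementGeometry.h above) perturbs each parametric coordinate by +/-Pert (here 1e-6),
// forms the centered difference (Y(X+Pert) - Y(X-Pert)) / (2*Pert), and accepts when that
// numerical derivative agrees with dMap to within a tolerance of order 10*Pert, measured
// relative to the derivative norms.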
if(MySegment3.consistencyTest(X,1.e-6)) - std::cout << "Consistency test successful" << "\n"; - else - std::cout << "Consistency test failed" << "\n"; - - - - return 1; - -} diff --git a/maxflow/galois/apps/avi/libElm/libGeom/test/testTetrahedron.cpp b/maxflow/galois/apps/avi/libElm/libGeom/test/testTetrahedron.cpp deleted file mode 100644 index c7ac1f5..0000000 --- a/maxflow/galois/apps/avi/libElm/libGeom/test/testTetrahedron.cpp +++ /dev/null @@ -1,82 +0,0 @@ -// Sriramajayam - -// Purpose : To check class Tetrahedron. - -#include -#include -#include "Tetrahedron.h" - -int main() -{ - - double coord[] = {2,0,0, - 0,2,0, - 0,0,0, - 0,0,2}; - - std::vector dummycoordinates(coord, coord+12); - - - int c[] = {0, 1, 2, 3}; - std::vector conn(c, c+4); - Tetrahedron MyTet(dummycoordinates, conn); - - - std::cout << "\nNumber of vertices: " << MyTet.getNumVertices(); - - std::cout << "\nParametricDimension: " << MyTet.getParametricDimension(); - - std::cout << "\nEmbeddingDimension: " << MyTet.getEmbeddingDimension(); - - std::cout<<"\n"; - const double X[3] = {0.25, 0.25, 0.25}; // Barycentric coordinates. - - if(MyTet.consistencyTest(X,1.e-6)) - std::cout << "Consistency test successful" << "\n"; - else - std::cout << "Consistency test failed" << "\n"; - - double DY[9], Jac; - MyTet.dMap(X, DY, Jac); - std::cout<<"\n Jacobian: "<getNumVertices(); - - std::cout << "\nParametricDimension: " << face->getParametricDimension(); - - std::cout << "\nEmbeddingDimension: " << face->getEmbeddingDimension(); - - std::cout << "\nConnectivity: " << face->getConnectivity()[0] << " " - << face->getConnectivity()[1] << " "<getConnectivity()[2] - <<" should be 3 1 4\n"; - - std::cout << "\nIn Radius: " << MyTet.getInRadius() << "\nshould be 0.42264973\n"; - std::cout << "\nOut Radius: " << MyTet.getOutRadius() << "\nshould be 1.7320508075688772\n"; - std::cout << "\n"; - delete face; - - // Test virtual mechanism and copy and clone constructors - ElementGeometry *MyElmGeo = &MyTet; - - std::cout << "Testing virtual mechanism: "; - std::cout << "\nPolytope name: " << MyElmGeo->getPolytopeName() - << " should be Tetrahedron\n"; - - const std::vector &Conn = MyElmGeo->getConnectivity(); - std::cout << "\nConnectivity: " << Conn[0] << " " << Conn[1] << " " << Conn[2] - <<" "<getPolytopeName() - << " should be Tetrahedron\n"; - const std::vector &Conn2 = MyElmGeoCloned->getConnectivity(); - std::cout << "\nConnectivity: " << Conn2[0] << " " << Conn2[1] << " " - << Conn2[2] <<" "< -#include -#include -#include "Triangle.h" - - -int main() -{ - std::vector dummycoordinates(6); - - // Fill-in the dummy global array - dummycoordinates[0] = 0; - dummycoordinates[1] = 0; - - dummycoordinates[2] = 0.5; - dummycoordinates[3] = 0.3; - - dummycoordinates[4] = 0.2; - dummycoordinates[5] = 1.5; - - - GlobalNodalIndex c[] = {0, 1, 2}; - std::vector conn(c, c+3); - Triangle<2> MyTriangle(dummycoordinates, conn); - - std::cout << "Number of vertices: " << MyTriangle.getNumVertices() << " should be 3\n"; - std::cout << "ParametricDimension: " << MyTriangle.getParametricDimension() << " should be 2\n"; - std::cout << "EmbeddingDimension: " << MyTriangle.getEmbeddingDimension() << " should be 2\n"; - - srand(time(NULL)); - - double X[2]; - X[0] = double(rand())/double(RAND_MAX); - X[1] = double(rand())/double(RAND_MAX); //It may be outside the triangle - - if(MyTriangle.consistencyTest(X,1.e-6)) - std::cout << "Consistency test successful" << "\n"; - else - std::cout << "Consistency test failed" << "\n"; - - std::cout << "\nIn 
Radius: " << MyTriangle.getInRadius() << "\nshould be 0.207002\n"; - std::cout << "\nOut Radius: " << MyTriangle.getOutRadius() << "\nshould be 0.790904\n"; - std::cout << "\n"; - - // Faces - ElementGeometry *face = MyTriangle.getFaceGeometry(2); - std::cout << "Number of vertices: " << face->getNumVertices() << " should be 2\n"; - std::cout << "ParametricDimension: " << face->getParametricDimension() << " should be 1\n"; - std::cout << "EmbeddingDimension: " << face->getEmbeddingDimension() << " should be 2\n"; - std::cout << "Connectivity: " << face->getConnectivity()[0] << " " - << face->getConnectivity()[1] << " should be 1 2\n"; - - - delete face; - - // Test virtual mechanism and copy and clone constructors - ElementGeometry *MyElmGeo = &MyTriangle; - - std::cout << "Testing virtual mechanism: "; - std::cout << "Polytope name: " << MyElmGeo->getPolytopeName() << " should be TRIANGLE\n"; - - const std::vector &Conn = MyElmGeo->getConnectivity(); - std::cout << "Connectivity: " << Conn[0] << " " << Conn[1] << " " << Conn[2] << " should be 1 2 3\n"; - - - ElementGeometry *MyElmGeoCloned = MyTriangle.clone(); - std::cout << "Testing cloning mechanism: "; - std::cout << "Polytope name: " << MyElmGeoCloned->getPolytopeName() << " should be TRIANGLE\n"; - const std::vector &Conn2 = MyElmGeoCloned->getConnectivity(); - std::cout << "Connectivity: " << Conn2[0] << " " << Conn2[1] << " " << Conn2[2] << " should be 1 2 3\n"; - - - - std::cout << "Test triangle in 3D\n"; - - std::vector dummycoordinates3(9); - - // Fill-in the dummy global array - dummycoordinates3[0] = 0; - dummycoordinates3[1] = 0; - dummycoordinates3[2] = 0; - - dummycoordinates3[3] = 0.5; - dummycoordinates3[4] = 0.3; - dummycoordinates3[5] = 1; - - dummycoordinates3[6] = 0.2; - dummycoordinates3[7] = 1.5; - dummycoordinates3[8] = 2; - - - Triangle<3> MyTriangle3(dummycoordinates3, conn); - - std::cout << "Number of vertices: " << MyTriangle3.getNumVertices() << " should be 3\n"; - std::cout << "ParametricDimension: " << MyTriangle3.getParametricDimension() << " should be 2\n"; - std::cout << "EmbeddingDimension: " << MyTriangle3.getEmbeddingDimension() << " should be 3\n"; - - srand(time(NULL)); - - X[0] = double(rand())/double(RAND_MAX); - X[1] = double(rand())/double(RAND_MAX); //It may be outside the triangle - - if(MyTriangle3.consistencyTest(X,1.e-6)) - std::cout << "Consistency test successful" << "\n"; - else - std::cout << "Consistency test failed" << "\n"; - - std::cout << "\nIn Radius: " << MyTriangle3.getInRadius() << "\nshould be 0.26404\n"; - std::cout << "\nOut Radius: " << MyTriangle3.getOutRadius() << "\nshould be 1.66368\n"; - std::cout << "\n"; - - - return 1; -} diff --git a/maxflow/galois/apps/avi/libElm/libQuad/Quadrature.cpp b/maxflow/galois/apps/avi/libElm/libQuad/Quadrature.cpp deleted file mode 100644 index da4396b..0000000 --- a/maxflow/galois/apps/avi/libElm/libQuad/Quadrature.cpp +++ /dev/null @@ -1,234 +0,0 @@ -/* - * Quadrature.cpp - * DG++ - * - * Created by Adrian Lew on 9/4/06. 
- * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include -#include "Quadrature.h" -#include - -Quadrature::Quadrature (const double * const xqdat, const double * const wqdat, const size_t NC, - const size_t NQ) : - numMapCoordinates (NC), numShapeCoordinates (NC), numQuadraturePoints (NQ) { -#if 0 - if (NQ < 0 || NC < 0) { - std::cerr << "Quadrature::Quadrature: Negative number of quadrature points or coordinates\n"; - exit (1); // Bad..., to be improved in the future with exceptions - } -#endif - - xqshape = xqmap = new double[NQ * NC]; - wq = new double[NQ]; - - for (size_t q = 0; q < NQ; q++) { - for (size_t i = 0; i < NC; i++) { - xqmap[q * NC + i] = xqdat[q * NC + i]; - } - wq[q] = wqdat[q]; - } -} - -Quadrature::Quadrature (const double * const xqdatmap, const double * const xqdatshape, - const double * const wqdat, const size_t NCmap, const size_t NCshape, const size_t NQ) : - numMapCoordinates (NCmap), numShapeCoordinates (NCshape), numQuadraturePoints (NQ) { -#if 0 - if (NQ < 0 || NCmap < 0 || NCshape < 0) { - std::cerr << "Quadrature::Quadrature: Negative number of quadrature points or coordinates\n"; - exit (1); // Bad..., to be improved in the future with exceptions - } -#endif - - xqmap = new double[NQ * NCmap]; - xqshape = new double[NQ * NCshape]; - wq = new double[NQ]; - - for (size_t q = 0; q < NQ; q++) { - for (size_t i = 0; i < NCmap; i++) { - xqmap[q * NCmap + i] = xqdatmap[q * NCmap + i]; - } - for (size_t i = 0; i < NCshape; i++) { - xqshape[q * NCshape + i] = xqdatshape[q * NCshape + i]; - } - wq[q] = wqdat[q]; - } -} - -Quadrature::Quadrature (const Quadrature &SQ) : - numMapCoordinates (SQ.numMapCoordinates),numShapeCoordinates (SQ.numShapeCoordinates), - numQuadraturePoints (SQ.numQuadraturePoints) { - xqmap = new double[numMapCoordinates * numQuadraturePoints]; - if (SQ.xqmap != SQ.xqshape) - xqshape = new double[numShapeCoordinates * numQuadraturePoints]; - else - xqshape = xqmap; - wq = new double[numQuadraturePoints]; - - for (size_t q = 0; q < numQuadraturePoints; q++) { - for (size_t i = 0; i < numMapCoordinates; i++) - xqmap[q * numMapCoordinates + i] = SQ.xqmap[q * numMapCoordinates + i]; - if (xqmap != xqshape) - for (size_t i = 0; i < numShapeCoordinates; i++) - xqshape[q * numShapeCoordinates + i] = SQ.xqshape[q * numShapeCoordinates + i]; - - wq[q] = SQ.wq[q]; - } -} - -Quadrature * Quadrature::clone () const { - return new Quadrature (*this); -} - -// Build specific quadratures 
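The specific rules defined below can be sanity-checked straight from their coordinate and weight tables: the 3-point triangle rule has weights summing to the reference-triangle area 1/2 and integrates quadratics exactly, and the 2-point segment rule is the Gauss-Legendre rule on (0,1), exact through cubics. A standalone sketch with the Triangle_1 and Line_1 bulk tables copied in by hand, not using the Quadrature class itself:

#include <cassert>
#include <cmath>

int main() {
  // Triangle_1::Bulk: 3 points (first two barycentric coordinates), weights 1/6.
  const double xq[3][2] = {{2.0 / 3.0, 1.0 / 6.0},
                           {1.0 / 6.0, 2.0 / 3.0},
                           {1.0 / 6.0, 1.0 / 6.0}};
  const double wq[3] = {1.0 / 6.0, 1.0 / 6.0, 1.0 / 6.0};

  double area = 0, ix2 = 0;
  for (int q = 0; q < 3; ++q) {
    area += wq[q];                        // integral of 1 over the reference triangle
    ix2  += wq[q] * xq[q][0] * xq[q][0];  // integral of x^2
  }
  assert(std::fabs(area - 0.5) < 1e-12);        // |T| = 1/2
  assert(std::fabs(ix2 - 1.0 / 12.0) < 1e-12);  // int_T x^2 dA = 1/12, exact for quadratics

  // Line_1::Bulk: 2-point Gauss rule on (0,1), exact for cubics.
  const double sq[2] = {0.5 + 0.577350269 / 2.0, 0.5 - 0.577350269 / 2.0};
  const double sw[2] = {0.5, 0.5};
  double ix3 = sw[0] * sq[0] * sq[0] * sq[0] + sw[1] * sq[1] * sq[1] * sq[1];
  assert(std::fabs(ix3 - 0.25) < 1e-6);         // int_0^1 x^3 dx = 1/4 (table truncated to 9 digits)
  return 0;
}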
- -// 3-point quadrature on Triangle (0,0), (1,0), (0,1) -const double Triangle_1::BulkCoordinates[] = - {0.6666666666666667e0,0.1666666666666667e0, - 0.1666666666666667e0,0.6666666666666667e0, - 0.1666666666666667e0,0.1666666666666667e0}; -const double Triangle_1::BulkWeights [] = {1./6.,1./6.,1./6.}; - -const double Triangle_1::FaceMapCoordinates[] = - {0.5 + 0.577350269/2., 0.5 - 0.577350269/2.}; // First barycentric coordinate in reference segment (0,1) - -const double Triangle_1::FaceOneShapeCoordinates[] = - {0.5 + 0.577350269/2., 0.5 - 0.577350269/2., - 0.5 - 0.577350269/2., 0.5 + 0.577350269/2.}; // Coordinates in the reference triangle -const double Triangle_1::FaceOneWeights [] = {1./2.,1./2.}; - -const double Triangle_1::FaceTwoShapeCoordinates[] = - {0., 0.5 + 0.577350269/2., - 0., 0.5 - 0.577350269/2.}; // Coordinates in the reference triangle -const double Triangle_1::FaceTwoWeights [] = {1./2.,1./2.}; - -const double Triangle_1::FaceThreeShapeCoordinates[] = - {0.5 - 0.577350269/2., 0., - 0.5 + 0.577350269/2., 0.}; // Coordinates in the reference triangle -const double Triangle_1::FaceThreeWeights [] = {1./2.,1./2.}; - -const Quadrature * const Triangle_1::Bulk = - new Quadrature(Triangle_1::BulkCoordinates, - Triangle_1::BulkWeights, 2, 3); -const Quadrature * const Triangle_1::FaceOne = - new Quadrature(Triangle_1::FaceMapCoordinates, - Triangle_1::FaceOneShapeCoordinates, - Triangle_1::FaceOneWeights, 1, 2, 2); -const Quadrature * const Triangle_1::FaceTwo = - new Quadrature(Triangle_1::FaceMapCoordinates, - Triangle_1::FaceTwoShapeCoordinates, - Triangle_1::FaceTwoWeights, 1, 2, 2); -const Quadrature * const Triangle_1::FaceThree = - new Quadrature(Triangle_1::FaceMapCoordinates, - Triangle_1::FaceThreeShapeCoordinates, - Triangle_1::FaceThreeWeights, 1, 2, 2); - - -// 2-point Gauss quadrature in a Segment (0,1) -const double Line_1::BulkCoordinates[] = {0.5 + 0.577350269/2., - 0.5 - 0.577350269/2.}; -const double Line_1::BulkWeights [] = {1./2.,1./2.}; - -const Quadrature * const Line_1::Bulk = - new Quadrature(Line_1::BulkCoordinates, - Line_1::BulkWeights, 1, 2); - - -// 4-point Gauss quadrature in a Tet (1,0,0), (0,1,0), (0,0,0), (0,0,1) - -const double Tet_1::BulkCoordinates[] = - {0.58541020e0, 0.13819660e0, 0.13819660e0, - 0.13819660e0, 0.58541020e0, 0.13819660e0, - 0.13819660e0, 0.13819660e0, 0.58541020e0, - 0.13819660e0, 0.13819660e0, 0.13819660e0}; - -const double Tet_1::BulkWeights [] = {1./24., - 1./24., - 1./24., - 1./24.}; - -const double Tet_1::FaceMapCoordinates[] = {2./3., 1./6., - 1./6., 2./3., - 1./6., 1./6.}; - -// Face 1 : 2-1-0. -const double Tet_1::FaceOneShapeCoordinates[] = - { 1./6., 1./6., 0., - 1./6., 2./3., 0., - 2./3., 1./6., 0.}; - -const double Tet_1::FaceOneWeights [] = { 1./6., 1./6., 1./6.}; - - -// Face 2 : 2-0-3. -const double Tet_1::FaceTwoShapeCoordinates[] = - { 1./6., 0., 1./6., - 2./3., 0., 1./6., - 1./6., 0., 2./3.}; - -const double Tet_1::FaceTwoWeights [] = { 1./6., 1./6., 1./6.}; - - -// Face 3: 2-3-1. -const double Tet_1::FaceThreeShapeCoordinates[] = - { 0., 1./6., 1./6., - 0., 1./6., 2./3., - 0., 2./3., 1./6.}; - -const double Tet_1::FaceThreeWeights [] = { 1./6., 1./6., 1./6.}; - - -// Face 4: 0-1-3. 
-const double Tet_1::FaceFourShapeCoordinates [] = - { 2./3., 1./6., 1./6., - 1./6., 2./3., 1./6., - 1./6., 1./6., 2./3.}; - -const double Tet_1::FaceFourWeights [] = { 1./6., 1./6., 1./6.}; - - -const Quadrature * const Tet_1::Bulk = - new Quadrature(Tet_1::BulkCoordinates, - Tet_1::BulkWeights, 3, 4); - -const Quadrature * const Tet_1::FaceOne = - new Quadrature(Tet_1::FaceMapCoordinates, - Tet_1::FaceOneShapeCoordinates, - Tet_1::FaceOneWeights, 2, 3, 3); - -const Quadrature * const Tet_1::FaceTwo = - new Quadrature(Tet_1::FaceMapCoordinates, - Tet_1::FaceTwoShapeCoordinates, - Tet_1::FaceTwoWeights, 2, 3, 3); - -const Quadrature * const Tet_1::FaceThree = - new Quadrature(Tet_1::FaceMapCoordinates, - Tet_1::FaceThreeShapeCoordinates, - Tet_1::FaceThreeWeights, 2, 3, 3); - -const Quadrature * const Tet_1::FaceFour = - new Quadrature(Tet_1::FaceMapCoordinates, - Tet_1::FaceFourShapeCoordinates, - Tet_1::FaceFourWeights, 2, 3, 3); - diff --git a/maxflow/galois/apps/avi/libElm/libQuad/Quadrature.h b/maxflow/galois/apps/avi/libElm/libQuad/Quadrature.h deleted file mode 100644 index 8c6057d..0000000 --- a/maxflow/galois/apps/avi/libElm/libQuad/Quadrature.h +++ /dev/null @@ -1,254 +0,0 @@ -/** - * Quadrature.h - * DG++ - * - * Created by Adrian Lew on 9/4/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef QUADRATURE -#define QUADRATURE -#include - -/** - \brief - base class for any quadrature rule - - A quadrature rule provides and approximation of the integral over a domain - \f$\Omega\f$, and it has:\n - 1) A number of quadrature point coordinates in \f$\Omega\f$. (map) - 2) Weights at each quadrature point - 3) A second set of coordinates for the same quadrature points. (Shape) - - The number of coordinates in (1) of each quadrature point should be unisolvent, i.e., - each coordinate can be varied independently. The reason for this choice is - that otherwise we need a way to convey the constraint between coordinates - when functions of these coordinates and their derivatives are considered. For - example, the mapping from the parametric configuration to the real space - in finite elements. - - In addition to the unisolvent set of coordinates above, the class also allows for - an alternative set of coordinates for each gauss point. These do not need to be - unisolvent, and can be used to localize the quadrature points when embedded in - a higher dimensional space. 
For example, if \f$\Omega\f$ is a triangle in 3D, then - the second set of coordinates provide the coordinates of these points in 3D. - - A specific example is the following: consider a triangle in 2D, and - the integration over a segment on its boundary. The quadrature - object for a segment should have only one (map) coordinate in (1) - to be unisolvent. This coordinate is one of the barycentric - coordinates of the quadrature point over the segment. However, if - we have a function over the triangle and would like to restrict its - values to the segment, we need the coordinates of the quadrature - points in the triangle. This is the case when integrating a shape - function in the triangle over the segment. This second set of - coordinates is given in (3). - - Based on this example we shall call the coordinates in (1) map coordinates, and the - coordinates in (3) Shape coordinates. - - Although the coordinates in (3) are not a natural inclusion in a quadrature rule, - they provide the simplest implementation for the scenario just described. - - A number of specific Quadrature objects are defined in - Quadrature.cpp, and declared here. - -*/ - -class Quadrature -{ - public: - //! @param NQ number of quadrature points - //! @param NC number of coordinates for each point - //! @param xqdat vector with coordinates of the quadrature points - //! xqdat[a*NC+i] gives the i-th coordinate of quadrature point a - //! @param wqdat vector with quadrature point weights - //! wq[a] is the weight of quadrature point a - //! This constructor sets the two sets of coordinates for the quadrature points to - //! be the same. - Quadrature(const double * const xqdat, - const double * const wqdat, - const size_t NC, - const size_t NQ); - //! @param NQ number of quadrature points - //! @param NCmap number of map coordinates for each point - //! @param NCshape number of shape coordinates for each point - //! @param xqdatmap vector with coordinates of the quadrature points - //! xqdatmap[a*NC+i] gives the i-th coordinate of quadrature point a - //! @param xqdatshape vector with coordinates of the quadrature points - //! xqdatshape[a*NC+i] gives the i-th coordinate of quadrature point a - //! @param wqdat vector with quadrature point weights - //! wq[a] is the weight of quadrature point a - Quadrature(const double * const xqdatmap, - const double * const xqdatshape, - const double * const wqdat, - const size_t NCmap, - const size_t NCshape, - const size_t NQ); - inline virtual ~Quadrature() { - if(xqmap!=xqshape) { - delete[] xqshape; xqshape = NULL; - } - delete[] xqmap; xqmap = NULL; - delete[] wq; wq = NULL; - } - - Quadrature(const Quadrature &); - Quadrature * clone() const; - - // Accessors/Mutators - inline size_t getNumQuadraturePoints() const { return numQuadraturePoints; } - - //! Returns the number of map coordinates - inline size_t getNumCoordinates() const { return numMapCoordinates; } - - //! Returns the number of shape coordinates - inline size_t getNumShapeCoordinates() const { return numShapeCoordinates; } - - //! Return map coordinates of quadrature point q - inline const double * getQuadraturePoint(size_t q) const - { return xqmap+q*numMapCoordinates; } - - //! Return shape coordinates of quadrature point q - inline const double * getQuadraturePointShape(size_t q) const { return xqshape+q*numShapeCoordinates; } - - //! 
Returns weight of quadrature point q - inline double getQuadratureWeights(size_t q) const { return wq[q]; } - - private: - double * xqmap; - double * xqshape; - double * wq; - size_t numMapCoordinates; - size_t numShapeCoordinates; - size_t numQuadraturePoints; -}; - - - -/** - \brief SpecificQuadratures: class used just to qualify all specific - quadrature objects used to build the quadrature rules. - */ -class SpecificQuadratures {}; - -/** - \brief 3-point Gauss quadrature coordinates in the triangle (0,0), (1,0), (0,1), and its traces. Barycentric coordinates used for the Gauss points. - */ - -class Triangle_1: public SpecificQuadratures -{ -public: - //! Bulk quadrature - static const Quadrature * const Bulk; - - //! Face (1,2) quadrature - static const Quadrature * const FaceOne; - //! Face (2,3) quadrature - static const Quadrature * const FaceTwo; - //! Face (3,1) quadrature - static const Quadrature * const FaceThree; - -private: - static const double BulkCoordinates[]; - static const double BulkWeights[]; - static const double FaceMapCoordinates[]; - static const double FaceOneShapeCoordinates[]; - static const double FaceOneWeights[]; - static const double FaceTwoShapeCoordinates[]; - static const double FaceTwoWeights[]; - static const double FaceThreeShapeCoordinates[]; - static const double FaceThreeWeights[]; -}; - - - - -/** - \brief 2-point Gauss quadrature coordinates in the segment (0,1). - Barycentric coordinates used for the Gauss points. - */ -class Line_1: public SpecificQuadratures -{ -public: - //! Bulk quadrature - static const Quadrature * const Bulk; - -private: - static const double BulkCoordinates[]; - static const double BulkWeights[]; -}; - - - - -/*! - * \brief Class for 4 point quadrature rules for tetrahedra. - * - * 4-point Gauss quadrature coordinates in the tetrahedron with - * 0(1,0,0), 1(0,1,0), 2(0,0,0), 3(0,0,1) as vertices. - * Barycentric coordinates are used for the Gauss points. - * Barycentric coordinates are specified with respect to vertices 1,2 and 4 - * in that order. Coordinate of vertex 3 is not independent. - * - * Quadrature for Faces: - * Faces are ordered as - - * Face 1: 2-1-0, - * Face 2: 2-0-3, - * Face 3: 2-3-1, - * Face 4: 0-1-3. - * - * \todo Need to include a test for this quadrature - */ - -class Tet_1: public SpecificQuadratures -{ -public: - //! Bulk quadrature - static const Quadrature * const Bulk; - - //! Face (2-1-0) quadrature - static const Quadrature * const FaceOne; - //! Face (2-0-3) quadrature - static const Quadrature * const FaceTwo; - //! Face (2-3-1) quadrature - static const Quadrature * const FaceThree; - //! 
Face (0-1-3) quadrature - static const Quadrature * const FaceFour; - -private: - static const double BulkCoordinates[]; - static const double BulkWeights[]; - static const double FaceMapCoordinates[]; - static const double FaceOneShapeCoordinates[]; - static const double FaceOneWeights[]; - static const double FaceTwoShapeCoordinates[]; - static const double FaceTwoWeights[]; - static const double FaceThreeShapeCoordinates[]; - static const double FaceThreeWeights[]; - static const double FaceFourShapeCoordinates[]; - static const double FaceFourWeights[]; -}; - - -#endif diff --git a/maxflow/galois/apps/avi/libElm/libQuad/test/testSimpleQuadrature.cpp b/maxflow/galois/apps/avi/libElm/libQuad/test/testSimpleQuadrature.cpp deleted file mode 100644 index 4881ca1..0000000 --- a/maxflow/galois/apps/avi/libElm/libQuad/test/testSimpleQuadrature.cpp +++ /dev/null @@ -1,198 +0,0 @@ -/* - * testSimpleQuadrature.cpp - * DG++ - * - * Created by Adrian Lew on 9/7/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "Quadrature.h" -#include - -int main() -{ - std::cout << Triangle_1::Bulk->getNumQuadraturePoints() << " should be " << 3 << "\n"; - std::cout << Triangle_1::Bulk->getNumShapeCoordinates() << " should be " << 2 << "\n\n"; - std::cout << Triangle_1::Bulk->getNumCoordinates() << " should be " << 2 << "\n\n"; - - for(int q=0; qgetNumQuadraturePoints(); q++) - { - for(int i=0; igetNumCoordinates(); i++) - std::cout << Triangle_1::Bulk->getQuadraturePoint(q)[i] << " "; - std::cout << "\n"; - } - std::cout << "should read \n" - "0.666667 0.166667 \n" - "0.166667 0.666667 \n" - "0.166667 0.166667 \n\n"; - - for(int q=0; qgetNumQuadraturePoints(); q++) - { - for(int i=0; igetNumShapeCoordinates(); i++) - std::cout << Triangle_1::Bulk->getQuadraturePointShape(q)[i] << " "; - std::cout << "\n"; - } - std::cout << "should read \n" - "0.666667 0.166667 \n" - "0.166667 0.666667 \n" - "0.166667 0.166667 \n\n"; - - - for(int q=0; qgetNumQuadraturePoints(); q++) - std::cout << Triangle_1::Bulk->getQuadratureWeights(q) << " "; - std::cout << "\n"; - - std::cout << "should read \n" - "0.166667 0.166667 0.166667\n\n"; - - std::cout << "\n Copy Constructor\n"; - - Quadrature GaussCopy(*Triangle_1::Bulk); - std::cout << GaussCopy.getNumQuadraturePoints() << " should be " << 3 << "\n"; - std::cout << GaussCopy.getNumShapeCoordinates() << " should be " << 2 << "\n\n"; - std::cout << GaussCopy.getNumCoordinates() << " should be " << 2 << "\n\n"; - for(int q=0; qclone(); - std::cout << GaussClone->getNumQuadraturePoints() << " should be " << 3 << "\n"; - std::cout << GaussClone->getNumCoordinates() << " should be " << 2 << "\n\n"; - for(int q=0; qgetNumQuadraturePoints(); q++) - { - for(int i=0; igetNumCoordinates(); i++) - std::cout << GaussClone->getQuadraturePoint(q)[i] << " "; - std::cout << "\n"; - } - std::cout << "should read \n" - "0.666667 0.166667 \n" - "0.166667 0.666667 \n" - "0.166667 0.166667 \n\n"; - - for(int q=0; qgetNumQuadraturePoints(); q++) - std::cout << GaussClone->getQuadratureWeights(q) << " "; - std::cout << "\n"; - - std::cout << "should read \n " - "0.166667 0.166667 0.166667\n\n"; - - - std::cout << Triangle_1::FaceOne->getNumQuadraturePoints() << " should be " << 2 << "\n"; - std::cout << Triangle_1::FaceOne->getNumCoordinates() << " should be " << 1 << "\n\n"; - std::cout << Triangle_1::FaceOne->getNumShapeCoordinates() << " should be " << 2 << "\n\n"; - - for(int q=0; qgetNumQuadraturePoints(); q++) - { - for(int i=0; igetNumCoordinates(); i++) - std::cout << Triangle_1::FaceOne->getQuadraturePoint(q)[i] << " "; - std::cout << "\n"; - } - std::cout << "should read \n" - "0.788675\n" - "0.211325\n\n"; - - for(int q=0; qgetNumQuadraturePoints(); q++) - { - for(int i=0; igetNumShapeCoordinates(); i++) - std::cout << Triangle_1::FaceOne->getQuadraturePointShape(q)[i] << " "; - std::cout << "\n"; - } - std::cout << "should read \n" - "0.788675 0.211325\n" - "0.211325 0.788675\n\n"; - - for(int q=0; qgetNumQuadraturePoints(); q++) - std::cout << Triangle_1::FaceOne->getQuadratureWeights(q) << " "; - std::cout << "\n"; - - std::cout << "should read \n" - "0.5 0.5\n\n"; - - - std::cout << "Test copy constructor once more\n\n"; - - Quadrature NewTriangleFace(*Triangle_1::FaceOne); - - std::cout << NewTriangleFace.getNumQuadraturePoints() << " should be " << 2 << "\n"; - std::cout << NewTriangleFace.getNumCoordinates() << " should be " << 1 << "\n\n"; - std::cout << NewTriangleFace.getNumShapeCoordinates() << " should be " << 2 << "\n\n"; - - for(int q=0; q -class Linear: 
public Shape { -public: - //! Constructor \n - //! \param iMap Shuffle of the barycentric coordinates. iMap[a] returns the position of the original - //! a-th barycentric coordinate after shuffling. - //! If not provided, an identity mapping is assumed iMap[a]=a - - //! \warning No way to know if iMap has the proper length. - Linear (const size_t * iMap = 0); - - inline virtual ~Linear () {} - - Linear (const Linear &); - - virtual inline Linear * clone () const { - return new Linear (*this); - } - - // Accessors/Mutators - inline size_t getNumFunctions () const { - return SPD + 1; - } - inline size_t getNumVariables () const { - return SPD; - } - - // Functionality - - //! @param a shape function number - //! @param x first SPD barycentric coordinates of the point - //! \warning Does not check range for parameter a - double getVal (size_t a, const double *x) const; - //! @param a shape function number - //! @param x first spartial_dimension barycentric coordinates of the point - //! @param i partial derivative number - //! Returns derivative with respect to the barycentric coordinates - //! \warning Does not check range for parameters a and i - double getDVal (size_t a, const double *x, size_t i) const; - -private: - size_t bctMap[SPD + 1]; -}; - -template -Linear::Linear (const size_t * iMap) { - for (size_t a = 0; a < SPD + 1; a++) - bctMap[a] = a; - - if (iMap != 0) { - for (size_t a = 0; a < SPD + 1; a++) { - bctMap[a] = iMap[a]; - } - } - - return; -} - -template -Linear::Linear (const Linear &Lin) { - for (size_t a = 0; a < SPD + 1; a++) { - bctMap[a] = Lin.bctMap[a]; - } -} - -template -double Linear::getVal (size_t a, const double *x) const { - if (bctMap[a] != SPD) { - return x[bctMap[a]]; - } - else { - double va = 0; - - for (size_t k = 0; k < SPD; k++) { - va += x[k]; - } - - return 1 - va; - } -} - -template -double Linear::getDVal (size_t a, const double *x, size_t i) const { - if (bctMap[a] != SPD) { - return bctMap[a] == i ? 1 : 0; - } else { - return -1; - } -} - -#endif diff --git a/maxflow/galois/apps/avi/libElm/libShape/Shape.cpp b/maxflow/galois/apps/avi/libElm/libShape/Shape.cpp deleted file mode 100644 index 8361d4b..0000000 --- a/maxflow/galois/apps/avi/libElm/libShape/Shape.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Shape.cpp - * DG++ - * - * Created by Adrian Lew on 9/7/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
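The Linear<SPD> shape functions above are the SPD+1 affine (barycentric) basis functions: the first SPD return the matching barycentric coordinate and the last returns one minus their sum, so the values form a partition of unity and the constant derivatives sum to zero in every direction. A self-contained 2D sketch of the same evaluation rule (the optional bctMap shuffle is ignored, and linVal/linDVal are illustrative names, not the library API):

#include <cassert>
#include <cmath>

// Value and derivative of the a-th linear shape function on a triangle,
// x = (x0, x1) being the first two barycentric coordinates (SPD = 2).
static double linVal(int a, const double* x) {
  return a < 2 ? x[a] : 1.0 - x[0] - x[1];
}
static double linDVal(int a, int i) {
  return a < 2 ? (a == i ? 1.0 : 0.0) : -1.0;
}

int main() {
  const double x[2] = {0.1, 0.8};  // an arbitrary evaluation point

  // Partition of unity: the three shape functions sum to 1 everywhere.
  double sum = linVal(0, x) + linVal(1, x) + linVal(2, x);
  assert(std::fabs(sum - 1.0) < 1e-14);

  // Derivatives are constant and sum to zero in each direction.
  for (int i = 0; i < 2; ++i) {
    double dsum = linDVal(0, i) + linDVal(1, i) + linDVal(2, i);
    assert(std::fabs(dsum) < 1e-14);
  }
  return 0;
}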
- */ - -#include "Shape.h" -#include -#include - -bool Shape::consistencyTest (const double * X, const double Pert) const { - double *DValNum = new double[getNumFunctions () * getNumVariables ()]; - double *DValAnal = new double[getNumFunctions () * getNumVariables ()]; - double *Xpert = new double[getNumVariables ()]; - double *Valplus = new double[getNumFunctions ()]; - double *Valminus = new double[getNumFunctions ()]; - - if (Pert <= 0) - std::cerr << "Shape::ConsistencyTest - Pert cannot be less or equal than zero\n"; - - for (size_t i = 0; i < getNumVariables (); i++) { - Xpert[i] = X[i]; - for (size_t a = 0; a < getNumFunctions (); a++) - DValAnal[a * getNumVariables () + i] = getDVal (a, X, i); - } - - for (size_t i = 0; i < getNumVariables (); i++) { - Xpert[i] = X[i] + Pert; - for (size_t a = 0; a < getNumFunctions (); a++) - Valplus[a] = getVal (a, Xpert); - - Xpert[i] = X[i] - Pert; - for (size_t a = 0; a < getNumFunctions (); a++) - Valminus[a] = getVal (a, Xpert); - - Xpert[i] = X[i]; - - for (size_t a = 0; a < getNumFunctions (); a++) - DValNum[a * getNumVariables () + i] = (Valplus[a] - Valminus[a]) / (2 * Pert); - } - - double error = 0; - double normX = 0; - double normDValNum = 0; - double normDValAnal = 0; - - for (size_t i = 0; i < getNumVariables (); i++) { - normX += X[i] * X[i]; - - for (size_t a = 0; a < getNumFunctions (); a++) { - error += pow (DValAnal[a * getNumVariables () + i] - DValNum[a * getNumVariables () + i], 2.); - normDValAnal += pow (DValAnal[a * getNumVariables () + i], 2.); - normDValNum += pow (DValNum[a * getNumVariables () + i], 2.); - } - } - error = sqrt (error); - normX = sqrt (normX); - normDValAnal = sqrt (normDValAnal); - normDValNum = sqrt (normDValNum); - - delete[] Valplus; - delete[] Valminus; - delete[] Xpert; - delete[] DValNum; - delete[] DValAnal; - - if (error * (normX + Pert) < (normDValAnal < normDValNum ? normDValNum : normDValAnal) * Pert * 10) { - return true; - } else { - return false; - } -} - diff --git a/maxflow/galois/apps/avi/libElm/libShape/Shape.h b/maxflow/galois/apps/avi/libElm/libShape/Shape.h deleted file mode 100644 index 9965f98..0000000 --- a/maxflow/galois/apps/avi/libElm/libShape/Shape.h +++ /dev/null @@ -1,87 +0,0 @@ -/** - * Shape.h - * DG++ - * - * Created by Adrian Lew on 9/4/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
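Shape::consistencyTest above compares each analytic derivative getDVal against the second-order central difference (getVal(x+h) - getVal(x-h))/(2h) and accepts when error*(||X|| + Pert) stays below ten times Pert times the larger of the analytic and numerical derivative norms. A minimal sketch of the same check for a single hypothetical scalar function f (not part of the library):

#include <cassert>
#include <cmath>

// f(x) = x0^2 * x1 and its analytic gradient.
static double f(const double* x) { return x[0] * x[0] * x[1]; }
static double df(const double* x, int i) {
  return i == 0 ? 2.0 * x[0] * x[1] : x[0] * x[0];
}

int main() {
  const double x[2] = {0.3, 0.7};
  const double h = 1e-6;  // perturbation, playing the role of Pert above

  for (int i = 0; i < 2; ++i) {
    double xp[2] = {x[0], x[1]}, xm[2] = {x[0], x[1]};
    xp[i] += h;
    xm[i] -= h;
    double numeric = (f(xp) - f(xm)) / (2.0 * h);  // central difference
    // Central differences are O(h^2) accurate, so a tolerance well above h^2
    // but far below O(1) separates a correct derivative from a wrong one.
    assert(std::fabs(numeric - df(x, i)) < 1e-8);
  }
  return 0;
}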
- */ - -#ifndef SHAPE -#define SHAPE - -#include - -/** - \brief base class for any set of basis (or shape) functions and its - first derivatives - - A set of basis functions permits the evaluation of any of the functions - in the basis at any point - - Notice that two Shape classes differ if they span the same space but - have different bases. - */ - -class Shape { -public: - inline Shape () {} - - inline virtual ~Shape () {} - - inline Shape (const Shape &) {} - - virtual Shape * clone () const = 0; - - // Accessors/Mutators - virtual size_t getNumFunctions () const = 0; - virtual size_t getNumVariables () const = 0; //!< Number of arguments of the functions - - //! Value of shape \f$N_a(x)\f$ - //! @param a node id - //! @param x coordinates of the point x - //! - //! We have purposedly left the type of coordinates the point should have unspecified, for flexibility. - //! Barycentric and Cartesian coordinates are adopted thoughout the code. - //! - //! \todo It'd be nice to have some form of Coordinate object, which may derive Barycentric and Cartesian - //! coordinates, and that would guarantee that the argument to each function is always the correct one. - - virtual double getVal (size_t a, const double *x) const = 0; - - //! Value of \f$\frac{\partial N_a}{\partial x_i}(x)\f$ - //! @param a node id - //! @param x coordinates of the point a - //! @param i coordinate number - //! - //! We have purposedly left the type of coordinates the point should have unspecified, for flexibility. - //! Barycentric and Cartesian coordinates are adopted thoughout the code. - virtual double getDVal (size_t a, const double *x, size_t i) const = 0; - - //! Consistency test for getVal and getDVal - //! @param x coordinates of the point at which to test - //! @param Pert size of the perturbation with which to compute numerical - //! derivatives (x->x+Pert) - bool consistencyTest (const double *x, const double Pert) const; -}; - -#endif diff --git a/maxflow/galois/apps/avi/libElm/libShape/test/testLinear.cpp b/maxflow/galois/apps/avi/libElm/libShape/test/testLinear.cpp deleted file mode 100644 index aa789bd..0000000 --- a/maxflow/galois/apps/avi/libElm/libShape/test/testLinear.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/* - * testLinear.cpp - * DG++ - * - * Created by Adrian Lew on 9/9/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - - -#include "Linear.h" -#include - -int main() -{ - /* 2D test */ - double coord2[] = {0.1,0.8}; - - Linear<2> Linear2D; - - std::cout << Linear2D.getNumberOfFunctions() << " should be " << 3 << "\n"; - std::cout << Linear2D.getNumberOfVariables() << " should be " << 2 << "\n"; - - if(Linear2D.consistencyTest(coord2,1.e-6)) - std::cout << "Consistency test successful" << "\n"; - else - std::cout << "Consistency test failed" << "\n"; - - - std::cout << "Copy Constructor\n"; - - Linear<2> Linear2DCopy(Linear2D); - double flag = 0; - - for(int a=0; agetVal(a,coord2) != Linear2D.Val(a,coord2)) - flag = 1; - if(flag) - std::cout << "Cloning failed" << "\n"; - else - std::cout << "Cloning successful" << "\n"; - - /* 3D test */ - std::cout << "3D test\n"; - - double coord3[] = {1.2, 0.1, -0.4}; - - Linear<3> Linear3D; - - std::cout << Linear3D.getNumberOfFunctions() << " should be " << 4 << "\n"; - std::cout << Linear3D.getNumberOfVariables() << " should be " << 3 << "\n\n"; - - if(Linear3D.consistencyTest(coord3,1.e-6)) - std::cout << "Consistency test successful" << "\n"; - else - std::cout << "Consistency test failed" << "\n"; - -} diff --git a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/BasisFunctions.cpp b/maxflow/galois/apps/avi/libElm/libShapesEvaluated/BasisFunctions.cpp deleted file mode 100644 index 3b3285c..0000000 --- a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/BasisFunctions.cpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * BasisFunctions.h - * DG++ - * - * Created by Adrian Lew on 10/21/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "BasisFunctions.h" -#include "BasisFunctionsProvided.h" - -std::vector EmptyBasisFunctions::ZeroSizeVector; -const std::vector BasisFunctionsProvidedExternalQuad::ZeroSizeVector; diff --git a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/BasisFunctions.h b/maxflow/galois/apps/avi/libElm/libShapesEvaluated/BasisFunctions.h deleted file mode 100644 index 614e957..0000000 --- a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/BasisFunctions.h +++ /dev/null @@ -1,136 +0,0 @@ -/** - * BasisFunctions.h - * DG++ - * - * Created by Adrian Lew on 10/21/06. 
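BasisFunctions.h, declared below, hands out shape values and derivatives as flat vectors indexed as getShapes()[q*NShapes + a] and getDShapes()[q*NShapes*spd + a*spd + i]. A small hypothetical helper (illustrative names and data only, not the library's API) showing how such a flattened table is traversed:

#include <cstddef>
#include <cstdio>
#include <vector>

// Flattened storage in the same layout BasisFunctions.h documents:
// shapes[q*nShapes + a], dshapes[q*nShapes*spd + a*spd + i].
struct FlatBasisTable {
  std::size_t nShapes, spd;
  std::vector<double> shapes, dshapes;

  double shape(std::size_t q, std::size_t a) const {
    return shapes[q * nShapes + a];
  }
  double dshape(std::size_t q, std::size_t a, std::size_t i) const {
    return dshapes[q * nShapes * spd + a * spd + i];
  }
};

int main() {
  // One quadrature point, three linear shape functions on a triangle (spd = 2),
  // evaluated at the barycenter (1/3, 1/3): values 1/3 each, derivatives as
  // for the Linear<2> shapes above.
  FlatBasisTable t{3, 2,
                   {1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0},
                   {1.0, 0.0, 0.0, 1.0, -1.0, -1.0}};
  for (std::size_t a = 0; a < t.nShapes; ++a)
    std::printf("N_%zu = %g, dN_%zu = (%g, %g)\n", a, t.shape(0, a), a,
                t.dshape(0, a, 0), t.dshape(0, a, 1));
  return 0;
}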
- * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef BASISFUNCTIONS -#define BASISFUNCTIONS - -#include "Shape.h" -#include "Quadrature.h" -#include "ElementGeometry.h" - - - -/** - \brief - BasisFunctions: Evaluation of basis functions and derivatives at - the quadrature points. Abstract class. - - A BasisFunctions object consists of:\n - 1) A set of quadrature points with quadrature weights\n - 2) A set of basis functions and their derivatives evaluated at these points\n - - - \todo So far the number of spatial dimensions, needed to transverse - the getDShapes and getQuadraturePointArrays is not provided, but should be - obtained from the ElementGeometry where these basis functions are in. One - way in which this may be computed is using the fact that - getQuadraturePointCoordinates().size()/getIntegrationWeights().size() = - getDShapes().size()/getShapes().size() = spatial dimensions - -*/ - -class BasisFunctions -{ - public: - inline BasisFunctions() {} - inline virtual ~BasisFunctions(){} - inline BasisFunctions(const BasisFunctions &) {} - virtual BasisFunctions * clone() const = 0; - - //! Shape functions at quadrature points - //! getShapes()[q*getBasisDimension()+a] - //! gives the value of shape function a at quadrature point q - //! - //! getShapes returns an empty vector if no shape functions are available - virtual const std::vector & getShapes() const = 0; - - //! Derivatives of shape functions at quadrature points - //! getDShapes()[q*getBasisDimension()*getNumberOfDerivativesPerFunction()+ - //! +a*getNumberOfDerivativesPerFunction()+i] gives the - //! derivative in the i-th direction of degree of freedom a at quadrature point q - //! - //! getDShapes returns an empty vector if no derivatives are - //! available - virtual const std::vector & getDShapes() const = 0; - - //! @return vector of integration weights - virtual const std::vector & getIntegrationWeights() const = 0; //!< Integration weights - - //! Coordinates of quadrature points in the real configuration - //! getQuadraturePointCoordinates() - //! q*ElementGeometry::getEmbeddingDimension()+i] - //! returns the i-th coordinate in real space of quadrature point q - virtual const std::vector & getQuadraturePointCoordinates() const = 0; - - //! returns the number of shape functions provided - virtual size_t getBasisDimension() const = 0; - - //! 
returns the number of directional derivative for each shape function - virtual size_t getNumberOfDerivativesPerFunction() const = 0; - - //! returns the number of number of coordinates for each Gauss point - virtual size_t getSpatialDimensions() const = 0; -}; - - - - -/** - \brief dummy set with no basis functions. - - This class contains only static data and has the mission of providing - a BasisFunctions object that has no basis functions in it. - - This becomes useful, for example, as a cheap way of providing Element - with a BasisFunction object that can be returned but that occupies no memory. - Since Element has to be able to have a BasisFunction object per field, by utilizing - this object there is no need to construct an odd order for the fields in order to - save memory. - - */ -class EmptyBasisFunctions: public BasisFunctions -{ - public: - inline EmptyBasisFunctions() {} - inline virtual ~EmptyBasisFunctions(){} - inline EmptyBasisFunctions(const EmptyBasisFunctions &) {} - virtual EmptyBasisFunctions * clone() const { return new EmptyBasisFunctions(*this); } - - const std::vector & getShapes() const { return ZeroSizeVector; } - const std::vector & getDShapes() const { return ZeroSizeVector; } - const std::vector & getIntegrationWeights() const { return ZeroSizeVector; } - const std::vector & getQuadraturePointCoordinates() const { return ZeroSizeVector; } - size_t getBasisDimension() const { return 0; } - size_t getNumberOfDerivativesPerFunction() const { return 0; } - size_t getSpatialDimensions() const { return 0; } - - private: - static std::vector ZeroSizeVector; -}; - -#endif diff --git a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/BasisFunctionsProvided.h b/maxflow/galois/apps/avi/libElm/libShapesEvaluated/BasisFunctionsProvided.h deleted file mode 100644 index ad84924..0000000 --- a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/BasisFunctionsProvided.h +++ /dev/null @@ -1,225 +0,0 @@ -/** - * BasisFunctionsProvided.h - * DG++ - * - * Created by Adrian Lew on 10/21/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef BASISFUNCTIONSPROVIDED -#define BASISFUNCTIONSPROVIDED - -#include "Shape.h" -#include "Quadrature.h" -#include "ElementGeometry.h" -#include "BasisFunctions.h" -#include - -/** - \brief - BasisFunctionsProvidedExternalQuad: set of basis functions and derivatives at - the quadrature points provided directly at construction. 
The quadrature points are - referenced externally, they are not kept as a copy inside the object. - */ - -class BasisFunctionsProvidedExternalQuad: public BasisFunctions { -public: - //! Constructor - //! - //! In the following,\n - //! NQuad = QuadratureWeights.size(), the total number of quadrature points\n - //! NShapes = ShapesInput.size()/QuadratureWeights.size(), total number of shape - //! functions provided\n - //! spd = QuadratureCoords().size/QuadratureWeights.size(), number of spatial dimensions - //! - //! @param ShapesInput: Values of each shape function at each quadrature point.\n - //! ShapesInput[ q*NShapes + a] = value of shape function "a" at quadrature point "q" - //! @param DShapesInput: Values of each shape function derivative at each quadrature point.\n - //! DShapesInput[ q*NShapes*spd + a*spd + i] = value of shape function "a" derivative in the i-th - //! direction at quadrature point "q" - //! @param QuadratureWeights: QuadratureWeights[q] contains the value of the quadrature weight at quad point "q" - //! @param QuadratureCoords: QuadratureCoords[q*spd+i] contains the i-th coordinate of the position of quadrature - //! point "q" - //! - //! If not derivatives of shape functions are available, just provide and empty vector as - //! DShapesInput - inline BasisFunctionsProvidedExternalQuad (const std::vector &ShapesInput, - const std::vector &DShapesInput, const std::vector &QuadratureWeights, - const std::vector &QuadratureCoords) : - LocalShapes (ShapesInput), LocalDShapes (DShapesInput), - LocalWeights (QuadratureWeights), LocalCoordinates (QuadratureCoords) { - - - // Check that the dimensions are correct - if (LocalShapes.size () % LocalWeights.size () != 0 - || LocalCoordinates.size () % LocalWeights.size () != 0 - || LocalDShapes.size () % LocalWeights.size () != 0 - || LocalDShapes.size () % LocalShapes.size () != 0) { - std::cerr << "BasisFunctionsProvidedExternalQuad::Constructor. Error\n" - " Inconsistent length of some of the vectors provided\n"; - exit (1); - } - NumberOfShapes = LocalShapes.size () / LocalWeights.size (); - } - - //! Constructor - //! - //! In the following no shape functions are provided. An empty vector will be\n - //! place in its place. \n - //! NQuad = QuadratureWeights.size(), the total number of quadrature points\n - //! spd = QuadratureCoords().size/QuadratureWeights.size(), number of spatial dimensions - //! NDerivatives = LocalDShapes.size()/(NumberOfShapes*LocalWeights.size()) - //! - //! @param NShapes Number of shape functions for which derivatives are offered - //! @param DShapesInput: Values of each shape function derivative at each quadrature point.\n - //! DShapesInput[ q*NShapes*spd + a*spd + i] = value of shape function "a" derivative in the i-th - //! direction at quadrature point "q" - //! @param QuadratureWeights: QuadratureWeights[q] contains the value of the quadrature weight at quad point "q" - //! @param QuadratureCoords: QuadratureCoords[q*spd+i] contains the i-th coordinate of the position of quadrature - //! 
point "q" - inline BasisFunctionsProvidedExternalQuad (size_t NShapes, - const std::vector &DShapesInput, const std::vector &QuadratureWeights, - const std::vector &QuadratureCoords) : - LocalShapes (ZeroSizeVector), LocalDShapes (DShapesInput), NumberOfShapes (NShapes), - LocalWeights (QuadratureWeights), LocalCoordinates (QuadratureCoords) { - - // Check that the dimensions are correct - if (LocalCoordinates.size () % LocalWeights.size () != 0 || LocalDShapes.size () - % (LocalWeights.size () * NShapes) != 0 || LocalDShapes.size () % NShapes != 0) { - std::cerr << "BasisFunctionsProvidedExternalQuad::Constructor. Error\n" - " Inconsistent length of some of the vectors provided\n"; - exit (1); - } - } - - inline virtual ~BasisFunctionsProvidedExternalQuad () { - } - - inline BasisFunctionsProvidedExternalQuad (const BasisFunctionsProvidedExternalQuad &NewBas) : - LocalShapes (NewBas.LocalShapes), LocalDShapes (NewBas.LocalDShapes), - NumberOfShapes (NewBas.NumberOfShapes) , LocalWeights (NewBas.LocalWeights), LocalCoordinates (NewBas.LocalCoordinates) { - } - - virtual BasisFunctionsProvidedExternalQuad * clone () const { - return new BasisFunctionsProvidedExternalQuad (*this); - } - - //! Shape functions at quadrature points - //! getShapes()[q*Shape::getNumFunctions()+a] - //! gives the value of shape function a at quadrature point q - inline const std::vector & getShapes () const { - return LocalShapes; - } - - //! Derivatives of shape functions at quadrature points - //! getDShapes()[q*Shape::getNumFunctions()*ElementGeometry::getEmbeddingDimensions()+a*ElementGeometry::getEmbeddingDimensions()+i] - //! gives the - //! derivative in the i-th direction of degree of freedom a at quadrature point q - inline const std::vector & getDShapes () const { - return LocalDShapes; - } - - //!< Integration weights - inline const std::vector & getIntegrationWeights () const { - return LocalWeights; - } - - //! Coordinates of quadrature points in the real configuration - //! getQuadraturePointCoordinates() - //! [q*ElementGeometry::getEmbeddingDimension()+i] - //! returns the i-th coordinate in real space of quadrature point q - inline const std::vector & getQuadraturePointCoordinates () const { - return LocalCoordinates; - } - - //! returns the number of shape functions provided - inline size_t getBasisDimension () const { - return NumberOfShapes; - } - - //! returns the number of directional derivative for each shape function - inline size_t getNumberOfDerivativesPerFunction () const { - return LocalDShapes.size () / (NumberOfShapes * LocalWeights.size ()); - } - - //! returns the number of number of coordinates for each Gauss point - inline size_t getSpatialDimensions () const { - return LocalCoordinates.size () / LocalWeights.size (); - } - -private: - const std::vector& LocalShapes; - const std::vector& LocalDShapes; - size_t NumberOfShapes; - -protected: - const std::vector& LocalWeights; - const std::vector& LocalCoordinates; - static const std::vector ZeroSizeVector; -}; - -/** - \brief - BasisFunctionsProvided: set of basis functions and derivatives at - the quadrature points provided directly at construction. The quadrature points are - provided and stored inside the object - */ - -class BasisFunctionsProvided: public BasisFunctionsProvidedExternalQuad { -public: - //! Constructor - //! - //! In the following,\n - //! NQuad = QuadratureWeights.size(), the total number of quadrature points\n - //! NShapes = ShapesInput.size()/QuadratureWeights.size(), total number of shape - //! 
functions provided\n - //! spd = QuadratureCoords().size/QuadratureWeights.size(), number of spatial dimensions - //! - //! @param ShapesInput: Values of each shape function at each quadrature point.\n - //! ShapesInput[ q*NShapes + a] = value of shape function "a" at quadrature point "q" - //! @param DShapesInput: Values of each shape function derivative at each quadrature point.\n - //! DShapesInput[ q*NShapes*spd + a*spd + i] = value of shape function "a" derivative in the i-th - //! direction at quadrature point "q" - //! @param QuadratureWeights: QuadratureWeights[q] contains the value of the quadrature weight at quad point "q" - //! @param QuadratureCoords: QuadratureCoords[q*spd+i] contains the i-th coordinate of the position of quadrature - //! point "q" - inline BasisFunctionsProvided (const std::vector &ShapesInput, - const std::vector &DShapesInput, const std::vector &QuadratureWeights, - const std::vector &QuadratureCoords) : - BasisFunctionsProvidedExternalQuad (ShapesInput, DShapesInput, QuadratureWeights, QuadratureCoords) { - } - - inline virtual ~BasisFunctionsProvided () { - } - inline BasisFunctionsProvided (const BasisFunctionsProvided &NewBas) : - BasisFunctionsProvidedExternalQuad (NewBas) { - } - - virtual BasisFunctionsProvided * clone () const { - return new BasisFunctionsProvided (*this); - } - -}; - -#endif - diff --git a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/QuadratureP3.cpp b/maxflow/galois/apps/avi/libElm/libShapesEvaluated/QuadratureP3.cpp deleted file mode 100644 index 158d4b8..0000000 --- a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/QuadratureP3.cpp +++ /dev/null @@ -1,152 +0,0 @@ -#include "QuadratureP3.h" - - - -const double Tet_4Point::BulkCoordinates[] = - {0.58541020e0, 0.13819660e0, 0.13819660e0, - 0.13819660e0, 0.58541020e0, 0.13819660e0, - 0.13819660e0, 0.13819660e0, 0.58541020e0, - 0.13819660e0, 0.13819660e0, 0.13819660e0}; - -const double Tet_4Point::BulkWeights [] = {1./24., - 1./24., - 1./24., - 1./24.}; - -const double Tet_4Point::FaceMapCoordinates[] = {2./3., 1./6., - 1./6., 2./3., - 1./6., 1./6.}; - -// Face 1 : 2-1-0. -const double Tet_4Point::FaceOneShapeCoordinates[] = - { 1./6., 1./6., 0., - 1./6., 2./3., 0., - 2./3., 1./6., 0.}; - -const double Tet_4Point::FaceOneWeights [] = { 1./6., 1./6., 1./6.}; - - -// Face 2 : 2-0-3. -const double Tet_4Point::FaceTwoShapeCoordinates[] = - { 1./6., 0., 1./6., - 2./3., 0., 1./6., - 1./6., 0., 2./3.}; - -const double Tet_4Point::FaceTwoWeights [] = { 1./6., 1./6., 1./6.}; - - -// Face 3: 2-3-1. -const double Tet_4Point::FaceThreeShapeCoordinates[] = - { 0., 1./6., 1./6., - 0., 1./6., 2./3., - 0., 2./3., 1./6.}; - -const double Tet_4Point::FaceThreeWeights [] = { 1./6., 1./6., 1./6.}; - - -// Face 4: 0-1-3. 
-const double Tet_4Point::FaceFourShapeCoordinates [] = - { 2./3., 1./6., 1./6., - 1./6., 2./3., 1./6., - 1./6., 1./6., 2./3.}; - -const double Tet_4Point::FaceFourWeights [] = { 1./6., 1./6., 1./6.}; - - -const Quadrature* const Tet_4Point::Bulk = - new Quadrature(Tet_4Point::BulkCoordinates, - Tet_4Point::BulkWeights, 3, 4); - -const Quadrature* const Tet_4Point::FaceOne = - new Quadrature(Tet_4Point::FaceMapCoordinates, - Tet_4Point::FaceOneShapeCoordinates, - Tet_4Point::FaceOneWeights, 2, 3, 3); - -const Quadrature* const Tet_4Point::FaceTwo = - new Quadrature(Tet_4Point::FaceMapCoordinates, - Tet_4Point::FaceTwoShapeCoordinates, - Tet_4Point::FaceTwoWeights, 2, 3, 3); - -const Quadrature* const Tet_4Point::FaceThree = - new Quadrature(Tet_4Point::FaceMapCoordinates, - Tet_4Point::FaceThreeShapeCoordinates, - Tet_4Point::FaceThreeWeights, 2, 3, 3); - -const Quadrature* const Tet_4Point::FaceFour = - new Quadrature(Tet_4Point::FaceMapCoordinates, - Tet_4Point::FaceFourShapeCoordinates, - Tet_4Point::FaceFourWeights, 2, 3, 3); - - -const double Tet_11Point::BulkCoordinates[] = - {1./4., 1./4., 1./4., - 11./14., 1./14., 1./14., - 1./14., 11./14., 1./14., - 1./14., 1./14., 11./14., - 1./14., 1./14., 1./14., - 0.3994035761667992, 0.3994035761667992, 0.1005964238332008, - 0.3994035761667992, 0.1005964238332008, 0.3994035761667992, - 0.3994035761667992, 0.1005964238332008, 0.1005964238332008, - 0.1005964238332008, 0.3994035761667992, 0.3994035761667992, - 0.1005964238332008, 0.3994035761667992, 0.1005964238332008, - 0.1005964238332008, 0.1005964238332008, 0.3994035761667992}; - -const double Tet_11Point::BulkWeights[] = - {-74./5625., - 343./45000., - 343./45000., - 343./45000., - 343./45000., - 56./2250., - 56./2250., - 56./2250., - 56./2250., - 56./2250., - 56./2250.}; - - -const Quadrature* const Tet_11Point::Bulk = -new Quadrature(Tet_11Point::BulkCoordinates, - Tet_11Point::BulkWeights, 3, 11); - - - -const double Tet_15Point::BulkCoordinates[] = - {1./4., 1./4., 1./4., - 0., 1./3., 1./3., - 1./3., 0., 1./3., - 1./3., 1./3., 0., - 1./3., 1./3., 1./3., - 72./99., 1./11., 1./11., - 1./11., 72./99., 1./11., - 1./11., 1./11., 72./99., - 1./11., 1./11., 1./11., - 0.066550153573664, 0.066550153573664, 0.433449846426336, - 0.066550153573664, 0.433449846426336, 0.066550153573664, - 0.066550153573664, 0.433449846426336, 0.433449846426336, - 0.433449846426336, 0.066550153573664, 0.066550153573664, - 0.433449846426336, 0.433449846426336, 0.066550153573664, - 0.433449846426336, 0.066550153573664, 0.433449846426336}; - -const double Tet_15Point::BulkWeights[] = - {0.030283678097089, - 0.006026785714286, - 0.006026785714286, - 0.006026785714286, - 0.006026785714286, - 0.011645249086029, - 0.011645249086029, - 0.011645249086029, - 0.011645249086029, - 0.010949141561386, - 0.010949141561386, - 0.010949141561386, - 0.010949141561386, - 0.010949141561386, - 0.010949141561386}; - -const Quadrature* const Tet_15Point::Bulk = -new Quadrature(Tet_15Point::BulkCoordinates, - Tet_15Point::BulkWeights, 3, 15); -// Sriramajayam - diff --git a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/QuadratureP3.h b/maxflow/galois/apps/avi/libElm/libShapesEvaluated/QuadratureP3.h deleted file mode 100644 index 8078e5c..0000000 --- a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/QuadratureP3.h +++ /dev/null @@ -1,114 +0,0 @@ -/** - * QuadratureP3.h - * DG++ - * - * Created by Adrian Lew on 10/21/06. 
- * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef QUADRATUREP3 -#define QUADRATUREP3 - -#include "Quadrature.h" - -/*! - * \brief Class for 4 point quadrature rules for tetrahedrons. - * - * 4-point Gauss quadrature coordinates in the tetrahedron with - * 0(1,0,0), 1(0,1,0), 2(0,0,0), 3(0,0,1) as vertices. - * Barycentric coordinates are used for the Gauss points. - * Barycentric coordinates are specified with respect to vertices 1,2 and 4 - * in that order.Coordinate of vertex 3 is not independent. - * - * Quadrature for Faces: - * Faces are ordered as - - * Face 1: 2-1-0, - * Face 2: 2-0-3, - * Face 3: 2-3-1, - * Face 4: 0-1-3. - */ - -class Tet_4Point: public SpecificQuadratures -{ -public: - //! Bulk quadrature - static const Quadrature* const Bulk; - - //! Face (2-1-0) quadrature - static const Quadrature* const FaceOne; - //! Face (2-0-3) quadrature - static const Quadrature* const FaceTwo; - //! Face (2-3-1) quadrature - static const Quadrature* const FaceThree; - //! Face (0-1-3) quadrature - static const Quadrature* const FaceFour; - -private: - static const double BulkCoordinates[]; - static const double BulkWeights[]; - static const double FaceMapCoordinates[]; - static const double FaceOneShapeCoordinates[]; - static const double FaceOneWeights[]; - static const double FaceTwoShapeCoordinates[]; - static const double FaceTwoWeights[]; - static const double FaceThreeShapeCoordinates[]; - static const double FaceThreeWeights[]; - static const double FaceFourShapeCoordinates[]; - static const double FaceFourWeights[]; -}; - - - -//! \brief 11 point quadrature rule for tetrahedron -//! Degree of precision 4, number of points 11. -class Tet_11Point: public SpecificQuadratures -{ -public: - //! Bulk quadrature - static const Quadrature* const Bulk; - - //! \todo Include face quadrature rules if needed. - -private: - static const double BulkCoordinates[]; - static const double BulkWeights[]; -}; - -//! \brief 15 point quadrature rule for tetrahedron -//! Degree of precision 5, number of points 15. -class Tet_15Point: public SpecificQuadratures -{ -public: - //! Bulk quadrature - static const Quadrature* const Bulk; - - //! \todo Include face quadrature rules if needed. 
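A quick aside on the bulk rules above: the 4-point rule stores three barycentric coordinates per point (the fourth is one minus their sum), and its weights must add up to 1/6, the volume of the reference tetrahedron. The following small check is an assumed sketch, not part of the deleted sources; the coordinate and weight values are copied from Tet_4Point in QuadratureP3.cpp above.

#include <cmath>
#include <cstdio>

int main() {
  // Values copied from Tet_4Point::BulkCoordinates / BulkWeights above.
  const double coords[12] = {0.58541020, 0.13819660, 0.13819660,
                             0.13819660, 0.58541020, 0.13819660,
                             0.13819660, 0.13819660, 0.58541020,
                             0.13819660, 0.13819660, 0.13819660};
  const double weights[4] = {1. / 24., 1. / 24., 1. / 24., 1. / 24.};

  double wsum = 0.0;
  bool inside = true;
  for (int q = 0; q < 4; ++q) {
    wsum += weights[q];
    const double l1 = coords[3 * q], l2 = coords[3 * q + 1], l3 = coords[3 * q + 2];
    const double l4 = 1.0 - l1 - l2 - l3;              // dependent barycentric coordinate
    inside = inside && l1 > 0 && l2 > 0 && l3 > 0 && l4 > 0;
  }
  std::printf("sum of weights = %g (expect 1/6), points inside: %s\n",
              wsum, inside ? "yes" : "no");
  return (std::fabs(wsum - 1.0 / 6.0) < 1e-12 && inside) ? 0 : 1;
}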
- -private: - static const double BulkCoordinates[]; - static const double BulkWeights[]; -}; - -#endif -// Sriramajayam - diff --git a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/ShapesEvaluated.cpp b/maxflow/galois/apps/avi/libElm/libShapesEvaluated/ShapesEvaluated.cpp deleted file mode 100644 index 9ff7889..0000000 --- a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/ShapesEvaluated.cpp +++ /dev/null @@ -1,158 +0,0 @@ -/* - * ShapesEvaluatedImpl.cpp - * DG++ - * - * Created by Adrian Lew on 9/7/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "Quadrature.h" -#include "Linear.h" -#include "ShapesEvaluated.h" - -#include "util.h" - - -// #include "petscblaslapack.h" - -#include - -extern "C" void dgesv_(int *, int *, double *, int *, int *, double *, int *, int *); - -const Shape * const ShapesP12D::P12D = new Linear<2> ; -const Shape * const ShapesP12D::P11D = new Linear<1> ; - -const Shape * const ShapesP11D::P11D = new Linear<1> ; - -void ShapesEvaluated::createObject(const ElementGeometry& EG) { - const Shape& TShape = accessShape(); - const Quadrature& TQuadrature = accessQuadrature(); - - int Na = TShape.getNumFunctions(); - int Nq = TQuadrature.getNumQuadraturePoints(); - int Nd = EG.getEmbeddingDimension(); - int Np = EG.getParametricDimension(); - - LocalShapes.resize(Nq * Na); - if (Np == Nd) - LocalDShapes.resize(Nq * Na * Nd); - LocalWeights.resize(Nq); - LocalCoordinates.resize(Nq * Nd); - - double *DY = new double[Nd * Np]; - double *Y = new double[Nd]; - - for (int q = 0; q < Nq; q++) { - EG.map(TQuadrature.getQuadraturePoint(q), Y); - for (int i = 0; i < Nd; i++) - LocalCoordinates[q * Nd + i] = Y[i]; - - for (int a = 0; a < Na; a++) - LocalShapes[q * Na + a] = TShape.getVal(a, TQuadrature.getQuadraturePointShape(q)); - - double Jac; - EG.dMap(TQuadrature.getQuadraturePoint(q), DY, Jac); - - LocalWeights[q] = (TQuadrature.getQuadratureWeights(q)) * Jac; - - // Compute derivatives of shape functions only when the element map goes - // between spaces of the same dimension - if (Np == Nd) { - double *DYInv = new double[Np * Np]; - double *DYT = new double[Np * Np]; - - // Lapack - { - // Transpose DY to Fortran mode - for (int k = 0; k < Nd; k++) - for (int M = 0; M < Np; M++) { - DYT[k * Nd + M] = DY[M * Np + k]; - - //Right hand-side - DYInv[k * Nd + M] = k == M ? 
1 : 0; - } - - int *IPIV = new int[Np]; - int INFO; - - // DYInv contains the transpose of the inverse - dgesv_(&Np, &Np, DYT, &Np, IPIV, DYInv, &Np, &INFO); - - - -#ifdef DEBUG_TEST - const double* ptCoord = TQuadrature.getQuadraturePoint(q); - const double* shapeCoord = TQuadrature.getQuadraturePointShape(q); - - printIter (std::cerr << "ptCoord = ", ptCoord, ptCoord + Np); - printIter (std::cerr << "shapeCoord ", shapeCoord, shapeCoord + Np); - printIter (std::cerr << "Y = ", Y, Y + Np); - printIter (std::cerr << "DY = ", DY, DY + Np * Np); - printIter (std::cerr << "DYInv = ", DYInv, DYInv + Np * Np); - std::cerr << "----------------------" << std::endl; -#endif - - if (INFO != 0) { - std::cerr << "ShapesEvaluated::CreateObject: Lapack could not invert matrix\n"; - abort (); - } - -#ifdef DEBUG_TEST // Output only useful during testing - for(int r=0; r - -#include "Shape.h" -#include "Quadrature.h" -#include "ElementGeometry.h" -#include "BasisFunctions.h" -#include "Linear.h" - - - -/** - ShapesEvaluated: Evaluation of the shape functions and derivatives at - the quadrature points. Abstract class. - - ShapesEvaluated is the class that takes the element geometry, a - quadrature rule and shape functions on the reference element, and - computes the values of the shape functions and their derivatives at - the quadrature points. As opposed to Shapes, there will be one or - more ShapesEvaluated objects per element in the mesh. Each different - interpolation needed in an element will have a different - ShapesEvaluated object. - - - This class provides all functionality for the templated derived classes ShapesEvaluated__. - Only two abstract functions are left to be defined by derived classes, - accessShape and accessQuadrature. - - Objects in this class should only be constructed by derived classes. - - ShapesEvaluated evaluates ElementGeometry::(D)map at the quadrature - point (map)Coordinates using Quadrature::getQuadraturePoint(). - - ShapesEvaluated evaluates Shape functions at the quadrature - point (Shape)Coordinates using Quadrature::getQuadraturePointShape(). - - \warning: The type of coordinates used to evaluate functions in - Shape and ElementGeometry::(D)map (barycentric, Cartesian, etc.) - should be consistent with those provided in - Quadrature::getQuadraturePoint. In other words, if the Quadrature - object returns Cartesian coordinates, the Shape and - ElementGeometry objects should evaluate functions taking - Cartesian coordinates as arguments. - - \todo It would be nice to provide an interface of iterators to navigate - the vectors, so that it is not necessary to remember in which order the - data is stored in the vector. In the interest of simiplicity, this is - for the moment skipped. - - \todo It would useful to have the option of not computing the derivatives of the - shape functions if not needed. Right now, it is not possible. - - - \todo Make coordinate types so that it is not necessary to check - whether one is using the right convention between the three related - classes. - - \todo The computation of derivatives of shape functions with - respect to the coordinates of the embedding space can only be - performed if the ElementGeometry::map has domain and ranges of the - same dimension. Otherwise, the derivatives should be computed with - respect to some system of coordinates on the manifold. This will - likely need to be revisited in the case of shells. - - \todo Need to fix the Lapack interface... 
it is too particular to - Mac here, and even not the best way to do it in Mac... -*/ - - - - -class ShapesEvaluated: public BasisFunctions -{ - protected: - ShapesEvaluated() {} - inline virtual ~ShapesEvaluated(){} - ShapesEvaluated(const ShapesEvaluated &SEI); - - public: - // Accessors/Mutators: - inline const std::vector & getShapes() const { return LocalShapes; } - - inline const std::vector & getDShapes() const { return LocalDShapes; } - - inline const std::vector & getIntegrationWeights() const - { return LocalWeights;} - - inline const std::vector & getQuadraturePointCoordinates() const - { return LocalCoordinates; } - - //! returns the number of shape functions provided - inline size_t getBasisDimension() const - { return accessShape().getNumFunctions(); } - - //! returns the number of directional derivative for each shape function - inline size_t getNumberOfDerivativesPerFunction() const - { return LocalDShapes.size()/LocalShapes.size(); } - - //! returns the number of number of coordinates for each Gauss point - inline size_t getSpatialDimensions() const - { return LocalCoordinates.size()/LocalWeights.size(); } - - protected: - //! Returns the specific Shape objects in derived classes. - virtual const Shape& accessShape() const = 0; - - //! Quadrature type - //! Returns the specific Quadrature objects in derived classes. - virtual const Quadrature& accessQuadrature() const = 0; - - //! Since it is not possible to have a virtual constructor, - //! one is emulated below only accessible from derived classes - //! The virtual aspect are the calls to accessShape and accessQuadrature. - void createObject(const ElementGeometry& eg); - - - private: - std::vector LocalShapes; - std::vector LocalDShapes; - std::vector LocalWeights; - std::vector LocalCoordinates; -}; - - - -/** - \brief ShapesEvaluated__: This class is the one that brings the - flexibility for building shape functions of different types - evaluated at different quadrature points. - - The class takes the Shape and Quadrature types as template arguments. - An object is constructed by providing an ElementGeometry object. - - ShapesEvaluated_ and ShapesEvaluated__ could have been made into a single - templated class. By splitting them the templated part of the class is very - small. - - \todo Should I perhaps have the constructors of the class to be - protected, making the classes using ShapesEvaluated__ friends? - Since the ElementGeometry is not stored or referenced from within - the class, it would prevent unwanted mistakes. - - \warning When a ShapesEvaluated__ object is constructed on a - geometry where the parametric and embedding dimensions are - different, the constructor is not warning that it does not compute - the derivatives for the shape functions. These objects, however, - still provide the values of the Shape functions themselves. - - \todo In the future, as remarked in ShapesEvaluated as well, we need to separate the - need to provide the Shape function values from the one of providing the derivatives as - well, perhaps through multiple inheritance. 
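The role of the Jacobian in createObject, sketched above in the Lapack-based code, is just the chain rule: physical-space derivatives are the reference-element derivatives contracted with the inverse Jacobian, and the quadrature weight is scaled by the Jacobian determinant. The standalone example below (an assumption for illustration, not code from this library) spells that out for a linear triangle, where the 2x2 Jacobian can be inverted by hand instead of calling dgesv.

#include <cstdio>

int main() {
  // Jacobian DY[i][j] = d x_i / d xi_j of the element map at one quadrature
  // point (values chosen arbitrarily for the example).
  const double DY[2][2] = {{2.0, 0.0}, {0.5, 1.0}};
  const double detJ = DY[0][0] * DY[1][1] - DY[0][1] * DY[1][0];
  const double DYinv[2][2] = {{ DY[1][1] / detJ, -DY[0][1] / detJ},
                              {-DY[1][0] / detJ,  DY[0][0] / detJ}};

  // Reference derivatives of the linear triangle shapes N1=xi, N2=eta, N3=1-xi-eta.
  const double dNref[3][2] = {{1, 0}, {0, 1}, {-1, -1}};

  double dNphys[3][2];
  for (int a = 0; a < 3; ++a)
    for (int i = 0; i < 2; ++i) {
      dNphys[a][i] = 0.0;
      for (int j = 0; j < 2; ++j)                     // dN/dx_i = dN/dxi_j * (J^{-1})_{ji}
        dNphys[a][i] += dNref[a][j] * DYinv[j][i];
    }

  const double wRef  = 1.0 / 6.0;                     // weight on the reference element
  const double wPhys = wRef * detJ;                   // weight used for physical integrals

  for (int a = 0; a < 3; ++a)
    std::printf("dN%d/dx = (%g, %g)\n", a + 1, dNphys[a][0], dNphys[a][1]);
  std::printf("scaled weight = %g\n", wPhys);
  return 0;
}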
-*/ - - -template -class ShapesEvaluated__:public ShapesEvaluated -{ -public: - inline ShapesEvaluated__(const ElementGeometry& EG): ShapesEvaluated() { - createObject(EG); - } - - virtual ShapesEvaluated__* clone() const { - return new ShapesEvaluated__(*this); - } - - ShapesEvaluated__(const ShapesEvaluated__ & SE): ShapesEvaluated (SE) { - } - - const Shape& accessShape() const { return *ShapeObj;} - const Quadrature& accessQuadrature() const { return *QuadObj;} -}; - - - - -// Build specific ShapesEvaluated - - -//! Specific ShapesEvaluated types -class SpecificShapesEvaluated -{}; - - - - -//! Shape functions for P12D elements: Linear functions on Triangles\n -//! It contains two types of traces\n -//! 1) ShapesP12D::Faces are linear functions on Segments, and their degrees of freedom -//! are those associated to the nodes of the Segment. -//! -//! 2) ShapesP12D::FaceOne, ShapesP12D::FaceTwo, ShapesP12D::FaceThree are the full set -//! of linear shape functions in the element evaluated at quadrature points in each one -//! of the faces. Instead of having then 2 degrees of freedom per field per face, there are -//! here 3 degrees of freedom per field per face. Of course, some of these values are -//! trivially zero, i.e., those of the shape functions associated to the node opposite -//! to the face where the quadrature point is. These are provided since ocassionally -//! it may be necessary to have the boundary fields have the same -//! number of degrees of freedom as the bulk fields. In the most general case of -//! arbitrary bases, there is generally no shape functions that is -//! identically zero on the face, and hence bulk and trace fields have the same number of -//! degrees of freedom. With these shape functions, for example, it is possible to compute -//! the normal derivative of each basis function at the boundary. - -class ShapesP12D: public SpecificShapesEvaluated -{ - public: - //! Shape functions on reference triangle - static const Shape * const P12D; - - //! Shape functions on reference segment - static const Shape * const P11D; - - //! Bulk shape functions - typedef ShapesEvaluated__ Bulk; - - //! Shape functions for FaceOne, FaceTwo and FaceThree - typedef ShapesEvaluated__ Faces; - - //! Full shape functions for FaceOne - typedef ShapesEvaluated__ FaceOne; - //! Full shape functions for FaceTwo - typedef ShapesEvaluated__ FaceTwo; - //! Full shape functions for FaceThree - typedef ShapesEvaluated__ FaceThree; -}; - - - - -//! Shape functions for P11D elements: Linear functions on segments -class ShapesP11D: public SpecificShapesEvaluated -{ - public: - //! Shape functions on reference segment - static const Shape * const P11D; - - //! Bulk shape functions - typedef ShapesEvaluated__ Bulk; -}; - - - - -#endif diff --git a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/ShapesEvaluatedP13D.cpp b/maxflow/galois/apps/avi/libElm/libShapesEvaluated/ShapesEvaluatedP13D.cpp deleted file mode 100644 index 44485bd..0000000 --- a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/ShapesEvaluatedP13D.cpp +++ /dev/null @@ -1,38 +0,0 @@ -/* - * ShapesEvaluatedP13D.cpp - * DG++ - * - * Created by Ramsharan Rangarajan. 
- * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -#include "ShapesEvaluatedP13D.h" - -// Implementation of class ShapesEvaluatedP13D : - -const size_t ShapesP13D::bctMap[] = {0,1,3,2}; - -const Shape * const ShapesP13D::P13D = new Linear<3>(ShapesP13D::bctMap); - -const Shape * const ShapesP13D::P12D = new Linear<2>; diff --git a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/ShapesEvaluatedP13D.h b/maxflow/galois/apps/avi/libElm/libShapesEvaluated/ShapesEvaluatedP13D.h deleted file mode 100644 index 936c2b2..0000000 --- a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/ShapesEvaluatedP13D.h +++ /dev/null @@ -1,74 +0,0 @@ -/** - * ShapesEvaluatedP13D.h - * DG++ - * - * Created by Ramsharan Rangarajan. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -#ifndef SHAPESEVALUATEDP13D -#define SHAPESEVALUATEDP13D - -#include "ShapesEvaluated.h" -#include "Quadrature.h" -#include "Linear.h" - -/** - \brief Shape functions for P13D elements: Linear functions on tetrahedra. - - It containes two types of traces - 1) ShapesP13D::Faces are linear functions on triangles and their dofs are those associated with the nodes of - the triangular face. 
- - 2) ShapesP13D::FaceOne, ShapesP13D::FaceTwo, ShapesP13D::FaceThree and ShapesP13D::FaceFour are the full set - of linear shape functions in the elements evaluated at quadrature points in each one of the faces. Instead of - having 3 dofs per field per face, there are 3 dofs per field per face. Some of these values are trivially - zero, i.e., those of the shape function associated to the node opposite to the face where the quadrature - point is. As was the done in ShapesP12D, these are provided since ocassionally it may be necessary to have the - same number of dofs as the bulk fields. In the most general case of arbitrary bases, there is generally no - shape function that is zero on the face, and hence bulk and trace fields have the same number of dofs. -*/ - - -class ShapesP13D: public SpecificShapesEvaluated -{ - protected: - static const size_t bctMap[]; - - public: - static const Shape * const P13D; - static const Shape * const P12D; - - typedef ShapesEvaluated__ Bulk; - - typedef ShapesEvaluated__ Faces; - - typedef ShapesEvaluated__ FaceOne; // 2-1-0 - typedef ShapesEvaluated__ FaceTwo; // 2-0-3 - typedef ShapesEvaluated__ FaceThree;// 2-3-1 - typedef ShapesEvaluated__ FaceFour; // 0-1-3 -}; - -#endif - diff --git a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/test/testBasisFunctionsProvided-model_output b/maxflow/galois/apps/avi/libElm/libShapesEvaluated/test/testBasisFunctionsProvided-model_output deleted file mode 100644 index 5c12257..0000000 --- a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/test/testBasisFunctionsProvided-model_output +++ /dev/null @@ -1,95 +0,0 @@ - -Print data before deleting original shapes - - -Test BasisFunctionProvided -Function values -0.666667 0.166667 0.166667 0.166667 0.666667 0.166667 0.166667 0.166667 0.666667 -Function derivative values -1 0 0 1 -1 -1 1 0 0 1 -1 -1 1 0 0 1 -1 -1 -Integration weights -0.166667 0.166667 0.166667 -Quadrature point coordinates -0.666667 0.166667 0.166667 0.666667 0.166667 0.166667 - - -Test BasisFunctionProvidedExternalQuad -Function values -0.666667 0.166667 0.166667 0.166667 0.666667 0.166667 0.166667 0.166667 0.666667 -Function derivative values -1 0 0 1 -1 -1 1 0 0 1 -1 -1 1 0 0 1 -1 -1 -Integration weights -0.166667 0.166667 0.166667 -Quadrature point coordinates -0.666667 0.166667 0.166667 0.666667 0.166667 0.166667 - - -Test BasisFunctionProvidedExternalQuadNoShapes -Function values - -Function derivative values -1 0 0 1 -1 -1 1 0 0 1 -1 -1 1 0 0 1 -1 -1 -Integration weights -0.166667 0.166667 0.166667 -Quadrature point coordinates -0.666667 0.166667 0.166667 0.666667 0.166667 0.166667 - - -Test Copy constructors - - -Test BasisFunctionProvided -Function values -0.666667 0.166667 0.166667 0.166667 0.666667 0.166667 0.166667 0.166667 0.666667 -Function derivative values -1 0 0 1 -1 -1 1 0 0 1 -1 -1 1 0 0 1 -1 -1 -Integration weights -0.166667 0.166667 0.166667 -Quadrature point coordinates -0.166667 0.166667 0.166667 - - -Test BasisFunctionProvidedExternalQuad -Function values -0.666667 0.166667 0.166667 0.166667 0.666667 0.166667 0.166667 0.166667 0.666667 -Function derivative values -1 0 0 1 -1 -1 1 0 0 1 -1 -1 1 0 0 1 -1 -1 -Integration weights -0.166667 0.166667 0.166667 -Quadrature point coordinates -0.666667 0.166667 0.166667 0.666667 0.166667 0.166667 - - -Test cloning - - -Test BasisFunctionProvided -Function values -0.666667 0.166667 0.166667 0.166667 0.666667 0.166667 0.166667 0.166667 0.666667 -Function derivative values -1 0 0 1 -1 -1 1 0 0 1 -1 -1 1 0 0 1 -1 -1 -Integration weights 
-0.166667 0.166667 0.166667 -Quadrature point coordinates -0.166667 0.166667 0.166667 - - -Test BasisFunctionProvidedExternalQuad -Function values -0.666667 0.166667 0.166667 0.166667 0.666667 0.166667 0.166667 0.166667 0.666667 -Function derivative values -1 0 0 1 -1 -1 1 0 0 1 -1 -1 1 0 0 1 -1 -1 -Integration weights -0.166667 0.166667 0.166667 -Quadrature point coordinates -0.666667 0.166667 0.166667 0.666667 0.166667 0.166667 - -Print data after deleted original shapes -Function values -0.666667 0.166667 0.166667 0.166667 0.666667 0.166667 0.166667 0.166667 0.666667 -Function derivative values -1 0 0 1 -1 -1 1 0 0 1 -1 -1 1 0 0 1 -1 -1 -Integration weights -0.166667 0.166667 0.166667 -Quadrature point coordinates -0.666667 0.166667 0.166667 0.666667 0.166667 0.166667 diff --git a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/test/testBasisFunctionsProvided.cpp b/maxflow/galois/apps/avi/libElm/libShapesEvaluated/test/testBasisFunctionsProvided.cpp deleted file mode 100644 index d8214e8..0000000 --- a/maxflow/galois/apps/avi/libElm/libShapesEvaluated/test/testBasisFunctionsProvided.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * testLinearSE.cpp - * DG++ - * - * Created by Adrian Lew on 9/9/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "Triangle.h" -#include "ShapesEvaluated.h" -#include "BasisFunctionsProvided.h" -#include - -static void PrintData(const BasisFunctions & P10Shapes); - -int main() -{ - double V0[] = {1,0,0,1,0,0}; - std::vector Vertices0(V0, V0+6); - Triangle<2>::SetGlobalCoordinatesArray(Vertices0); - Triangle<2> P10(1,2,3); - ShapesP12D::Bulk * P10Shapes = new ShapesP12D::Bulk(&P10); - - - BasisFunctionsProvided BFS(P10Shapes->getShapes(), P10Shapes->getDShapes(), - P10Shapes->getIntegrationWeights(), P10Shapes->getQuadraturePointCoordinates()); - - BasisFunctionsProvidedExternalQuad BFSExternalQuad(P10Shapes->getShapes(), P10Shapes->getDShapes(), - P10Shapes->getIntegrationWeights(), - P10Shapes->getQuadraturePointCoordinates()); - - BasisFunctionsProvidedExternalQuad - BFSExternalQuadNoShapes(2, - P10Shapes->getDShapes(), - P10Shapes->getIntegrationWeights(), - P10Shapes->getQuadraturePointCoordinates()); - - std::cout << "\nPrint data before deleting original shapes\n"; - std::cout << "\n\nTest BasisFunctionProvided\n"; - PrintData(BFS); - - std::cout << "\n\nTest BasisFunctionProvidedExternalQuad\n"; - PrintData(BFSExternalQuad); - - std::cout << "\n\nTest BasisFunctionProvidedExternalQuadNoShapes\n"; - PrintData(BFSExternalQuadNoShapes); - - std::cout << "\n\nTest Copy constructors\n"; - BasisFunctionsProvided BFSCopy(BFS); - BasisFunctionsProvidedExternalQuad BFSExternalQuadCopy(BFSExternalQuad); - - std::cout << "\n\nTest BasisFunctionProvided\n"; - PrintData(BFSCopy); - - std::cout << "\n\nTest BasisFunctionProvidedExternalQuad\n"; - PrintData(BFSExternalQuadCopy); - - - std::cout << "\n\nTest cloning\n"; - BasisFunctions * BFSClone = BFS.clone(); - BasisFunctions * BFSExternalQuadClone = BFSExternalQuad.clone(); - - std::cout << "\n\nTest BasisFunctionProvided\n"; - PrintData(*BFSClone); - - std::cout << "\n\nTest BasisFunctionProvidedExternalQuad\n"; - PrintData(*BFSExternalQuadClone); - - delete BFSClone; - delete BFSExternalQuadClone; - - delete P10Shapes; - - std::cout << "\nPrint data after deleted original shapes \n"; - PrintData(BFS); - - -} - - - -static void PrintData(const BasisFunctions & P10Shapes) -{ - std::cout << "Function values\n"; - for(unsigned int a=0; a - -static void PrintData(ShapesP13D::Bulk PShapes); - -int main() -{ - double TempVertices0[] = {1,0,0, - 0,1,0, - 0,0,0, - 0,0,1}; - - std::vector Vertices0(TempVertices0,TempVertices0+12); - - Tetrahedron::SetGlobalCoordinatesArray(Vertices0); - - Tetrahedron P1(1,2,3,4); - ShapesP13D::Bulk P1Shapes(P1); - - std::cout << "Parametric tet\n"; - PrintData(P1Shapes); - - std::cout << "\nTwice Parametric tet\n"; - - double TempVertices1[] = {2,0,0, - 0,2,0, - 0,0,0, - 0,0,2}; - - std::vector Vertices1(TempVertices1,TempVertices1+12); - Tetrahedron::SetGlobalCoordinatesArray(Vertices1); - Tetrahedron P2(1,2,3,4); - ShapesP13D::Bulk P2Shapes(P2); - - PrintData(P2Shapes); - -} - - - - -static void PrintData(ShapesP13D::Bulk PShapes) -{ - std::cout << "Function values\n"; - for(unsigned int a=0; a - -static void PrintData(ShapesP12D::Bulk P10Shapes); - -int main() -{ - double V0[] = {1,0,0,1,0,0}; - std::vector Vertices0(V0, V0+6); - Triangle<2>::SetGlobalCoordinatesArray(Vertices0); - Triangle<2> P10(1,2,3); - ShapesP12D::Bulk P10Shapes(&P10); - - std::cout << "Parametric triangle\n"; - PrintData(P10Shapes); - - std::cout << "\nTwice Parametric triangle\n"; - - double V1[] = {2,0,0,2,0,0}; - std::vector Vertices1(V1, V1+6); - Triangle<2>::SetGlobalCoordinatesArray(Vertices1); - Triangle<2> P11(1,2,3); - 
ShapesP12D::Bulk P11Shapes(&P11); - - PrintData(P11Shapes); - - std::cout << "\nReordered nodes of twice parametric triangle\n"; - - double V2[] = {0,0,2,0,0,2}; - std::vector Vertices2(V2, V2+6); - Triangle<2>::SetGlobalCoordinatesArray(Vertices2); - Triangle<2> P12(1,2,3); - ShapesP12D::Bulk P12Shapes(&P12); - - PrintData(P12Shapes); - - std::cout << "\n Equilateral triangle with area sqrt(3)\n"; - - double V3[] = {0,0,2,0,1,sqrt(3)}; - std::vector Vertices3(V3, V3+6); - Triangle<2>::SetGlobalCoordinatesArray(Vertices3); - Triangle<2> P13(1,2,3); - ShapesP12D::Bulk P13Shapes(&P13); - - PrintData(P13Shapes); - - std::cout << "\n Irregular triangle with area sqrt(3)\n"; - - double V4[] = {0,0,2,0,2.5,sqrt(3)}; - std::vector Vertices4(V4, V4+6); - Triangle<2>::SetGlobalCoordinatesArray(Vertices4); - Triangle<2> P14(1,2,3); - ShapesP12D::Bulk P14Shapes(&P14); - - PrintData(P14Shapes); - -} - - - - -static void PrintData(ShapesP12D::Bulk P10Shapes) -{ - std::cout << "Function values\n"; - for(unsigned int a=0; a - -static void PrintData(ShapesP22D::Bulk PShapes); - -int main() -{ - double TempVertices0[] = {0,0, - 1,0, - 0,1}; - - std::vector Vertices0(TempVertices0,TempVertices0+6); - - Triangle<2>::SetGlobalCoordinatesArray(Vertices0); - - Triangle<2> T1(1,2,3); - ShapesP22D::Bulk P1Shapes(&T1); - - std::cout << "Parametric triangle\n"; - PrintData(P1Shapes); - - - std::cout << "\nTwice Parametric triangle\n"; - - double TempVertices1[] = {0,0, - 2,0, - 0,2}; - - std::vector Vertices1(TempVertices1,TempVertices1+6); - Triangle<2>::SetGlobalCoordinatesArray(Vertices1); - Triangle<2> T2(1,2,3); - ShapesP22D::Bulk P2Shapes(&T2); - - PrintData(P2Shapes); - -} - - - - -static void PrintData(ShapesP22D::Bulk PShapes) -{ - std::cout << "Function values\n"; - for(unsigned int a=0; a -#include - - -// XXX: (amber) replaced 3,9. 27, 81 with NDM, MAT_SIZE -bool LinearElasticBase::getConstitutiveResponse(const std::vector& strain, std::vector& stress, std::vector & tangents, const ConstRespMode& mode) const { - - - // Compute stress - if (stress.size () != MAT_SIZE) { - stress.resize(MAT_SIZE); - } - - for (size_t i = 0; i < NDM; i++) { - for (size_t J = 0; J < NDM; J++) { - stress[i * NDM + J] = 0; - - for (size_t k = 0; k < NDM; k++) { - for (size_t L = 0; L < NDM; L++) { - stress[i * NDM + J] += getModuli(i, J, k, L) * (strain[k * NDM + L] - I_MAT[k * NDM + L]); - } - } - } - } - - // Compute tangents, if needed - if (mode == COMPUTE_TANGENTS) { - - if (tangents.size () != MAT_SIZE * MAT_SIZE) { - tangents.resize(MAT_SIZE * MAT_SIZE); - } - - for (size_t i = 0; i < NDM; i++) - for (size_t J = 0; J < NDM; J++) - for (size_t k = 0; k < NDM; k++) - for (size_t L = 0; L < NDM; L++) - tangents[i * MAT_SIZE * NDM + J * MAT_SIZE + k * NDM + L] = getModuli(i, J, k, L); - } - - return true; -} diff --git a/maxflow/galois/apps/avi/libMat/Material.cpp b/maxflow/galois/apps/avi/libMat/Material.cpp deleted file mode 100644 index c1af358..0000000 --- a/maxflow/galois/apps/avi/libMat/Material.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/* - * NeoHookean.cpp - * DG++ - * - * Created by Adrian Lew on 10/24/06. 
- * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "Material.h" -#include -#include -#include -#include - -const double SimpleMaterial::I_MAT[] = { 1., 0., 0., 0., 1., 0., 0., 0., 1. }; -const int IsotropicLinearElastic::DELTA_MAT[][3] = { { 1, 0, 0 }, { 0, 1, 0 }, { 0, 0, 1 } }; -const double SimpleMaterial::EPS = 1.e-6; -const double SimpleMaterial::PERT = 1.e-1; -const double SimpleMaterial::DET_MIN = 1.e-10; - -const size_t SimpleMaterial::NDF = 3; -const size_t SimpleMaterial::NDM = 3; - -bool SimpleMaterial::consistencyTest(const SimpleMaterial &SMat) { - std::vector strain(MAT_SIZE); - std::vector stress(MAT_SIZE); - std::vector tangents(MAT_SIZE * MAT_SIZE); - - std::vector stressplus(MAT_SIZE); - std::vector stressminus(MAT_SIZE); - std::vector tangentsnum(MAT_SIZE * MAT_SIZE); - - srand(time(0)); - - strain[0] = 1 + double(rand()) / double(RAND_MAX) * PERT; - strain[1] = double(rand()) / double(RAND_MAX) * PERT; - strain[2] = double(rand()) / double(RAND_MAX) * PERT; - strain[3] = double(rand()) / double(RAND_MAX) * PERT; - strain[4] = 1 + double(rand()) / double(RAND_MAX) * PERT; - strain[5] = double(rand()) / double(RAND_MAX) * PERT; - strain[6] = double(rand()) / double(RAND_MAX) * PERT; - strain[7] = double(rand()) / double(RAND_MAX) * PERT; - strain[8] = 1 + double(rand()) / double(RAND_MAX) * PERT; - - for (unsigned int i = 0; i < MAT_SIZE; i++) { - std::vector t; - double Forig = strain[i]; - - strain[i] = Forig + EPS; - SMat.getConstitutiveResponse(strain, stressplus, t, SKIP_TANGENTS); - - strain[i] = Forig - EPS; - SMat.getConstitutiveResponse(strain, stressminus, t, SKIP_TANGENTS); - - for (unsigned j = 0; j < MAT_SIZE; j++) { - tangentsnum[j * MAT_SIZE + i] = (stressplus[j] - stressminus[j]) / (2 * EPS); - } - - strain[i] = Forig; - } - - SMat.getConstitutiveResponse(strain, stress, tangents, COMPUTE_TANGENTS); - - double error = 0; - double norm = 0; - for (size_t i = 0; i < MAT_SIZE * MAT_SIZE; i++) { - error += pow(tangents[i] - tangentsnum[i], 2); - norm += pow(tangents[i], 2); - } - error = sqrt(error); - norm = sqrt(norm); - - if (error / norm > EPS * 100) { - std::cerr << "SimpleMaterial::ConsistencyTest. 
Material not consistent\n"; - return false; - } - return true; -} diff --git a/maxflow/galois/apps/avi/libMat/Material.h b/maxflow/galois/apps/avi/libMat/Material.h deleted file mode 100644 index f9a08d4..0000000 --- a/maxflow/galois/apps/avi/libMat/Material.h +++ /dev/null @@ -1,306 +0,0 @@ -/** - * Material.h - * DG++ - * - * Created by Adrian Lew on 10/24/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef MATERIAL_H -#define MATERIAL_H - -#include "Galois/Runtime/PerThreadStorage.h" - -#include -#include -#include -#include - -#include -#include - -/** - \brief Base class for all materials. - - \warning This is a quick fix. Will be revised later. - */ - -class Material { -}; - -/** - \brief Material whose thermodynamic state depends only on the local strain. - - Simple materials are those for which the stress depends only on the strain. - Assumes a homogeneous density in the reference configuration. - - Convention:\n - Any 3x3 second-order tensor \f${\bf A}\f$ is represented by a 2x2 matrix with components ordered - in an array \f$A\f$ as \f$A_{iJ}\f$=A[i*3+J].\n - Any 3x3x3x3 fourth-order tensor \f$\mathbb{C}\f$ is represented by a 3x3x3x3 matrix with components - ordered in an array \f$C\f$ as \f$\mathbb{C}_{iJkL}\f$=C[i*27+J*9+k*3+l].\n - */ - -class SimpleMaterial: public Material { -public: - - // (amber) some constants collected here - //! Some constants collected together here - static const double EPS; - static const double PERT; - static const double DET_MIN; - - static const size_t MAT_SIZE = 9; - static const size_t NDF; - static const size_t NDM; - - static const double I_MAT[MAT_SIZE]; - - //! for use with getConstitutiveResponse - //! optional to compute the tangents or skip - //! the computation - enum ConstRespMode { - COMPUTE_TANGENTS, - SKIP_TANGENTS, - }; - - //! \param rhoInput Density in reference configuration. If not provided, assumed to be zero. - inline SimpleMaterial(double rhoInput = 0) : - RefRho(rhoInput) { - } - - virtual ~SimpleMaterial() { - } - - //! Copy constructor - //! \param SM SimpleMaterial object to be copied. 
- inline SimpleMaterial(const SimpleMaterial &SM) : - RefRho(SM.RefRho) { - } - - virtual SimpleMaterial * clone() const = 0; - - /** - \brief - Returns the constitutive response of the material - - Given the local strain, it returns the local stress, and if requested, - the constitutive tangents.\n\n - More precisely:\n - The strain is assumed to be a 3x3 second-order tensor \f$F_{iJ}\f$. \n - The stress is assumed to be a 3x3 second-order tensor \f$P_{iJ}(\bf{F})\f$. \n - The constitutive tangents are a 3x3x3x3 fourth-order tensor - \f[ - A_{iJkL} = \frac{\partial P_{iJ}}{\partial F_{kL}} - \f] - - @param strain strain tensor, input - @param stress array where the stress tensor is returned - @param tangents array where the constitutive tangents are returned. If not provided, not computed. - @param mode tells whether to compute tangents vector or skip it @see ConstRespMode - - If cannot compute the constitutive relation for some reason, for example a - negative determinant in the strain, it returns false. If successful, returns true. - */ - - virtual bool getConstitutiveResponse(const std::vector& strain, std::vector& stress, std::vector& tangents - , const ConstRespMode& mode) const = 0; - - //! Returns the (uniform) density of the reference configuration. - double getDensityInReference() const { - return RefRho; - } - - /** Returns the local density that is a function of only the strain. - The density is computed as \f[ \rho = \frac{\rho_0}{\text{det}(\nabla F)}, \f] - where \f$F\f$ is the deformation gradient (as described above).
- Returns true if the computation went well and false otherwise. - - \param strain Deformation gradient. - \param LocDensity Computed local density. - */ - bool getLocalMaterialDensity(const std::vector * strain, double &LocDensity) const { - assert((*strain).size () == MAT_SIZE); - - // Compute determinant of strain. - double J = (*strain)[0] * ((*strain)[4] * (*strain)[8] - (*strain)[5] * (*strain)[7]) - (*strain)[1] * ((*strain)[3] * (*strain)[8] - - (*strain)[5] * (*strain)[6]) + (*strain)[2] * ((*strain)[3] * (*strain)[7] - (*strain)[4] * (*strain)[6]); - - if (fabs(J) < 1.e-10) - return false; - else { - LocDensity = RefRho / J; - return true; - } - } - - //! returns a string with the name of the material - virtual const std::string getMaterialName() const = 0; - - //! Consistency test\n - //! Checks that the tangets are in fact the derivatives of the stress with respect to the - //! strain. - static bool consistencyTest(const SimpleMaterial &Smat); - - //! @return speed of sound - virtual double getSoundSpeed(void) const = 0; -private: - double RefRho; -}; - - - -/** - \brief NeoHookean constitutive behavior - - */ - -class NeoHookean: public SimpleMaterial { - - /** - * Holds temporary vectors used by getConstitutiveResponse - * Instead of allocating new arrays on the stack, we reuse - * the same memory in the hope of better cache efficiency - * There is on instance of this struct per thread - */ - struct NeoHookenTmpVec { - static const size_t MAT_SIZE = SimpleMaterial::MAT_SIZE; - - double F[MAT_SIZE]; - double Finv[MAT_SIZE]; - double C[MAT_SIZE]; - double Cinv[MAT_SIZE]; - double S[MAT_SIZE]; - double M[MAT_SIZE * MAT_SIZE]; - - }; - - /** - * Per thread storage for NeoHookenTmpVec - */ - static Galois::Runtime::PerThreadStorage perCPUtmpVec; - -public: - NeoHookean(double LambdaInput, double MuInput, double rhoInput = 0) : - SimpleMaterial(rhoInput), Lambda(LambdaInput), Mu(MuInput) { - } - virtual ~NeoHookean() { - } - NeoHookean(const NeoHookean &NewMat) : - SimpleMaterial(NewMat), Lambda(NewMat.Lambda), Mu(NewMat.Mu) { - } - virtual NeoHookean * clone() const { - return new NeoHookean(*this); - } - - bool getConstitutiveResponse(const std::vector& strain, std::vector& stress, std::vector& tangents - , const ConstRespMode& mode) const; - - const std::string getMaterialName() const { - return "NeoHookean"; - } - - double getSoundSpeed(void) const { - return sqrt((Lambda + 2.0 * Mu) / getDensityInReference()); - } - -private: - // Lame constants - double Lambda; - double Mu; -}; - -/** - \brief Linear Elastic constitutive behavior - - This is an abstract class that provides the constitutive response of a linear elastic material. - However, it does not store the moduli, leaving that task for derived classes. In this way, we have the - flexibility to handle cases in which the material has different types of anisotropy or under stress. 
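The abstract getModuli interface above is easiest to understand through the isotropic special case defined later in this header: with C_iJkL = lambda*delta_iJ*delta_kL + mu*(delta_ik*delta_JL + delta_iL*delta_Jk), the contraction performed by LinearElasticBase::getConstitutiveResponse collapses to P = lambda*tr(F - I)*I + mu*((F - I) + (F - I)^T). The sketch below (an illustration under that assumption, not code from the deleted sources) evaluates the stress both ways and checks that they agree.

#include <cmath>
#include <cstdio>

int main() {
  const double lambda = 1.0, mu = 2.0;
  const double I[9] = {1, 0, 0, 0, 1, 0, 0, 0, 1};
  const double F[9] = {1.02, 0.01, 0.00,
                       0.03, 0.98, 0.02,
                       0.00, 0.01, 1.01};             // arbitrary deformation gradient

  auto delta  = [](int a, int b) { return a == b ? 1.0 : 0.0; };
  auto moduli = [&](int i, int J, int k, int L) {
    return lambda * delta(i, J) * delta(k, L) +
           mu * (delta(i, k) * delta(J, L) + delta(i, L) * delta(J, k));
  };

  double P1[9] = {0}, P2[9] = {0};

  // 1) Full contraction, as in LinearElasticBase::getConstitutiveResponse.
  for (int i = 0; i < 3; ++i)
    for (int J = 0; J < 3; ++J)
      for (int k = 0; k < 3; ++k)
        for (int L = 0; L < 3; ++L)
          P1[i * 3 + J] += moduli(i, J, k, L) * (F[k * 3 + L] - I[k * 3 + L]);

  // 2) Closed form for the isotropic case.
  const double trE = (F[0] - 1) + (F[4] - 1) + (F[8] - 1);
  for (int i = 0; i < 3; ++i)
    for (int J = 0; J < 3; ++J) {
      const double E_iJ = F[i * 3 + J] - I[i * 3 + J];
      const double E_Ji = F[J * 3 + i] - I[J * 3 + i];
      P2[i * 3 + J] = lambda * trE * delta(i, J) + mu * (E_iJ + E_Ji);
    }

  double diff = 0;
  for (int n = 0; n < 9; ++n) diff = std::fmax(diff, std::fabs(P1[n] - P2[n]));
  std::printf("max |P1 - P2| = %g\n", diff);          // expect ~0
  return diff < 1e-12 ? 0 : 1;
}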
- - */ - -class LinearElasticBase: public SimpleMaterial { -public: - LinearElasticBase(double irho = 0) : - SimpleMaterial(irho) { - } - virtual ~LinearElasticBase() { - } - LinearElasticBase(const LinearElasticBase &NewMat) : - SimpleMaterial(NewMat) { - } - virtual LinearElasticBase * clone() const = 0; - - bool getConstitutiveResponse(const std::vector& strain, std::vector& stress, std::vector& tangents - , const ConstRespMode& mode) const; - - const std::string getMaterialName() const { - return "LinearElasticBase"; - } - -protected: - virtual double getModuli(int i1, int i2, int i3, int i4) const = 0; - -}; - -/** - \brief Isotropic, unstressed Linear Elastic constitutive behavior - - */ - -class IsotropicLinearElastic: public LinearElasticBase { -public: - static const int DELTA_MAT[][3]; - - IsotropicLinearElastic(double iLambda, double imu, double irho = 0) : - LinearElasticBase(irho), lambda(iLambda), mu(imu) { - } - virtual ~IsotropicLinearElastic() { - } - IsotropicLinearElastic(const IsotropicLinearElastic &NewMat) : - LinearElasticBase(NewMat), lambda(NewMat.lambda), mu(NewMat.mu) { - } - virtual IsotropicLinearElastic * clone() const { - return new IsotropicLinearElastic(*this); - } - - const std::string getMaterialName() const { - return "IsotropicLinearElastic"; - } - - double getSoundSpeed(void) const { - return sqrt((lambda + 2.0 * mu) / getDensityInReference()); - } - -protected: - double getModuli(int i1, int i2, int i3, int i4) const { - return lambda * DELTA_MAT[i1][i2] * DELTA_MAT[i3][i4] + mu * (DELTA_MAT[i1][i3] * DELTA_MAT[i2][i4] + DELTA_MAT[i1][i4] * DELTA_MAT[i2][i3]); - } - -private: - double lambda; - double mu; -}; - - -#endif diff --git a/maxflow/galois/apps/avi/libMat/NeoHookean.cpp b/maxflow/galois/apps/avi/libMat/NeoHookean.cpp deleted file mode 100644 index a713c60..0000000 --- a/maxflow/galois/apps/avi/libMat/NeoHookean.cpp +++ /dev/null @@ -1,215 +0,0 @@ -/* - * NeoHookean.cpp - * DG++ - * - * Created by Adrian Lew on 10/24/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
- */ - -#include "Material.h" - -// calling default constructor -Galois::Runtime::PerThreadStorage NeoHookean::perCPUtmpVec; - -static double matlib_determinant(const double *A) { - double det; - - det = A[0] * (A[4] * A[8] - A[5] * A[7]) - A[1] * (A[3] * A[8] - A[5] * A[6]) + A[2] * (A[3] * A[7] - A[4] * A[6]); - - return det; -} - -static double matlib_inverse(const double *A, double *Ainv) { - double det, detinv; - - det = matlib_determinant(A); - if (fabs(det) < SimpleMaterial::DET_MIN) { - return 0.e0; - } - - detinv = 1. / det; - Ainv[0] = detinv * (A[4] * A[8] - A[5] * A[7]); - Ainv[1] = detinv * (-A[1] * A[8] + A[2] * A[7]); - Ainv[2] = detinv * (A[1] * A[5] - A[2] * A[4]); - Ainv[3] = detinv * (-A[3] * A[8] + A[5] * A[6]); - Ainv[4] = detinv * (A[0] * A[8] - A[2] * A[6]); - Ainv[5] = detinv * (-A[0] * A[5] + A[2] * A[3]); - Ainv[6] = detinv * (A[3] * A[7] - A[4] * A[6]); - Ainv[7] = detinv * (-A[0] * A[7] + A[1] * A[6]); - Ainv[8] = detinv * (A[0] * A[4] - A[1] * A[3]); - - return det; -} - -static void matlib_mults(const double *A, const double *B, double *C) { - C[0] = A[0] * B[0] + A[1] * B[1] + A[2] * B[2]; - C[1] = A[3] * B[0] + A[4] * B[1] + A[5] * B[2]; - C[2] = A[6] * B[0] + A[7] * B[1] + A[8] * B[2]; - C[3] = A[0] * B[3] + A[1] * B[4] + A[2] * B[5]; - C[4] = A[3] * B[3] + A[4] * B[4] + A[5] * B[5]; - C[5] = A[6] * B[3] + A[7] * B[4] + A[8] * B[5]; - C[6] = A[0] * B[6] + A[1] * B[7] + A[2] * B[8]; - C[7] = A[3] * B[6] + A[4] * B[7] + A[5] * B[8]; - C[8] = A[6] * B[6] + A[7] * B[7] + A[8] * B[8]; -} - -bool NeoHookean::getConstitutiveResponse(const std::vector& strain, std::vector& stress, std::vector& tangents - , const ConstRespMode& mode) const { - // XXX: (amber) replaced unknown 3's with NDM, 9 & 81 with MAT_SIZE & MAT_SIZE ^ 2 - - size_t i; - size_t j; - size_t k; - size_t l; - size_t m; - size_t n; - size_t ij; - size_t jj; - size_t kl; - size_t jk; - size_t il; - size_t ik; - size_t im; - size_t jl; - size_t kj; - size_t kn; - size_t mj; - size_t nl; - size_t ijkl; - size_t indx; - - double coef; - double defVol; - double detC; - double p; - // double trace; - - NeoHookenTmpVec& tmpVec = *perCPUtmpVec.getLocal (); - double* F = tmpVec.F; - // double* Finv = tmpVec.Finv; - double* C = tmpVec.C; - double* Cinv = tmpVec.Cinv; - double* S = tmpVec.S; - double* M = tmpVec.M; - - - // double detF; - - size_t J; - - std::copy (I_MAT, I_MAT + MAT_SIZE, F); - - /*Fill in the deformation gradient*/ - for (i = 0; i < NDF; i++) { - for (J = 0; J < NDM; J++) { - F[i * NDM + J] = strain[i * NDM + J]; - } - } - - /* compute right Cauchy-Green tensor C */ - matlib_mults(F, F, C); - - /* compute PK2 stresses and derivatives wrt C*/ - detC = matlib_inverse(C, Cinv); - // detF = matlib_inverse(F, Finv); - - if (detC < DET_MIN) { - std::cerr << "NeoHookean::GetConstitutiveResponse: close to negative jacobian\n"; - return false; - } - - defVol = 0.5 * log(detC); - p = Lambda * defVol; - - // trace = C[0] + C[4] + C[8]; - - coef = p - Mu; - - for (j = 0, ij = 0, jj = 0; j < NDF; j++, jj += NDF + 1) { - for (i = 0; i < NDM; i++, ij++) { - S[ij] = coef * Cinv[ij]; - } - S[jj] += Mu; - } - - if (mode == COMPUTE_TANGENTS) { - coef = Mu - p; - for (l = 0, kl = 0, ijkl = 0; l < NDM; l++) { - for (k = 0, jk = 0; k < NDM; k++, kl++) { - for (j = 0, ij = 0, jl = l * NDM; j < NDM; j++, jk++, jl++) { - for (i = 0, ik = k * NDM, il = l * NDM; i < NDM; i++, ij++, ik++, il++, ijkl++) { - M[ijkl] = Lambda * Cinv[ij] * Cinv[kl] + coef * (Cinv[ik] * Cinv[jl] + Cinv[il] * Cinv[jk]); - } - } - } - } - } - 
- if (stress.size() != MAT_SIZE) { - stress.resize(MAT_SIZE); - } - - /* PK2 -> PK1 */ - for (j = 0, ij = 0; j < NDM; j++) { - for (i = 0; i < NDM; i++, ij++) { - stress[ij] = 0.e0; - for (k = 0, ik = i, kj = j * NDM; k < NDM; k++, ik += NDM, kj++) { - stress[ij] += F[ik] * S[kj]; - } - } - } - - if (mode == COMPUTE_TANGENTS) { - if (tangents.size() != MAT_SIZE * MAT_SIZE) { - tangents.resize(MAT_SIZE * MAT_SIZE); - } - - /* apply partial push-forward and add geometrical term */ - for (l = 0, ijkl = 0; l < NDM; l++) { - for (k = 0; k < NDF; k++) { - for (j = 0, jl = l * NDF; j < NDM; j++, jl++) { - for (i = 0; i < NDF; i++, ijkl++) { - - tangents[ijkl] = 0.e0; - - /* push-forward */ - for (n = 0, kn = k, nl = l * NDF; n < NDM; n++, kn += NDM, nl++) { - indx = nl * MAT_SIZE; - for (m = 0, im = i, mj = j * NDM; m < NDM; m++, im += NDM, mj++) { - tangents[ijkl] += F[im] * M[mj + indx] * F[kn]; - } - } - - /* geometrical term */ - if (i == k) { - tangents[ijkl] += S[jl]; - } - - } - } - } - } - } - - return true; -} diff --git a/maxflow/galois/apps/avi/libMat/test/testLinearElastic.cpp b/maxflow/galois/apps/avi/libMat/test/testLinearElastic.cpp deleted file mode 100644 index c5a18c8..0000000 --- a/maxflow/galois/apps/avi/libMat/test/testLinearElastic.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - * testLinearElastic.cpp - * DG++ - * - * Created by Adrian Lew on 11/21/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "Material.h" -#include -#include - -int main() -{ - IsotropicLinearElastic ILE(1., 2.); - - if(ILE.consistencyTest(ILE)) - std::cout << "Successful\n"; - else - std::cout << "Failed\n"; - - return 1; -} diff --git a/maxflow/galois/apps/avi/libMat/test/testNeoHookean.cpp b/maxflow/galois/apps/avi/libMat/test/testNeoHookean.cpp deleted file mode 100644 index abbbcdf..0000000 --- a/maxflow/galois/apps/avi/libMat/test/testNeoHookean.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/* - * testNeoHookean.cpp - * DG++ - * - * Created by Adrian Lew on 10/24/06. 
- * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "Material.h" -#include -#include - -int main() -{ - NeoHookean NH(1., 2.); - - if(NH.consistencyTest(NH)) - std::cout << "Successful\n"; - else - std::cout << "Failed\n"; - - return 1; -} diff --git a/maxflow/galois/apps/avi/libMeshInit/dgmechanics/CoordConn.h b/maxflow/galois/apps/avi/libMeshInit/dgmechanics/CoordConn.h deleted file mode 100644 index 5268054..0000000 --- a/maxflow/galois/apps/avi/libMeshInit/dgmechanics/CoordConn.h +++ /dev/null @@ -1,303 +0,0 @@ -/** CoordConn Represents the connectivity and coordinates of mesh -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * Created on: Jun 16, 2011 - * @author M. Amber Hassaan - */ - - -#ifndef COORDCONN_H_ -#define COORDCONN_H_ - -#include "AuxDefs.h" -#include "Element.h" -#include "P12DElement.h" -#include "P13DElement.h" -#include "ElementGeometry.h" -#include "Triangle.h" -#include "Tetrahedron.h" -#include "Femap.h" - -#include - -#include -#include -#include -#include -#include - -/** - * This class maintains connectivity and coordinates of the mesh read from a file. 
- * Connectivity is the ids of the nodes of each element, where nodes are numbered from * 0..numNodes-1 - * Coordinates is the 2D or 3D coordinates of each node in the mesh - * The elements themselves have ids 0..numElements-1 - */ -class CoordConn { - - -public: - CoordConn () {} - - virtual ~CoordConn () {} - - /** - * Connectivity of all elements in a single vector. Let NPE = nodes per element, then connectivity of - * element i is in the range [NPE*i, NPE*(i+1)) - * - * @return ref to vector - */ - virtual const std::vector& getConnectivity () const = 0; - - /** - * Coordinates of all nodes in the mesh in a single vector. Let SPD = number of spatial dimensions - * e.g. 2D or 3D, the coordinates for node i are in the range [i*SPD, (i+1)*SPD) - * - * @return ref to vector - */ - virtual const std::vector& getCoordinates () const = 0; - - virtual size_t getSpatialDim () const = 0; - - virtual size_t getNodesPerElem () const = 0; - - /** - * specific to file input format - */ - virtual size_t getTopology () const = 0; - - /** - * subdivide each element into smaller elements - */ - virtual void subdivide () = 0; - - virtual void initFromFileData (const FemapInput& neu) = 0; - - virtual size_t getNumNodes () const = 0; - - virtual size_t getNumElements () const = 0; - - /** - * helper for MeshInit - * The derived class decides the kind of element and element geometry to - * instantiate for each element addressed by elemIndex - * - * @param elemIndex - * @return Element* - */ - virtual Element* makeElem (const size_t elemIndex) const = 0; - -protected: - - /** - * populates vector elemConn with - * the connectivity of element indexed by elemIndex - * @see CoordConn::getConnectivity() - * - * @param elemIndex - * @param elemConn - * - */ - virtual void genElemConnectivity (size_t elemIndex, std::vector& elemConn) const = 0; - - -}; - -/** - * - * Common functionality and data structures - */ -template -class AbstractCoordConn: public CoordConn { -protected: - std::vector connectivity; - std::vector coordinates; - -public: - AbstractCoordConn (): CoordConn() { - - } - - AbstractCoordConn (const AbstractCoordConn& that): - CoordConn(that), connectivity (that.connectivity), coordinates (that.coordinates) { - - } - - AbstractCoordConn& operator = (const AbstractCoordConn& that) { - CoordConn::operator = (that); - if (this != &that) { - connectivity = that.connectivity; - coordinates = that.coordinates; - } - return (*this); - } - - virtual inline size_t getSpatialDim () const { - return SPD; - } - - virtual inline size_t getNodesPerElem () const { - return NODES_PER_ELEM; - } - - virtual inline size_t getTopology () const { - return TOPO; - } - - virtual const std::vector& getConnectivity () const { - return connectivity; - } - - virtual const std::vector& getCoordinates () const { - return coordinates; - } - - virtual size_t getNumNodes () const { - return getCoordinates ().size () / getSpatialDim (); - } - - virtual size_t getNumElements () const { - return getConnectivity ().size () / getNodesPerElem (); - } - - virtual void initFromFileData (const FemapInput& neu) { - - size_t nodes = neu.getNumNodes (); - size_t elements = neu.getNumElements (getTopology ()); - - coordinates.clear (); - coordinates.resize (nodes * getSpatialDim ()); - - connectivity.clear (); - connectivity.resize (elements * getNodesPerElem ()); - - transferNodes (neu); - transferElements (neu); - } - - -protected: - virtual void genElemConnectivity (size_t elemIndex, std::vector& conn) const { - const size_t npe = 
getNodesPerElem (); - conn.clear(); - - for (size_t i = 0; i < npe; ++i) { - conn.push_back (connectivity[elemIndex * npe + i]); - } - } - -private: - void transferNodes (const FemapInput& neu) { - - size_t n, d; - for (n = 0; n < neu.getNumNodes (); n++) { - const femapNode& nd = neu.getNode (n); - for (d = 0; d < getSpatialDim (); d++) { - coordinates[getSpatialDim () * n + d] = nd.x[d]; - } - } - - } - - - void transferElements (const FemapInput& neu) { - size_t i, j; - for (i = 0; i < neu.getNumElements (); i++) { - const femapElement& e = neu.getElement (i); - if (e.topology == getTopology ()) { - for (j = 0; j < getNodesPerElem (); ++j) { - // changed following to make node ids start from 0. - // connectivity[nodesPerElem * iv + j] = neu.getNodeId(e.node[j]) + 1; - connectivity[getNodesPerElem () * i + j] = neu.getNodeId (e.node[j]); - } - - } else { - std::cerr << "Warning: topology " << e.topology << " of element " << neu.getElementId (e.id) - << " is not supported for conversion to ADLIB. Skipping. " << std::endl; - abort (); - } - } - - return; - } - - -}; - - - -/** - * represents an edge between two mesh nodes - */ -struct edgestruct { - size_t elemId; - size_t edgeId; - GlobalNodalIndex node0; - GlobalNodalIndex node1; - - edgestruct(size_t ielem, size_t iedge, GlobalNodalIndex _node0, GlobalNodalIndex _node1) : - elemId(ielem), edgeId(iedge) { - - // sort the args in the increasing order - // can't sort fields due to const - if (_node1 < _node0) { - std::swap (_node0, _node1); - } - - // sort of node id's of an edge in a consistent manner is necessary - // in order to sort a list of edgestruct objects - - node0 = _node0; - node1 = _node1; - - assert (node0 <= node1); - - } - - - /** - * ordering based on node ids - * - * @param that - */ - bool operator < (const edgestruct &that) const { - // compare the nodes of the two edges - int result = compare (that); - return result < 0; - } - - /** - * comparison function that compares - * two objects based on the node ids in the edge - * Therefore it's necessary to store the node ids within an edge - * in sorted order to allow lexicographic comparison - * - * @param that - */ - inline int compare (const edgestruct& that) const { - int result = this->node0 - that.node0; - if (result == 0) { - result = this->node1 - that.node1; - } - return result; - } - -}; - -#endif /* COORDCONN_H_ */ diff --git a/maxflow/galois/apps/avi/libMeshInit/dgmechanics/MeshInit.cpp b/maxflow/galois/apps/avi/libMeshInit/dgmechanics/MeshInit.cpp deleted file mode 100644 index 2811138..0000000 --- a/maxflow/galois/apps/avi/libMeshInit/dgmechanics/MeshInit.cpp +++ /dev/null @@ -1,385 +0,0 @@ -#include "MeshInit.h" - -const double TriMeshInit::PARAM[] = {1, 0, 0, 1, 0, 0 }; -const double TetMeshInit::PARAM[] = -{ 1, 0, 0, - 0, 1, 0, - 0, 0, 0, - 0, 0, 1 }; - -const double MeshInit::RHO = 1.0; -const double MeshInit::MU = 0.5; -const double MeshInit::LAMBDA = 0.0; -const int MeshInit::PID = 0; - -const double MeshInit::DELTA = 0.1; -const double MeshInit::T_INIT = 0.0; - -const size_t MeshInit::MAX_FNAME = 1024; - -/** - * - * @param fileName - * @param ndiv - * number of times to subdivide the initial mesh - */ -void MeshInit::initializeMesh (const std::string& fileName, int ndiv) { - - FemapInput input (fileName.c_str ()); - - this->cc = makeCoordConn (); - - - cc->initFromFileData (input); - - for (int i = 0; i < ndiv; ++i) { - cc->subdivide (); - } - - - fieldsUsed.resize (numFields ()); - // fileds numbered from 0..N-1 - for (size_t i = 0; i < fieldsUsed.size 
(); ++i) { - fieldsUsed[i] = i; - } - - geomVec.clear (); - elemVec.clear (); - - for (size_t i = 0; i < cc->getNumElements (); ++i) { - - Element* elem = cc->makeElem (i); - - elemVec.push_back(elem); - geomVec.push_back (const_cast (&(elem->getGeometry ()))); - } - - - this->l2gMap = new StandardP1nDMap (elemVec); - - this->ile = new NeoHookean (LAMBDA, MU, RHO); - - massResidueVec.clear (); - operationsVec.clear (); - - for (std::vector::const_iterator i = elemVec.begin (); i != elemVec.end (); ++i) { - Residue* m = new DiagonalMassForSW (*(*i), *ile, fieldsUsed); - massResidueVec.push_back (m); - - DResidue* sw = new StressWork (*(*i), *ile, fieldsUsed); - operationsVec.push_back (sw); - } - - - size_t totalDof = l2gMap->getTotalNumDof (); - - VecDouble massVec (totalDof, 0.0); - - VecDouble dofArray (totalDof, 0.0); - - Residue::assemble (massResidueVec, *l2gMap, dofArray, massVec); - - - aviVec.clear (); - - for (size_t i = 0; i < elemVec.size (); ++i) { - const Element* e = elemVec[i]; - - std::vector bcfuncVec(getNodesPerElem ()); - - std::vector< std::vector > itypeMat( getSpatialDim (), - std::vector (getNodesPerElem (), StandardAVI::ZERO)); - - - getBCs (*e, itypeMat, bcfuncVec); - - // TODO: AVI init broken here - AVI* avi = new StandardAVI (*l2gMap, *operationsVec[i], massVec, i, itypeMat, bcfuncVec, DELTA, T_INIT); - - this->aviVec.push_back (avi); - - assert (operationsVec[i]->getFields ().size () == fieldsUsed.size()); - } - - - this->aviWriteInterval = std::vector (getNumElements(), 0); - -} - -void MeshInit::getBCs (const Element& e, std::vector< std::vector >& itypeMat, - std::vector& bcfuncVec) const { - - const double* param = getParam (); - double* coord = new double[this->getSpatialDim ()]; - - for (size_t a = 0; a < getNodesPerElem (); ++a) { - e.getGeometry ().map (param + this->getSpatialDim () * a, coord); - - BCFunc bc = getBCFunc (coord); - - BCImposedType itypeVal = StandardAVI::ZERO; - if (bc == NULL) { - itypeVal = StandardAVI::ZERO; - } - else { - itypeVal = StandardAVI::ONE; - } - - bcfuncVec[a] = bc; // XXX: sometimes BCFunc is NULL - - for (size_t i = 0; i < this->getSpatialDim (); ++i) { - itypeMat[i][a] = itypeVal; - } - - } - - delete[] coord; -} - -/** - * - * @param avi - * @param Qval - * displacement - * @param Vbval - * velocity - * @param Tval - * time - */ -void MeshInit::writeSync (const AVI& avi, const VecDouble& Qval, const VecDouble& Vbval, const VecDouble& Tval) { - // end time of the write interval for element 'avi' - double interEnd = (this->aviWriteInterval[avi.getGlobalIndex ()] * this->writeInc); - - // when the first time update time of 'a' goes past the current write - // interval - // we dump some state into a file - if (avi.getNextTimeStamp () > interEnd) { - ++this->aviWriteInterval[avi.getGlobalIndex ()]; - - // if 'a' is the first to enter a new write interval - // then open a new file - // and also close the old file - if (avi.getNextTimeStamp () > (this->writeInterval * this->writeInc)) { - - if (syncFileWriter != NULL) { - // will be true after the first interval - fclose (syncFileWriter); - syncFileWriter = NULL; // being defensive ... 
- - printf ("myid = %d, done with syncfiles for interval = %d, simulation time for interval = %g\n", PID, - (this->writeInterval - 1), (this->writeInterval * this->writeInc)); - // TODO: measure - // syncfile writing - // time per interval - } - - char syncFileName[MAX_FNAME]; - sprintf(syncFileName, "sync.%d_%d.dat", this->writeInterval, PID); - - this->syncFileWriter = fopen (syncFileName, "w"); - - if (syncFileWriter == NULL) { - std::cerr << "Failed to open log file for writing: " << syncFileName << std::endl; - abort (); - } - - // increment to define the end limit for the new interval. - ++this->writeInterval; - } - - assert (this->syncFileWriter != NULL); - - const std::vector& conn = avi.getGeometry ().getConnectivity (); - - for (size_t aa = 0; aa < conn.size (); ++aa) { - GlobalNodalIndex nodeNum = conn[aa]; - - fprintf (syncFileWriter, "%zd %zd ", avi.getGlobalIndex (), nodeNum); - - int idx = -1; - for (size_t f = 0; f < avi.getGeometry ().getEmbeddingDimension (); ++f) { - idx = l2gMap->map (f, aa, avi.getGlobalIndex ()); - - // XXX: commented out and printing vector values instead - // double pos = Qval[idx] + Vbval[idx] * (aviWriteInterval[avi.getGlobalIndex ()] * this->writeInc - Tval[idx]); - // fprintf (syncFileWriter, "%12.10f ", pos); - fprintf (syncFileWriter, "%12.10f ", Qval[idx]); - - fprintf (syncFileWriter, "%12.10f ", Vbval[idx]); - } - - fprintf (syncFileWriter, "%12.10f \n", Tval[idx]); - - } - - } - -} - -void MeshInit::stretchInternal (VecDouble& dispOrVel, bool isVel) const { - // int localsize = this->getSpatialDim () * this->getNodesPerElem () * this->elemVec.size (); - - double* coord = new double[this->getSpatialDim ()]; - double stretch; - - const double* param = getParam (); - - for (size_t e = 0, i = 0; e < this->elemVec.size (); ++e) { - for (size_t f = 0; f < this->numFields (); ++f) { - for (size_t a = 0; a < this->getNodesPerElem (); ++a, ++i) { - - elemVec[e]->getGeometry ().map (param + this->numFields () * a, coord); - if (isVel) { - stretch = this->initVel (coord, f); - } - else { - stretch = this->initDisp (coord, f); - } - - size_t index = l2gMap->map (f, a, e); - dispOrVel[index] = stretch; - } - } - } - - delete[] coord; -} - -// makes a copy of the arguments to sort them etc. 
-bool MeshInit::computeDiffAVI (std::vector listA, std::vector listB, bool printDiff) { - bool result = false; - - const char* nameA = "this->aviList"; - const char* nameB = "that.aviList"; - - if (listA.size () != listB.size ()) { - result = false; - if (printDiff) { - fprintf (stderr, "Comparing lists of different sizes, %s.size() = %zd, %s.size() = %zd\n", - nameA, listA.size (), nameB, listB.size ()); - } - } - else { - // sort in increasing order of next update time - AVIComparator aviCmp; - - std::sort (listA.begin (), listA.end (), aviCmp); - std::sort (listB.begin (), listB.end (), aviCmp); - - result = true; - for (size_t i = 0; i < listA.size (); ++i) { - const AVI& aviA = *listA[i]; - const AVI& aviB = *listB[i]; - - double diff = fabs (aviA.getTimeStamp () - aviB.getTimeStamp ()); - if (diff > TOLERANCE) { - result = false; - if (printDiff) { - fprintf (stderr, "(%s[%zd] = (id=%zd,time:%g)) != (%s[%zd]= (id=%zd,time=%g)) diff=%g\n", - nameA, i, aviA.getGlobalIndex (), aviA.getTimeStamp (), - nameB, i, aviB.getGlobalIndex (), aviB.getTimeStamp (), - diff); - } - else { - break; // no use continuing on if not printing - } - } - } // end for - } - - return result; - -} - - -void MeshInit::writeMeshCenters (const char* outFileName) const { - - if (getSpatialDim () != 2) { - std::cerr << "Mesh plotting implemented for 2D elements only" << std::endl; - abort (); - } - - FILE* plotFile = fopen (outFileName, "w"); - - if (plotFile == NULL) { abort (); } - - std::vector center (getSpatialDim(), 0); - - fprintf (plotFile , "center_x, center_y, timestamp\n"); - for (std::vector::const_iterator i = getAVIVec ().begin (), ei = getAVIVec ().end (); - i != ei ; ++i) { - - const AVI& avi = **i; - - std::fill (center.begin (), center.end (), 0.0); - avi.getElement ().getGeometry ().computeCenter (center); - - fprintf (plotFile, "%g, %g, %g\n", center[0], center[1], avi.getNextTimeStamp ()); - - } - - fclose (plotFile); -} - - -void MeshInit::writeMesh (const char* polyFileName, const char* coordFileName) const { - - - FILE* polyFile = fopen (polyFileName, "w"); - - if (polyFile == NULL) { abort (); } - - for (size_t i = 0; i < cc->getNodesPerElem (); ++i) { - fprintf (polyFile, "node%zd, ", i); - } - fprintf (polyFile , "timestamp\n"); - - for (std::vector::const_iterator i = getAVIVec ().begin (), ei = getAVIVec ().end (); - i != ei ; ++i) { - - const AVI& avi = **i; - - const std::vector& conn = avi.getElement ().getGeometry ().getConnectivity (); - - for (size_t j = 0; j < conn.size (); ++j) { - fprintf (polyFile, "%zd, ", conn[j]); - } - - fprintf (polyFile, "%g\n", avi.getNextTimeStamp ()); - - } - - fclose (polyFile); - - - FILE* coordFile = fopen (coordFileName, "w"); - - if (coordFile == NULL) { abort (); } - - for (size_t i = 0; i < cc->getNodesPerElem (); ++i) { - fprintf (coordFile, "dim%zd", i); - - if (i < cc->getNodesPerElem () - 1) { - fprintf (coordFile, ", "); - } else { - fprintf (coordFile, "\n"); - } - - - } - - const std::vector& coord = cc->getCoordinates (); - for (size_t i = 0; i < coord.size (); i += cc->getSpatialDim ()) { - for (size_t j = i; j < i + cc->getSpatialDim (); ++j) { - if (j != i) { // not first iter - fprintf (coordFile, ", "); - } - fprintf (coordFile, "%g", coord[j]); - } - - fprintf (coordFile, "\n"); - } - - fclose (coordFile); - -} diff --git a/maxflow/galois/apps/avi/libMeshInit/dgmechanics/MeshInit.h b/maxflow/galois/apps/avi/libMeshInit/dgmechanics/MeshInit.h deleted file mode 100644 index 9f524e2..0000000 --- 
a/maxflow/galois/apps/avi/libMeshInit/dgmechanics/MeshInit.h +++ /dev/null @@ -1,397 +0,0 @@ -/** MeshInit combines reading and initializtion of mesh -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#ifndef MESHINIT_H_ -#define MESHINIT_H_ - -#include -#include -#include -#include - -#include -#include -#include - -#include - -#include "AuxDefs.h" -#include "StandardAVI.h" -#include "Element.h" -#include "Femap.h" -#include "Material.h" -#include "CoordConn.h" -#include "TriLinearCoordConn.h" -#include "TetLinearCoordConn.h" - -class MeshInit : private boost::noncopyable { - -public: - typedef StandardAVI::BCFunc BCFunc; - typedef StandardAVI::BCImposedType BCImposedType; - -private: - static const double RHO; - static const double MU; - static const double LAMBDA; - static const int PID; - - static const double DELTA; - static const double T_INIT; - - // length of filenames - static const size_t MAX_FNAME; - -private: - double simEndTime; - bool wave; - - - - - //! to be freed - LocalToGlobalMap* l2gMap; - CoordConn* cc; - SimpleMaterial* ile; - - //! 
vectors to keep track of all the memory - std::vector geomVec; - std::vector elemVec; - std::vector massResidueVec; - std::vector operationsVec; - std::vector aviVec; - - - std::vector fieldsUsed; - - double writeInc; - int writeInterval; - std::vector aviWriteInterval; - FILE* syncFileWriter; - -private: - void stretchInternal (VecDouble& dispOrVel, bool isVel) const ; - void getBCs (const Element& e, std::vector< std::vector >& itypeMat, - std::vector& bcfuncVec) const; - - static bool computeDiffAVI (std::vector listA, std::vector listB, bool printDiff); - - template - static void destroyVecOfPtr (std::vector& vec) { - for (typename std::vector::iterator i = vec.begin (), ei = vec.end (); - i != ei; ++i) { - - delete *i; - *i = NULL; - } - } - - void destroy () { - - destroyVecOfPtr (geomVec); - destroyVecOfPtr (elemVec); - destroyVecOfPtr (massResidueVec); - destroyVecOfPtr (operationsVec); - destroyVecOfPtr (aviVec); - - delete l2gMap; - delete cc; - delete ile; - - } - -public: - - /** - * - * @param simEndTime - * @param wave - */ - MeshInit (double simEndTime, bool wave): - simEndTime (simEndTime), wave(wave) { - - //TODO: writeInc should depend on simEndTime and - // number of intervals intended - if (wave) { - this->writeInc = 0.005; // XXX: from testPAVI2D - } - else { - this->writeInc = 0.1; - } - - writeInterval = 0; - syncFileWriter = NULL; - - } - - virtual ~MeshInit () { - destroy (); - } - - /** - * - * main function to call after creating an instance. This - * initializes all the data structures by reading this file - * - * @param fileName - * @param ndiv: number of times the mesh (read in from the file) should be subdivided - */ - void initializeMesh (const std::string& fileName, int ndiv); - - virtual size_t getSpatialDim () const = 0; - - virtual size_t getNodesPerElem () const = 0; - - bool isWave () const { return wave; } - - double getSimEndTime () const { return simEndTime; } - - //! Number of elements in the mesh - int getNumElements () const { return cc->getNumElements (); } - - //! number of nodes (vertices) in the mesh - int getNumNodes () const { return cc->getNumNodes (); } - - //! number of nodes times the dimensionality - unsigned int getTotalNumDof () const { return l2gMap->getTotalNumDof (); } - - const std::vector& getAVIVec () const { return aviVec; } - - //! mapping function from local per element vectors (for target functions) to global vectors - //! 
this tells what indices in the global vector each element contributes to - - const LocalToGlobalMap& getLocalToGlobalMap () const { return *l2gMap; } - - /** - * setup initial conditions - * to be called before starting the simulation loop - * - * @param disp: global dispalcements vector - */ - void setupDisplacements (VecDouble& disp) const { stretchInternal (disp, false); } - - /** - * setup initial conditions - * to be called before starting the simulation loop - * - * @param vel: global velocities vector - */ - void setupVelocities (VecDouble& vel) const { stretchInternal (vel, true); } - - /** - * Write the values in global vectors corresponding to this avi element - * to a file at regular intervals - * - * @param avi - * @param Qval - * @param Vbval - * @param Tval - */ - void writeSync (const AVI& avi, const VecDouble& Qval, const VecDouble& Vbval, const VecDouble& Tval) ; - - - /** - * Compare state of avi vector against other object - * Use for verification between different versions - * - * @param that - */ - bool cmpState (const MeshInit& that) const { return computeDiffAVI (this->aviVec, that.aviVec, false); } - - /** - * Compare state of avi vector against other object - * Use for verification between different versions - * and also print out the differences - * - * @param that - */ - void printDiff (const MeshInit& that) const { computeDiffAVI (this->aviVec, that.aviVec, true); } - - - void writeMeshCenters (const char* outFileName="mesh-centers.csv") const; - - void writeMesh (const char* polyFileName="mesh-poly.csv", const char* coordFileName="mesh-coord.csv") const; - -protected: - - //! functions to compute boundary condtions - //! @param coord - virtual BCFunc getBCFunc (const double* coord) const = 0; - - //! returns the correct derived type of CoordConn - virtual CoordConn* makeCoordConn () const = 0; - - //! parametric node numbering of an element (triangle or tetrahedron) - virtual const double* getParam () const = 0; - - //! internal function used by @see setupDisplacements - //! @param coord - //! @param f - virtual double initDisp (const double* coord, int f) const = 0; - - //! internal function used by @see setupVelocities - //! @param coord - //! @param f - virtual double initVel (const double* coord, int f) const = 0; - - //! number of fields often the same as dimensionality - virtual size_t numFields () const = 0; - -}; - -class TriMeshInit: public MeshInit { -private: - - static double topBC (int f, int a, double t) { - if (f == 0) { - return (0.1 * cos (t)); - } else { - return (0.0); - } - } - - static double botBC (int f, int a, double t) { - return (0.0); - } - -public: - static const double PARAM[]; - - TriMeshInit (double simEndTime, bool wave=false): MeshInit(simEndTime, wave) { - } - - - virtual size_t getSpatialDim () const { - return TriLinearTraits::SPD; - } - virtual size_t getNodesPerElem () const { - return TriLinearTraits::NODES_PER_ELEM; - } - - -protected: - - - virtual CoordConn* makeCoordConn () const { return new TriLinearCoordConn(); } - - virtual const double* getParam () const { return PARAM; } - - virtual size_t numFields () const { return TriLinearTraits::NFIELDS;} - - virtual BCFunc getBCFunc (const double* coord) const { - if (coord[0] == 0.0) { - return botBC; - } else if (coord[0] == 10.0) { - return topBC; - } else { - return NULL; - } - } - - virtual double initDisp (const double* coord, int f) const { - double stretch; - if (f == 0) { - // XXX: some weird code??? 
- stretch = coord[0] * 0.2 - 1.0; - stretch = coord[0] * 0.01 - 0.05; - } else { - stretch = coord[1] * 0.2 - 1.0; - stretch = coord[1] * 0.01 - 0.05; - } - - return stretch; - } - - virtual double initVel (const double* coord, int f) const { - if (coord[0] == 0.0) { - return 0.1; - } else { - return 0.0; - } - } - -}; - -class TetMeshInit: public MeshInit { -private: - - static double topBC (int f, int a, double t) { - if (f == 2) { - return (0.1 * sin (t)); - } else { - return (0.0); - } - } - - static double botBC (int f, int a, double t) { - return (0.0); - } - - -public: - static const double PARAM[]; - - TetMeshInit (double simEndTime, bool wave=false): MeshInit(simEndTime, wave) { - } - - - virtual size_t getSpatialDim () const { - return TetLinearTraits::SPD; - } - virtual size_t getNodesPerElem () const { - return TetLinearTraits::NODES_PER_ELEM; - } - - -protected: - - - virtual CoordConn* makeCoordConn () const { return new TetLinearCoordConn(); } - - virtual const double* getParam () const { return PARAM; } - - virtual size_t numFields () const { return TetLinearTraits::NFIELDS;} - - virtual BCFunc getBCFunc (const double* coord) const { - if (fabs (coord[0]) < 0.01) { - return botBC; - } else if (fabs (coord[0] - 5.0) < 0.01) { - return topBC; - } else { - return NULL; - } - } - - virtual double initDisp (const double* coord, int f) const { - double stretch = coord[f] * 0.10 - 0.25; - return stretch; - } - - virtual double initVel (const double* coord, int f) const { - return 0.0; - } - -}; - - -#endif /* MESHINIT_H_ */ diff --git a/maxflow/galois/apps/avi/libMeshInit/dgmechanics/TetLinearCoordConn.h b/maxflow/galois/apps/avi/libMeshInit/dgmechanics/TetLinearCoordConn.h deleted file mode 100644 index fe67d1e..0000000 --- a/maxflow/galois/apps/avi/libMeshInit/dgmechanics/TetLinearCoordConn.h +++ /dev/null @@ -1,284 +0,0 @@ -/** TetLinearCoordConn represents a mesh containing linear tetrahedra -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. 
Amber Hassaan - */ - -#ifndef _TET_LINEAR_COORD_CONN_H_ -#define _TET_LINEAR_COORD_CONN_H_ - -/** - * important constants for linear tetrahedron - */ -struct TetLinearTraits { - enum { - SPD = 3, - NODES_PER_ELEM = 4, - TOPO = 6, - NUM_EDGES = 6, - NFIELDS = SPD, - }; -}; - -class TetLinearCoordConn -: public AbstractCoordConn { - public: - static const size_t NUM_EDGES = TetLinearTraits::NUM_EDGES; - - protected: - /** - * Return an instance of 3D with linear shape functions - * and linear tetrahedron as geometry - */ - virtual Element* makeElem (const size_t elemIndex) const { - std::vector conn; - - genElemConnectivity (elemIndex, conn); - - Tetrahedron* tetGeom = new Tetrahedron (coordinates, conn); - return new P13D::Bulk (*tetGeom); - } - - private: - /** - * - * @param neighbors: the output vector - * for each element p, populate an indexed list L of pairs (q,j), where i is index of each pair, - * such that - * p shares it's i with q's edge j. - * There should be a corresponding entry (p,i) in q's list at index j. - * - */ - - void getEdgeNeighborList (std::vector > > &neighbors) const { - - size_t iElements = getNumElements (); - - neighbors.clear(); - neighbors.resize(iElements); - - std::vector edges; - - size_t V1[] = { 0, 1, 0, 2, 0, 1 }; - size_t V2[] = { 1, 2, 2, 3, 3, 3 }; - - // the 4 nodes of a tet are numbered 0..3 - // edges are 0-1, 0-2, 0-3, 1-2, 1-3, 2-3 - // the nodes corresponding to edges are picked up in an involved manner using two arrays V1 and V2 - // the edges must be sorted in a consistent manner, due to which the nodes in an edge must be - // sorted. - - // Creating a list of all possible edges. - for (size_t e = 0; e < iElements; e++) { - neighbors[e].resize(NUM_EDGES); - - const size_t econn[] = { connectivity[e * 4 + 0], connectivity[e * 4 + 1], connectivity[e * 4 + 2], connectivity[e * 4 + 3] }; - - for (size_t edgenum = 0; edgenum < NUM_EDGES; edgenum++) { - GlobalNodalIndex node0; - GlobalNodalIndex node1; - - node0 = econn[V1[edgenum]]; - node1 = econn[V2[edgenum]]; - edgestruct myedge(e, edgenum, node0, node1); - edges.push_back(myedge); - } - } - - std::sort(edges.begin(), edges.end()); - - // Edges that have exactly the same connectivity should appear consecutively. - // If there is no repetition, the edgeId is free. - std::vector::iterator it1 = edges.begin(); - while (it1 != edges.end()) { - std::vector repedges; - repedges.clear(); - repedges.push_back(*it1); - std::vector::iterator it2 = it1 + 1; - - while (true && it2 != edges.end()) { - // check for same connectivity - if ((it2->node0 == it1->node0) && (it2->node1 == it1->node1)) { - repedges.push_back(*it2); - it2++; - - } else { - break; - } - } - - if (repedges.size() > 1) { // Shared edgeId. - for (size_t p = 0; p < repedges.size(); p++) { - for (size_t q = 0; q < repedges.size(); q++) { - if (p != q) { - neighbors[repedges[p].elemId][repedges[p].edgeId]. push_back(repedges[q].elemId); - - neighbors[repedges[p].elemId][repedges[p].edgeId]. push_back(repedges[q].edgeId); - } - } - } - } - - it1 = it2; - } - // done. - } - - public: - - /** - * Purpose : Subdivide a tetrahedron in 8 smaller ones. - * Algorithm to subdivide a test: - * Parent tet: ABCD. - * Since a consistent numbering of edges is crucial, the following convention - * is adopted : 1 - AB, 2-BC, 3-CA, 4-CD, 5-AD, 6-BD. - * Midpoints of edges AB,BC,CA,CD,AD,BD are M1, M2, M3, M4, M5, M6 resply. 
- - * Tet1: A-M1-M3-M5, - * Tet2: M1-B-M2-M6, - * Tet3: M3-M2-C-M4, - * Tet4: M5-M6-M4-D, - - * Tet5: M1-M4-M5-M6, - * Tet6: M1-M4-M6-M2, - * Tet7: M1-M4-M2-M3, - * Tet8: M1-M4-M3-M5. - */ - virtual void subdivide () { - - size_t sd = getSpatialDim(); - size_t eNodes = getNodesPerElem(); // Number of nodes per element. - - size_t iElements = getNumElements(); // Number of elements. - size_t iNodes = getNumNodes(); - - std::vector > > neighbors; - getEdgeNeighborList(neighbors); - - // Connectivity for mid-points of each edgeId for each element. - // size_t midconn[iElements][NUM_EDGES]; - - std::vector > midconn (iElements); - for (size_t i = 0; i < midconn.size (); ++i) { - midconn[i].resize (NUM_EDGES); - } - - size_t count = iNodes; - - for (size_t e = 0; e < iElements; e++) { - for (size_t f = 0; f < NUM_EDGES; f++) { - - // Number of elements sharing edgeId 'f' of element 'e'. - size_t nNeighbors = neighbors[e][f].size() / 2; - - if (nNeighbors == 0) { // Free edgeId. - - // for 0-based node numbering we increment 'count' afterwards - // count++; - midconn[e][f] = count; - ++count; - - } else { // Shared edgeId - // Find the least element neighbor number. - size_t minElem = e; - for (size_t p = 0; p < nNeighbors; p++) { - if (minElem > neighbors[e][f][2 * p]) { - minElem = neighbors[e][f][2 * p]; - } - } - - if (e == minElem) { // Allot only once for a shared edgeId. - // for 0-based node numbering we increment 'count' afterwards - // count++; - midconn[e][f] = count; - - for (size_t p = 0; p < nNeighbors; p++) { - size_t nelem = neighbors[e][f][2 * p]; - size_t nedge = neighbors[e][f][2 * p + 1]; - midconn[nelem][nedge] = count; - } - // increment 'count' now - ++count; - } - } - } - } - - // Creating new coordinates and connectivity arrays: - // Each tet is subdivided into 8. 
- std::vector newCoord(count * sd); - std::vector newConn; - - for (size_t i = 0; i < coordinates.size(); i++) { - newCoord[i] = coordinates[i]; - } - - // Coordinates for midside nodes: - size_t V1[] = { 0, 1, 0, 2, 0, 1 }; - size_t V2[] = { 1, 2, 2, 3, 3, 3 }; - for (size_t e = 0; e < iElements; e++) - for (size_t f = 0; f < NUM_EDGES; f++) { - // for 0-based node numbering, we don't need to subtract 1 from node ids in connectivity - // size_t v1 = connectivity[e * eNodes + V1[f]] - 1; - // size_t v2 = connectivity[e * eNodes + V2[f]] - 1; - // for (size_t k = 0; k < sd; k++) - // newCoord[(midconn[e][f] - 1) * sd + k] = 0.5 * (coordinates[v1 * sd + k] + coordinates[v2 * sd + k]); - size_t v1 = connectivity[e * eNodes + V1[f]]; - size_t v2 = connectivity[e * eNodes + V2[f]]; - for (size_t k = 0; k < sd; k++) { - newCoord[midconn[e][f] * sd + k] = 0.5 * (coordinates[v1 * sd + k] + coordinates[v2 * sd + k]); - } - } - - for (size_t e = 0; e < iElements; e++) { - // tet 1-8 - // four at conrners - size_t t1conn[] = { connectivity[e * eNodes + 0], midconn[e][0], midconn[e][2], midconn[e][4] }; - size_t t2conn[] = { midconn[e][0], connectivity[e * eNodes + 1], midconn[e][1], midconn[e][5] }; - size_t t3conn[] = { midconn[e][2], midconn[e][1], connectivity[e * eNodes + 2], midconn[e][3] }; - size_t t4conn[] = { midconn[e][4], midconn[e][5], midconn[e][3], connectivity[e * eNodes + 3] }; - - // four in the middle - size_t t5conn[] = { midconn[e][0], midconn[e][3], midconn[e][4], midconn[e][5] }; - size_t t6conn[] = { midconn[e][0], midconn[e][3], midconn[e][5], midconn[e][1] }; - size_t t7conn[] = { midconn[e][0], midconn[e][3], midconn[e][1], midconn[e][2] }; - size_t t8conn[] = { midconn[e][0], midconn[e][3], midconn[e][2], midconn[e][4] }; - - newConn.insert (newConn.end (), &t1conn[0], &t1conn[eNodes]); - newConn.insert (newConn.end (), &t2conn[0], &t2conn[eNodes]); - newConn.insert (newConn.end (), &t3conn[0], &t3conn[eNodes]); - newConn.insert (newConn.end (), &t4conn[0], &t4conn[eNodes]); - newConn.insert (newConn.end (), &t5conn[0], &t5conn[eNodes]); - newConn.insert (newConn.end (), &t6conn[0], &t6conn[eNodes]); - newConn.insert (newConn.end (), &t7conn[0], &t7conn[eNodes]); - newConn.insert (newConn.end (), &t8conn[0], &t8conn[eNodes]); - - } - coordinates.clear(); - connectivity.clear(); - coordinates.assign(newCoord.begin(), newCoord.end()); - connectivity.assign(newConn.begin(), newConn.end()); - - // nodes = size_t(coordinates.size() / 3); - // elements = size_t(connectivity.size() / 4); - } -}; - -#endif // _TET_LINEAR_COORD_CONN_H_ diff --git a/maxflow/galois/apps/avi/libMeshInit/dgmechanics/TriLinearCoordConn.h b/maxflow/galois/apps/avi/libMeshInit/dgmechanics/TriLinearCoordConn.h deleted file mode 100644 index 420d10f..0000000 --- a/maxflow/galois/apps/avi/libMeshInit/dgmechanics/TriLinearCoordConn.h +++ /dev/null @@ -1,161 +0,0 @@ -/** TriLinearCoordConn represents a mesh of linear triangles -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. 
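[Illustrative sketch, not part of the diff] The subdivide() routine above gives every shared tetrahedron edge a single midpoint node id by sorting edgestruct records and letting the lowest-numbered neighboring element allot the id. Assuming only that shared-numbering effect matters, the same bookkeeping can be expressed with a map keyed on the canonically ordered endpoint pair; midpointNodeId below is a hypothetical helper, not a name from the deleted code.

#include <cstddef>
#include <map>
#include <utility>

// Sketch only: hand out exactly one midpoint node id per unique edge (n0, n1),
// so elements that share the edge agree on the new node. The deleted subdivide()
// reaches the same result with sorted edgestruct records and neighbor lists.
std::size_t midpointNodeId(std::map<std::pair<std::size_t, std::size_t>, std::size_t>& edgeToMid,
                           std::size_t n0, std::size_t n1, std::size_t& nextNodeId) {
  if (n1 < n0) std::swap(n0, n1);                     // canonical (sorted) edge key
  auto ins = edgeToMid.emplace(std::make_pair(n0, n1), nextNodeId);
  if (ins.second) ++nextNodeId;                       // first time this edge is seen
  return ins.first->second;
}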
NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#ifndef _TRI_LINEAR_COORD_CONN_H_ -#define _TRI_LINEAR_COORD_CONN_H_ - -#include "CoordConn.h" - -/** - * important constants for linear triangle - */ -struct TriLinearTraits { - enum { - SPD = 2, - NODES_PER_ELEM = 3, - TOPO = 2, - NUM_EDGES = 3, - NFIELDS = SPD, - }; -}; - -class TriLinearCoordConn -: public AbstractCoordConn { - - protected: - /** - * - * Return a 2d element with linear shape functions and linear triangle as the geometry - * - * @param elemIndex - */ - - virtual Element* makeElem (const size_t elemIndex) const { - std::vector conn; - - genElemConnectivity (elemIndex, conn); - - Triangle* triGeom = new Triangle (coordinates, conn); - return new P12D::Bulk (*triGeom); - } - - public: - - /** - * divides each triangle in the mesh in to 4 triangles - * The main idea is to join the mid points of the three segments (called edges here) - */ - virtual void subdivide () { - // Check for consistency of connectivity and coordinates arrays: - std::vector faces; - - size_t iElements = getNumElements(); // 3 nodes per element. - size_t iNodes = getNumNodes(); // Assume 2D triangles. - - for (size_t e = 0; e < iElements; e++) { - // std::vector conn; - GlobalNodalIndex node0; - GlobalNodalIndex node1; - - - node0 = connectivity[e * 3 + 0]; - node1 = connectivity[e * 3 + 1]; - faces.push_back (edgestruct (e, 0, node0, node1)); - - node0 = connectivity[e * 3 + 1]; - node1 = connectivity[e * 3 + 2]; - faces.push_back (edgestruct (e, 1, node0, node1)); - - node0 = connectivity[e * 3 + 2]; - node1 = connectivity[e * 3 + 0]; - faces.push_back (edgestruct (e, 2, node0, node1)); - } - - std::sort (faces.begin (), faces.end ()); - - std::vector NodeInfo (faces.size () * 2, 0); - size_t middlenodenum = iNodes; - - // Create middle nodes - for (std::vector::iterator it = faces.begin (); it != faces.end (); it++) { - // for 1-based node numbering - // double xm = (coordinates[2 * (it->conn[0] - 1)] + coordinates[2 * (it->conn[1] - 1)]) / 2.; - // double ym = (coordinates[2 * (it->conn[0] - 1) + 1] + coordinates[2 * (it->conn[1] - 1) + 1]) / 2.; - // - // for 0-based node numbering, we don't need to subtract 1 from conn field of facestruct - double xm = (coordinates[2 * it->node0] + coordinates[2 * it->node1]) / 2.; - double ym = (coordinates[2 * it->node0 + 1] + coordinates[2 * it->node1 + 1]) / 2.; - coordinates.push_back (xm); - coordinates.push_back (ym); - - NodeInfo[it->elemId * 6 + it->edgeId * 2 + 0] = it->edgeId; - // for 0-based node numbering, don't add 1 - // NodeInfo[it->elemId * 6 + it->edgeId * 2 + 1] = middlenodenum + 1; - NodeInfo[it->elemId * 6 + it->edgeId * 2 + 1] = middlenodenum; - - if (it + 1 != faces.end ()) { - if (it->node0 == (it + 1)->node0 && it->node1 == (it + 1)->node1) { - it++; - NodeInfo[it->elemId * 6 + it->edgeId * 2 + 0] = it->edgeId; - // for 0-based node numbering, don't add 1 - // NodeInfo[it->elemId * 6 + it->edgeId * 2 + 1] = middlenodenum + 1; - NodeInfo[it->elemId * 6 + it->edgeId * 2 + 1] = middlenodenum; - } - } - - 
++middlenodenum; - } - - // Create connectivity - std::vector copyconn (connectivity); - connectivity.resize (iElements * 3 * 4); - - for (size_t e = 0; e < iElements; e++) { - // triangle 1 - connectivity[e * 4 * 3 + 0 * 3 + 0] = copyconn[e * 3]; - connectivity[e * 4 * 3 + 0 * 3 + 1] = NodeInfo[e * 6 + 0 * 2 + 1]; - connectivity[e * 4 * 3 + 0 * 3 + 2] = NodeInfo[e * 6 + 2 * 2 + 1]; - - // triangle 2 - connectivity[e * 4 * 3 + 1 * 3 + 0] = copyconn[e * 3 + 1]; - connectivity[e * 4 * 3 + 1 * 3 + 1] = NodeInfo[e * 6 + 1 * 2 + 1]; - connectivity[e * 4 * 3 + 1 * 3 + 2] = NodeInfo[e * 6 + 0 * 2 + 1]; - - // triangle 3 - connectivity[e * 4 * 3 + 2 * 3 + 0] = copyconn[e * 3 + 2]; - connectivity[e * 4 * 3 + 2 * 3 + 1] = NodeInfo[e * 6 + 2 * 2 + 1]; - connectivity[e * 4 * 3 + 2 * 3 + 2] = NodeInfo[e * 6 + 1 * 2 + 1]; - - // triangle 4 - connectivity[e * 4 * 3 + 3 * 3 + 0] = NodeInfo[e * 6 + 0 * 2 + 1]; - connectivity[e * 4 * 3 + 3 * 3 + 1] = NodeInfo[e * 6 + 1 * 2 + 1]; - connectivity[e * 4 * 3 + 3 * 3 + 2] = NodeInfo[e * 6 + 2 * 2 + 1]; - } - - // nodes = int(coordinates.size() / 2); - // elements = int(connectivity.size() / 3); - } - -}; - -#endif // _TRI_LINEAR_COORD_CONN_H_ diff --git a/maxflow/galois/apps/avi/libMeshInit/femap/Femap.cpp b/maxflow/galois/apps/avi/libMeshInit/femap/Femap.cpp deleted file mode 100644 index a2ac084..0000000 --- a/maxflow/galois/apps/avi/libMeshInit/femap/Femap.cpp +++ /dev/null @@ -1,552 +0,0 @@ -#include "Femap.h" - - - -// -// FemapInput::FemapInput(const char* fileName) -// Constructor. Opens input file and checks to make sure it worked. -// Searches for "-1" indicator, identifies following record, -// and calls appropriate function to deal with the record. -// -FemapInput::FemapInput(const char* fileName) : _ifs() { - - std::ifstream gzfile (fileName, std::ios_base::in | std::ios_base::binary); - - _ifs.push (boost::iostreams::gzip_decompressor ()); - _ifs.push (gzfile); - - - if (_ifs) { - std::cout << std::endl - << "Femap Neutral file " << fileName << " is open for input." - << std::endl - << std::endl; - } - else { - std::cerr << "Cannot open Femap Neutral file " << fileName - << ". Quitting\n"; - exit(1); - } - - std::string s; - int id; - - for ( _ifs >> s; ( _ifs >> id ) && ( s == "-1" ) ; _ifs >> s ) { - nextLine(); - switch ( id ) { - case 100: _readHeader(); break; - //case 402: _readProperty(); break; - case 403: _readNodes(); break; - case 404: _readElements(); break; - //case 408: _readGroups(); break; - //case 506: _readConstraints(); break; - case 507: _readLoads(); break; - //case 601: _readMaterial(); break; - case 999: break; - default: - //std::cout << "Skipping Data Block " << id << ". Not supported.\n"; - do { getline( _ifs, s ); s.assign(s,0,5); } while ( s != " -1" ); - } - } - std::cout << "\nDone reading Neutral File input. Closing file " << fileName << ".\n\n"; - - return; - -} - -// -// FemapInput::_readHeader() -// Called for Data Block ID 100. -// Reads Neutral File Header and prints to stdout -// -void FemapInput::_readHeader() -{ - std::string s; - _ifs >> s; - if (s=="") s=""; - std::cout << "Database Title: " << s <> s; - std::cout << "Created with version: " << s <> s; - if (s!="-1") { - std::cerr << "Too many records in Data Block 100.\n"; - while (s!="-1") _ifs >> s; - } - - return; -} - -// -// FemapInput::_readProperty() -// -void FemapInput::_readProperty() -{ - std::string s, sdumm; - std::cout << "Reading Properties.\n"; - - femapProperty p; - - // read prop id, etc. 
- getline( _ifs, s ); - - do{ - sscanf(s.c_str(), "%zd,%*d,%zd,%zd,%*d,%*d", &(p.id), &(p.matId), &(p.type)); - - // read & print title - _ifs >> p.title; - if (p.title=="") p.title=""; - std::cout << "Id: " << p.id << " Title: " << p.title << std::endl; - - nextLine(); - - // read flag[0,3] - getline( _ifs, s ); - sscanf(s.c_str(), "%d,%d,%d,%d", p.flag, p.flag+1, p.flag+2, p.flag+3); - - // skip laminate data - int i; - _ifs >> i; - nextLine(); - nextLine(i/8); - if (i%8) nextLine(); - - // get # of prop values & size value std::vector accordingly - _ifs >> p.num_val; - p.value.resize(p.num_val); - - // get values - nextLine(); - for (i=0; i> p.value[i]; _ifs >> dumm; } - - // read num_outline - int num_outline; - _ifs >> num_outline; - nextLine(); - - // skip outline point definitions - nextLine(num_outline); - - _properties.push_back(p); - _propertyIdMap[p.id] = _properties.size() - 1; - - - // Look for more properties - getline( _ifs, s ); - - } while ( sdumm.assign(s,0,5) != " -1" ); - - return; -} - -// -// FemapInput::_readNodes() -// -void FemapInput::_readNodes() -{ - std::string s; - std::cout << "Reading Nodes.\n"; - femapNode n; - getline( _ifs, s ); - while ( sscanf(s.c_str(), "%zd,%*d,%*d,%*d,%*d,%d,%d,%d,%d,%d,%d,%lg,%lg,%lg,%*d,", - &(n.id), &(n.permBc[0]), &(n.permBc[1]), &(n.permBc[2]), &(n.permBc[3]), - &(n.permBc[4]), &(n.permBc[5]), &(n.x[0]), &(n.x[1]), &(n.x[2]) ) == 10 ) - { - _nodes.push_back(n); - _nodeIdMap[n.id] = _nodes.size() - 1; - getline( _ifs, s ); - } - - std::cout << "Read " << _nodes.size() << " nodes.\n" << std::flush; - return; -} - -// -// FemapInput::_readElements() -// -void FemapInput::_readElements() -{ - std::string s; - std::cout << "Reading Elements.\n"; - - int nd[20]; - getline( _ifs, s ); - int id, propId, type, topology, geomId, formulation; - - while ( sscanf(s.c_str(), "%d,%*d,%d,%d,%d,%*d,%*d,%*d,%d,%d,%*d,%*d,", - &(id), &(propId), &(type), &(topology), &(geomId), - &(formulation) ) == 6 ) - { - femapElement e; - e.id = id; - e.propId = propId; - e.type = type; - e.topology = topology; - e.geomId = geomId; - e.formulation = formulation; - - getline( _ifs, s ); - sscanf(s.c_str(), "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,", - &(nd[0]),&(nd[1]),&(nd[2]),&(nd[3]),&(nd[4]), - &(nd[5]),&(nd[6]),&(nd[7]),&(nd[8]),&(nd[9]) ); - - getline( _ifs, s ); - sscanf(s.c_str(), "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,", - &(nd[10]),&(nd[11]),&(nd[12]),&(nd[13]),&(nd[14]), - &(nd[15]),&(nd[16]),&(nd[17]),&(nd[18]),&(nd[19]) ); - - for (int i = 0; i < 20; i++ ) - if ( nd[i] != 0 ) e.node.push_back( nd[i] ); - - nextLine(3); - - getline( _ifs, s ); - int flg[4]; - sscanf(s.c_str(), "%*d,%*d,%*d,%*d,%*d,%*d,%*d,%*d,%*d,%*d,%*d,%*d,%d,%d,%d,%d,", - &(flg[0]), &(flg[1]), &(flg[2]), &(flg[3]) ); - if ( flg[0]!=0 || flg[1]!=0 || flg[2]!=0 || flg[3]!=0 ) { - std::cerr << "Unexpected node lists attatched to element " << e.id << ". 
Quitting.\n"; - exit(-1); - } - - _elements.push_back(e); - - _elementIdMap[e.id] = _elements.size() - 1; - - getline( _ifs, s ); - } - - std::cout << "Read " << _elements.size() << " elements.\n"; - - return; -} - -// -// FemapInput::_readConstraints() -// -void FemapInput::_readConstraints() -{ - std::cout << "Reading Constraints.\n"; - std::string s; - femapConstraintSet cs; - - _ifs >> cs.id; - - if ( static_cast (cs.id) != -1 ) { - _ifs >> cs.title; - - - const size_t NDOF = 6; - int dofread[NDOF]; - - constraint c; - getline( _ifs, s ); - while ( sscanf(s.c_str(), "%zd,%*d,%*d,%d,%d,%d,%d,%d,%d,%d", - &(c.id), &(dofread[0]), &(dofread[1]), &(dofread[2]), - &(dofread[3]), &(dofread[4]), &(dofread[5]), &(c.ex_geom) ) == 8 - && static_cast (c.id) != -1 ) { - - for (size_t i = 0; i < NDOF; ++i) { - c.dof[i] = !(dofread[i] == 0); // 0 -> false, non-zero -> true - } - - cs.nodalConstraint.push_back(c); - } - - _constraintSets.push_back(cs); - } - // skip other types of constraints (geom., etc.) - do { getline( _ifs, s ); s.assign(s,0,5); } while ( s != " -1" ); - - return; -} - -// -// FemapInput::_readLoads() -// -void FemapInput::_readLoads() -{ - std::cout << "Reading Loads.\n"; - std::string s; - femapLoadSet ls; - - //_ifs >> ls.id; - getline( _ifs, s ); - sscanf( s.c_str(), "%zd,", &(ls.id) ); - - if ( static_cast (ls.id) != -1 ) { - getline( _ifs, ls.title ); - std::cout << ls.title << std::endl << std::flush; - - getline( _ifs, s ); - - int tempOn_int, gravOn_int, omegaOn_int; - sscanf(s.c_str(), "%*d,%lg,%d,%d,%d,", - &(ls.defTemp),&(tempOn_int),&(gravOn_int),&(omegaOn_int) ); - - ls.tempOn = !(tempOn_int == 0); // 0-> false, non-zero -> true - ls.gravOn = !(gravOn_int == 0); // 0-> false, non-zero -> true - ls.omegaOn = !(omegaOn_int == 0); // 0-> false, non-zero -> true - - getline( _ifs, s ); - sscanf(s.c_str(), "%lg,%lg,%lg,", - &(ls.grav[0]), &(ls.grav[1]), &(ls.grav[2]) ); - - getline( _ifs, s ); - sscanf(s.c_str(), "%lg,%lg,%lg,", - &(ls.grav[3]), &(ls.grav[4]), &(ls.grav[5]) ); - - getline( _ifs, s ); - sscanf(s.c_str(), "%lg,%lg,%lg,", - &(ls.origin[0]), &(ls.origin[1]), &(ls.origin[2]) ); - - getline( _ifs, s ); - sscanf(s.c_str(), "%lg,%lg,%lg,", - &(ls.omega[0]), &(ls.omega[1]), &(ls.omega[2]) ); - - nextLine(14); // skip some junk we won't use - - int id, type, exp; - getline( _ifs, s ); - sscanf( s.c_str(), "%d,%d,%*d,%*d,%*d,%*d,%d,", &id, &type, &exp ); - while ( id != -1 ) - { - load l; - l.id = id; - l.type = type; - l.is_expanded = exp; - - getline( _ifs, s ); - sscanf(s.c_str(), "%d,%d,%d", - &(l.dof_face[0]), &(l.dof_face[1]), &(l.dof_face[2]) ); - getline( _ifs, s ); - sscanf(s.c_str(), "%lg,%lg,%lg,%*g,%*g,", - &(l.value[0]), &(l.value[1]), &(l.value[2]) ); - nextLine(4); - ls.loads.push_back(l); - - getline( _ifs, s ); - sscanf( s.c_str(), "%d,%d,%*d,%*d,%*d,%*d,%d,", &id, &type, &exp ); - } - - _loadSets.push_back(ls); - - } - - // skip geometry-based and non-structural loads - do { getline( _ifs, s ); s.assign(s,0,5); } while ( s != " -1" ); - - return; -} - -// -// FemapInput::_readMaterial() -// -void FemapInput::_readMaterial() -{ - std::string s, sdumm; //temporary string - int i; - std::cout << "Reading Materials.\n"; - femapMaterial m; - - int functioncount; - // read mat id, etc. 
- getline( _ifs, s ); - - do{ - sscanf( s.c_str(), "%zd,%*d,%*d,%zd,%zd,%*d,%d", - &(m.id), &(m.type), &(m.subtype), &functioncount); - - // read & print title - _ifs >> m.title; - if (m.title=="") m.title=""; - std::cout << "Id: " << m.id << " Title: " << m.title << std::endl; - - nextLine(2); - - // get bval[0,9] - getline( _ifs, s ); - sscanf(s.c_str(), "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d", - &(m.bval[0]), &(m.bval[1]), &(m.bval[2]), &(m.bval[3]), &(m.bval[4]), - &(m.bval[5]), &(m.bval[6]), &(m.bval[7]), &(m.bval[8]), &(m.bval[9])); - - nextLine(); - - // get ival[0,24]; 2 rows of 10 and 1 row of 5. - for (i=0; i<2; i++) { - getline( _ifs, s ); - sscanf(s.c_str(), "%d,%d,%d,%d,%d,%d,%d,%d,%d,%d", - &(m.ival[10*i+0]), &(m.ival[10*i+1]), &(m.ival[10*i+2]), - &(m.ival[10*i+3]), &(m.ival[10*i+4]), &(m.ival[10*i+5]), - &(m.ival[10*i+6]), &(m.ival[10*i+7]), &(m.ival[10*i+8]), &(m.ival[10*i+9])); - } - getline( _ifs, s ); - sscanf(s.c_str(), "%d,%d,%d,%d,%d", - &(m.ival[10*i+0]),&(m.ival[10*i+1]), - &(m.ival[10*i+2]),&(m.ival[10*i+3]),&(m.ival[10*i+4])); - - nextLine(); - - // get mval[0,199]; 20 rows of 10. - for (i=0; i<20; i++) { - getline( _ifs, s ); - sscanf(s.c_str(), "%lg,%lg,%lg,%lg,%lg,%lg,%lg,%lg,%lg,%lg", - &(m.mval[10*i+0]), &(m.mval[10*i+1]), &(m.mval[10*i+2]), - &(m.mval[10*i+3]), &(m.mval[10*i+4]), &(m.mval[10*i+5]), - &(m.mval[10*i+6]), &(m.mval[10*i+7]), &(m.mval[10*i+8]), &(m.mval[10*i+9])); - } - - // skip function data - nextLine(14+functioncount); - - _materials.push_back(m); - _materialIdMap[m.id] = _materials.size() - 1; - - // Look for more materials - getline( _ifs, s ); - - } while ( sdumm.assign(s,0,5) != " -1" ); - - return; -} - - // -// FemapInput::_readGroups() -// -void FemapInput::_readGroups () { - std::string s; - std::cout << "Reading Groups." 
<< std::endl << std::flush; - - int id, need_eval; - - getline (_ifs, s); - sscanf (s.c_str (), "%d,%d,%*d,", &id, &need_eval); - - while (id != -1) { - - femapGroup g; - - g.id = id; - g.need_eval = need_eval; - - getline (_ifs, g.title); - std::cout << g.title << std::endl << std::flush; - - getline (_ifs, s); - sscanf (s.c_str (), "%d,%d,%d,", - &(g.layer[0]), &(g.layer[1]), &(g.layer_method)); - - nextLine (20); // skip clipping info - - // read group rules - size_t max; - getline (_ifs, s); - sscanf (s.c_str (), "%zd,", &max); - - getline (_ifs, s); - groupRule r; - sscanf (s.c_str (), "%zd,", &(r.type)); - while (static_cast (r.type) != -1) { - if (r.type < max) { - getline (_ifs, s); - sscanf (s.c_str (), "%zd,%zd,%zd,%zd,", - &(r.startID), &(r.stopID), &(r.incID), &(r.include)); - while (static_cast (r.startID) != -1) { - g.rules.push_back (r); - getline (_ifs, s); - sscanf (s.c_str (), "%zd,%zd,%zd,%zd,", - &(r.startID), &(r.stopID), &(r.incID), &(r.include)); - } - } else - nextLine (); - - getline (_ifs, s); - sscanf (s.c_str (), "%zd,", &(r.type)); - } - - // read group lists - getline (_ifs, s); - sscanf (s.c_str (), "%zd,", &max); - - groupList l; - getline (_ifs, s); - sscanf (s.c_str (), "%zd,", &(l.type)); - - while (static_cast (l.type) != -1) { - if (l.type < max) { - getline (_ifs, s); - sscanf (s.c_str (), "%d,", &id); - while (id != -1) { - l.entityID.push_back (id); - getline (_ifs, s); - sscanf (s.c_str (), "%d,", &id); - } - g.lists.push_back (l); - } else { - nextLine (); - } - - getline (_ifs, s); - sscanf (s.c_str (), "%zd,", &(l.type)); - } - - _groups.push_back (g); - _groupIdMap[g.id] = _groups.size () - 1; - - getline (_ifs, s); - sscanf (s.c_str (), "%d,%d,%*d,", &id, &need_eval); - } - - // do { getline( _ifs, s ); s.assign(s,0,5); } - // while ( s != " -1" ); - - return; -} -// MO 1/9/01 begin -/* -inline void FemapInput::nextLine(int n=1) -{ -std::string s; - for (int i = 0; i < n; i++) getline( _ifs, s ); - return; -} -*/ -inline void FemapInput::nextLine(int n) -{ - std::string s; - for (int i = 0; i < n; i++) { - getline( _ifs, s ); - } - return; -} -inline void FemapInput::nextLine() -{ - std::string s; - getline( _ifs, s ); - return; -} -// MO 1/9/01 end - -size_t Femap::getNumElements(size_t t) const -{ - size_t n = 0; - for (std::vector::const_iterator e = _elements.begin(); e != _elements.end(); e++ ) { - if ( e->topology == t ) { - n++; - } - } - - return n; -} - -void Femap::getElements (size_t t, std::vector& vout) const -{ - for (std::vector::const_iterator e = _elements.begin(); e != _elements.end(); e++ ) { - if ( e->topology == t ) { - vout.push_back(*e); - } - } - -} diff --git a/maxflow/galois/apps/avi/libMeshInit/femap/Femap.h b/maxflow/galois/apps/avi/libMeshInit/femap/Femap.h deleted file mode 100644 index b99bd7b..0000000 --- a/maxflow/galois/apps/avi/libMeshInit/femap/Femap.h +++ /dev/null @@ -1,144 +0,0 @@ -/** - * Femap.h: Classes for parsing and writing Femap Neutral file format - * DG++ - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this 
permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ -#ifndef _FEMAP_H -#define _FEMAP_H - -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -// #include -#include -#include -#include - - -#include "FemapData.h" - -class Femap { - public: - size_t getNumMaterials() const { return _materials.size(); } - size_t getNumProperties() const { return _properties.size(); } - size_t getNumNodes() const { return _nodes.size(); } - size_t getNumElements() const { return _elements.size(); } - //! no. of elements with topology indicator t - //! @param t - size_t getNumElements(size_t t) const ; - size_t getNumConstraintSets() const { return _constraintSets.size(); } - size_t getNumLoadSets() const { return _loadSets.size(); } - size_t getNumGroups() const { return _groups.size(); } - - const std::vector& getMaterials() const { return _materials; } - const std::vector& getProperties() const { return _properties; } - const std::vector& getNodes() const { return _nodes; } - const std::vector& getElements() const { return _elements; } - const std::vector& getConstraintSets() const { return _constraintSets; } - const std::vector& getLoadSets() const { return _loadSets; } - const std::vector& getGroups() const { return _groups; } - - //! get elements with topology indicator t - //! @param t - //! 
@param vout: output vector - void getElements(size_t t, std::vector& vout) const ; // gets elements with topology t - - const femapMaterial& getMaterial(size_t i) const { return _materials[i]; } - const femapProperty& getProperty(size_t i) const { return _properties[i]; } - const femapNode& getNode(size_t i) const { return _nodes[i]; } - const femapElement& getElement(size_t i) const { return _elements[i]; } - const femapConstraintSet& getConstraintSet(size_t i) const { return _constraintSets[i]; } - const femapLoadSet& getLoadSet(size_t i) const { return _loadSets[i]; } - const femapGroup& getGroup(size_t i) const { return _groups[i]; } - - int getMaterialId(size_t i) const { return _materialIdMap.at (i); } - int getPropertyId(size_t i) const { return _propertyIdMap.at (i); } - int getNodeId(size_t i) const { return _nodeIdMap.at (i); } - int getElementId(size_t i) const { return _elementIdMap.at (i); } - int getConstraintSetId(size_t i) const { return _constraintSetIdMap.at (i); } - int getLoadSetId(size_t i) const { return _loadSetIdMap.at (i); } - int getGroupId(size_t i) const { return _groupIdMap.at (i); } - - - protected: - std::vector _materials; - std::vector _properties; - std::vector _nodes; - std::vector _elements; - std::vector _groups; - std::vector _constraintSets; - std::vector _loadSets; - - std::map _materialIdMap; - std::map _propertyIdMap; - std::map _nodeIdMap; - std::map _elementIdMap; - std::map _groupIdMap; - std::map _constraintSetIdMap; - std::map _loadSetIdMap; - - private: - -}; - -class FemapInput : public Femap { - public: - FemapInput(const char* fileName); - - - - private: - boost::iostreams::filtering_istream _ifs; - // std::ifstream _ifs; - - void _readHeader(); - void _readProperty(); - void _readNodes(); - void _readElements(); - void _readGroups(); - void _readConstraints(); - void _readLoads(); - void _readMaterial(); -// MO 1/9/01 begin -//void nextLine(int=1); - void nextLine(int); - void nextLine(); -// MO 1/9/01 end -}; - -class FemapOutput : public Femap { - public: - FemapOutput(const char* fileName) : _ofs(fileName) {} - - private: - std::ofstream _ofs; -}; -#endif //_FEMAP_H diff --git a/maxflow/galois/apps/avi/libMeshInit/femap/FemapData.h b/maxflow/galois/apps/avi/libMeshInit/femap/FemapData.h deleted file mode 100644 index 16c306c..0000000 --- a/maxflow/galois/apps/avi/libMeshInit/femap/FemapData.h +++ /dev/null @@ -1,118 +0,0 @@ -/** - * FemapData.h: Contains data structures that store data from Femap Neutral file format - * DG++ - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
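[Illustrative sketch, not part of the diff] The FemapInput constructor above scans the Neutral file for "-1" delimiter lines, reads the block id that follows, dispatches supported ids (100 header, 403 nodes, 404 elements, 507 loads, 999 end) to a reader, and skips everything else line by line until the block's closing "-1". The stripped-down scanner below only records block ids and skips every body; the exact leading-space layout of the closing sentinel was lost in this copy and is assumed here.

#include <istream>
#include <string>
#include <vector>

// Sketch only: a Neutral file is a sequence of blocks, each opened by a "-1" line
// plus a numeric id and closed by another "-1" line. This scanner records the ids
// and skips every block body, which is what the real reader does for unsupported ids.
std::vector<int> listNeutralBlockIds(std::istream& in) {
  std::vector<int> ids;
  std::string tok, line;
  int id = 0;
  for (in >> tok; (in >> id) && tok == "-1"; in >> tok) {
    std::getline(in, line);                           // rest of the id line
    ids.push_back(id);                                // 100 header, 403 nodes, 404 elements, 507 loads
    if (id == 999) break;                             // end-of-file block
    // skip the body up to the closing "-1" line (exact sentinel spacing assumed)
    do { std::getline(in, line); } while (in && line.substr(0, 5) != "   -1");
  }
  return ids;
}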
- * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -struct femapMaterial { - size_t id; - size_t type; - size_t subtype; - std::string title; - int bval[10]; - int ival[25]; - double mval[200]; -}; - -struct femapProperty { - size_t id; - size_t matId; - size_t type; - std::string title; - int flag[4]; - int num_val; - std::vector value; -}; - -struct femapNode { - size_t id; - //! coordinates - double x[3]; - int permBc[6]; -}; - -struct femapElement { - size_t id; - size_t propId; - size_t type; - size_t topology; - size_t geomId; - int formulation; // int is a guess--documentation doesn't give type - std::vector node; -}; - -struct constraint { - size_t id; - bool dof[6]; - int ex_geom; -}; - -struct femapConstraintSet { - size_t id; - std::string title; - std::vector nodalConstraint; -}; - -struct load { - size_t id; - size_t type; - int dof_face[3]; - double value[3]; - bool is_expanded; -}; - -struct femapLoadSet { - size_t id; - std::string title; - double defTemp; - bool tempOn; - bool gravOn; - bool omegaOn; - double grav[6]; - double origin[3]; - double omega[3]; - std::vector loads; -}; - -struct groupRule { - size_t type; - size_t startID; - size_t stopID; - size_t incID; - size_t include; -}; - -struct groupList { - size_t type; - std::vector entityID; -}; - -struct femapGroup { - size_t id; - short int need_eval; - std::string title; - int layer[2]; - int layer_method; - std::vector rules; - std::vector lists; -}; diff --git a/maxflow/galois/apps/avi/main/AVIabstractMain.h b/maxflow/galois/apps/avi/main/AVIabstractMain.h deleted file mode 100644 index 3b8ae74..0000000 --- a/maxflow/galois/apps/avi/main/AVIabstractMain.h +++ /dev/null @@ -1,389 +0,0 @@ -/** Common code for different AVI algorithms -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. 
Amber Hassaan - */ - - -#ifndef AVI_ABSTRACT_MAIN_H_ -#define AVI_ABSTRACT_MAIN_H_ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - - -#include "AVI.h" -#include "MeshInit.h" -#include "GlobalVec.h" -#include "LocalVec.h" - - - -#include "Galois/Accumulator.h" -#include "Galois/Galois.h" -#include "Galois/Graph/Graph.h" -#include "Galois/Statistic.h" -#include "Galois/Runtime/Sampling.h" -#include "llvm/Support/CommandLine.h" - -#include "Lonestar/BoilerPlate.h" - -namespace cll = llvm::cl; - -static cll::opt fileNameOpt("f", cll::desc(""), cll::Required); -static cll::opt spDimOpt("d", cll::desc("spatial dimensionality of the problem i.e. 2 for 2D, 3 for 3D"), cll::init(2)); -static cll::opt ndivOpt("n", cll::desc("number of times the mesh should be subdivided"), cll::init(0)); -static cll::opt simEndTimeOpt("e", cll::desc("simulation end time"), cll::init(1.0)); - - -static const char* name = "Asynchronous Variational Integrators"; -static const char* desc = "Performs elasto-dynamic simulation of a mesh with minimal number of simulation updates"; -static const char* url = "asynchronous_variational_integrators"; - -/** - * Common functionality for different versions and algorithms - */ -class AVIabstractMain { -private: - // TODO: add support for verifying from a file - struct InputConfig { - - std::string fileName; - int spDim; - int ndiv; - double simEndTime; - std::string verifile; - std::string wltype; - - InputConfig (const std::string& fileName, int spDim, int ndiv, double simEndTime, const std::string& verifile, std::string w) - :fileName (fileName), spDim (spDim), ndiv (ndiv), simEndTime (simEndTime), verifile (verifile), wltype(w) { - } - }; - -private: - static const std::string getUsage (); - - static InputConfig readCmdLine (); - - static MeshInit* initMesh (const InputConfig& input); - - static void initGlobalVec (const MeshInit& meshInit, GlobalVec& g); - - - -protected: - static const int CHUNK_SIZE = 32; - typedef Galois::WorkList::dChunkedFIFO AVIWorkList; - - typedef Galois::GAccumulator IterCounter; - - std::string wltype; - - /** version name */ - virtual const std::string getVersion () const = 0; - - /** - * To be implemented by derived classes for some type specific initialization - * e.g. unordered needs element adjacency graph - * while ordered needs a lock per node of the original mesh. 
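// A minimal sketch (with hypothetical placeholder types, not the real Galois
// code) of the shape of AVIabstractMain above: a template-method driver whose
// non-virtual run() performs the shared setup and delegates the
// version-specific pieces to virtual hooks such as initRemaining and runLoop.
// The real class additionally handles command-line parsing, timers, and
// verification against the serial version.
#include <iostream>
#include <string>

namespace avi_driver_sketch {

struct Mesh {};    // stand-in for MeshInit
struct State {};   // stand-in for GlobalVec

class AbstractDriver {
public:
  void run() {                      // common skeleton, fixed for all versions
    Mesh mesh;                      // read input and build the mesh (shared)
    State state;                    // set up the global vectors (shared)
    initRemaining(mesh, state);     // version-specific initialization
    std::cout << "running " << version() << " version\n";
    runLoop(mesh, state);           // version-specific main loop
  }
  virtual ~AbstractDriver() {}
protected:
  virtual std::string version() const = 0;
  virtual void initRemaining(const Mesh&, const State&) = 0;
  virtual void runLoop(Mesh&, State&) = 0;
};

class SerialDriver : public AbstractDriver {
protected:
  std::string version() const override { return "Serial"; }
  void initRemaining(const Mesh&, const State&) override {}  // nothing extra
  void runLoop(Mesh&, State&) override { /* process elements in time order */ }
};

} // namespace avi_driver_sketch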
- * @param meshInit - * @param g - */ - virtual void initRemaining (const MeshInit& meshInit, const GlobalVec& g) = 0; - - -public: - - /** - * - * @param meshInit - * @param g - * @param createSyncFiles - */ - virtual void runLoop (MeshInit& meshInit, GlobalVec& g, bool createSyncFiles) = 0; - - /** - * The main method to call - * @param argc - * @param argv - */ - void run (int argc, char* argv[]); - - void verify (const InputConfig& input, const MeshInit& meshInit, const GlobalVec& g) const; - - /** - * Code common to loop body of different versions - * Performs the updates to avi parameter - * - * @param avi - * @param meshInit - * @param g - * @param l - * @param createSyncFiles - */ - inline static void simulate (AVI* avi, MeshInit& meshInit, - GlobalVec& g, LocalVec& l, bool createSyncFiles); - - virtual ~AVIabstractMain() { } -}; - -/** - * Serial ordered AVI algorithm - */ -class AVIorderedSerial: public AVIabstractMain { - -protected: - virtual const std::string getVersion () const { - return std::string ("Serial"); - } - - virtual void initRemaining (const MeshInit& meshInit, const GlobalVec& g) { - // Nothing to do, so far - } - -public: - virtual void runLoop (MeshInit& meshInit, GlobalVec& g, bool createSyncFiles); -}; - -AVIabstractMain::InputConfig AVIabstractMain::readCmdLine () { - const char* fileName = fileNameOpt.c_str(); - int spDim = spDimOpt; - int ndiv = ndivOpt; - double simEndTime = simEndTimeOpt; - std::string wltype; - - return InputConfig (fileName, spDim, ndiv, simEndTime, "", wltype); -} - -MeshInit* AVIabstractMain::initMesh (const AVIabstractMain::InputConfig& input) { - MeshInit* meshInit = NULL; - - if (input.spDim == 2) { - meshInit = new TriMeshInit (input.simEndTime); - } - else if (input.spDim == 3) { - meshInit = new TetMeshInit (input.simEndTime); - } - else { - std::cerr << "ERROR: Wrong spatical dimensionality, run with -help" << std::endl; - std::cerr << spDimOpt.HelpStr << std::endl; - std::abort (); - } - - // read in the mesh from file and setup the mesh, bc etc - meshInit->initializeMesh (input.fileName, input.ndiv); - - return meshInit; -} - -void AVIabstractMain::initGlobalVec (const MeshInit& meshInit, GlobalVec& g) { - if (meshInit.isWave ()) { - meshInit.setupVelocities (g.vecV); - meshInit.setupVelocities (g.vecV_b); - } - else { - meshInit.setupDisplacements (g.vecQ); - } -} - -void AVIabstractMain::run (int argc, char* argv[]) { - Galois::StatManager sm; - LonestarStart(argc, argv, name, desc, url); - - // print messages e.g. version, input etc. 
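// A minimal sketch of the gather -> update -> assemble pattern that
// simulate() (declared above, defined further down) follows: element-local
// copies of the global vectors are gathered through a local-to-global index
// map, the element update runs on the local copies, and the results are
// scattered back. The flat index map used here is an assumption for
// illustration; the real LocalToGlobalMap and AVI interfaces carry more
// structure (fields, degrees of freedom per node).
#include <cstddef>
#include <vector>

namespace l2g_sketch {

typedef std::vector<double> VecDouble;
typedef std::vector<std::size_t> IndexMap;   // local slot -> global dof

void gather(const VecDouble& global, const IndexMap& map, VecDouble& local) {
  local.resize(map.size());
  for (std::size_t i = 0; i < map.size(); ++i) { local[i] = global[map[i]]; }
}

void assemble(const VecDouble& local, const IndexMap& map, VecDouble& global) {
  for (std::size_t i = 0; i < map.size(); ++i) { global[map[i]] = local[i]; }
}

} // namespace l2g_sketch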
- InputConfig input = readCmdLine (); - wltype = input.wltype; - - MeshInit* meshInit = initMesh (input); - - GlobalVec g(meshInit->getTotalNumDof ()); - - const std::vector& aviList = meshInit->getAVIVec (); - for (size_t i = 0; i < aviList.size (); ++i) { - assert (aviList[i]->getOperation ().getFields ().size () == meshInit->getSpatialDim()); - } - - - initGlobalVec (*meshInit, g); - - - // derived classes may have some data to initialze before running the loop - initRemaining (*meshInit, g); - - - - printf ("PAVI %s version\n", getVersion ().c_str ()); - printf ("input mesh: %d elements, %d nodes\n", meshInit->getNumElements (), meshInit->getNumNodes ()); - - - Galois::StatTimer t; - t.start (); - - Galois::Runtime::beginSampling (); - // don't write to files when measuring time - runLoop (*meshInit, g, false); - Galois::Runtime::endSampling (); - - t.stop (); - - if (!skipVerify) { - verify (input, *meshInit, g); - } - - delete meshInit; - -} - - -void AVIabstractMain::verify (const InputConfig& input, const MeshInit& meshInit, const GlobalVec& g) const { - - if (input.verifile == ("")) { - AVIorderedSerial* serial = new AVIorderedSerial (); - - MeshInit* serialMesh = initMesh (input); - - GlobalVec sg(serialMesh->getTotalNumDof ()); - - initGlobalVec (*serialMesh, sg); - - // do write to sync files when verifying - serial->runLoop (*serialMesh, sg, true); - - // compare the global vectors for equality (within some tolerance) - bool gvecCmp = g.cmpState (sg); - - // compare the final state of avi elements in the mesh - bool aviCmp = meshInit.cmpState (*serialMesh); - - if (!gvecCmp || !aviCmp) { - g.printDiff (sg); - - meshInit.printDiff (*serialMesh); - - std::cerr << "BAD: results don't match against Serial" << std::endl; - abort (); - } - - std::cout << ">>> OK: result verified against serial" << std::endl; - - delete serialMesh; - delete serial; - - } - else { - std::cerr << "TODO: cmp against file data needs implementation" << std::endl; - abort (); - } -} - - -void AVIabstractMain::simulate (AVI* avi, MeshInit& meshInit, - GlobalVec& g, LocalVec& l, bool createSyncFiles) { - - if (createSyncFiles) { - meshInit.writeSync (*avi, g.vecQ, g.vecV_b, g.vecT); - } - - const LocalToGlobalMap& l2gMap = meshInit.getLocalToGlobalMap(); - - avi->gather (l2gMap, g.vecQ, g.vecV, g.vecV_b, g.vecT, - l.q, l.v, l.vb, l.ti); - - avi->computeLocalTvec (l.tnew); - - if (avi->getTimeStamp () == 0.0) { - avi->vbInit (l.q, l.v, l.vb, l.ti, l.tnew, - l.qnew, l.vbinit, - l.forcefield, l.funcval, l.deltaV); - avi->update (l.q, l.v, l.vbinit, l.ti, l.tnew, - l.qnew, l.vnew, l.vbnew, - l.forcefield, l.funcval, l.deltaV); - } - else { - avi->update (l.q, l.v, l.vb, l.ti, l.tnew, - l.qnew, l.vnew, l.vbnew, - l.forcefield, l.funcval, l.deltaV); - } - - avi->incTimeStamp (); - - avi->assemble (l2gMap, l.qnew, l.vnew, l.vbnew, l.tnew, g.vecQ, g.vecV, g.vecV_b, g.vecT, g.vecLUpdate); -} - -void AVIorderedSerial::runLoop (MeshInit& meshInit, GlobalVec& g, bool createSyncFiles) { - - typedef std::priority_queue, AVIReverseComparator> PQ; - // typedef std::set PQ; - - // temporary matrices - int nrows = meshInit.getSpatialDim (); - int ncols = meshInit.getNodesPerElem (); - - LocalVec l(nrows, ncols); - - const std::vector& aviList = meshInit.getAVIVec (); - - for (size_t i = 0; i < aviList.size (); ++i) { - assert (aviList[i]->getOperation ().getFields ().size () == meshInit.getSpatialDim()); - } - - - PQ pq; - for (std::vector::const_iterator i = aviList.begin (), e = aviList.end (); i != e; ++i) { - pq.push 
(*i); - // pq.insert (*i); - } - - - int iter = 0; - while (!pq.empty ()) { - - AVI* avi = pq.top (); pq.pop (); - // AVI* avi = *pq.begin (); pq.erase (pq.begin ()); - - assert (avi != NULL); - - AVIabstractMain::simulate (avi, meshInit, g, l, createSyncFiles); - - - if (avi->getNextTimeStamp () < meshInit.getSimEndTime ()) { - pq.push (avi); - // pq.insert (avi); - } - - ++iter; - } - - - // printf ("iterations = %d, time taken (in ms) = %d, average time per iter = %g\n", iter, time, ((double)time)/iter); - printf ("iterations = %d\n", iter); - -} -#endif // AVI_ABSTRACT_MAIN_H_ diff --git a/maxflow/galois/apps/avi/main/AVIodgExplicit.cpp b/maxflow/galois/apps/avi/main/AVIodgExplicit.cpp deleted file mode 100644 index 31e5752..0000000 --- a/maxflow/galois/apps/avi/main/AVIodgExplicit.cpp +++ /dev/null @@ -1,15 +0,0 @@ -/* - * AVIodgExplicit.cpp - * - * Created on: Jun 21, 2011 - * Author: amber - */ - -#include "AVIodgExplicit.h" - -int main (int argc, char* argv[]) { - AVIodgExplicit um; - um.run (argc, argv); - return 0; -} - diff --git a/maxflow/galois/apps/avi/main/AVIodgExplicit.h b/maxflow/galois/apps/avi/main/AVIodgExplicit.h deleted file mode 100644 index 338382c..0000000 --- a/maxflow/galois/apps/avi/main/AVIodgExplicit.h +++ /dev/null @@ -1,453 +0,0 @@ -/** AVI unordered algorithm with abstract locks -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#ifndef AVI_ODG_EXPLICIT_H -#define AVI_ODG_EXPLICIT_H - - -#include "Galois/Atomic.h" -#include "Galois/Accumulator.h" -#include "Galois/Galois.h" -#include "Galois/Graph/Graph.h" -#include "Galois/Graph/LCGraph.h" -#include "Galois/Runtime/PerThreadStorage.h" -#include "Galois/WorkList/WorkList.h" - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "AuxDefs.h" -#include "AVI.h" -#include "Element.h" - -#include "AVIabstractMain.h" - -/** - * - * Unordered AVI algorithm uses two key data structures - * - * 1) Element Adjacency Graph - * 2) in degree vector - * - * This graph has a node for each mesh element and - * keeps track of node-adjacency between AVI elements. Two elements - * are adjacent if they share a node in the mesh between them. - * We create a graph by connecting adjacent elements with an edge. - * Conceptually the edge is directed from the avi element with smaller - * time stamp to the greater one. 
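// A standalone sketch of the event-driven loop in AVIorderedSerial::runLoop
// above: a min-priority queue keyed by each element's next update time; pop
// the earliest element, advance it, and reinsert it while its next time stamp
// is still below the simulation end time. The simplified Elem type and fixed
// step are assumptions for illustration; the real code orders AVI* objects
// with AVIReverseComparator and calls AVIabstractMain::simulate.
#include <cstdio>
#include <queue>
#include <vector>

namespace pq_sketch {

struct Elem {
  int id;
  double nextTime;   // time of this element's next update
  double dt;         // fixed step, for illustration only
};

// std::priority_queue is a max-heap, so order "later time = lower priority"
struct Later {
  bool operator()(const Elem& a, const Elem& b) const { return a.nextTime > b.nextTime; }
};

inline void runSerial(const std::vector<Elem>& elems, double simEndTime) {
  std::priority_queue<Elem, std::vector<Elem>, Later> pq(elems.begin(), elems.end());
  long iter = 0;
  while (!pq.empty()) {
    Elem e = pq.top(); pq.pop();   // element with the smallest time stamp
    e.nextTime += e.dt;            // stand-in for the simulate()/update step
    ++iter;
    if (e.nextTime < simEndTime) { pq.push(e); }
  }
  std::printf("iterations = %ld\n", iter);
}

} // namespace pq_sketch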
But in implementation this direction information - * is not kept in the graph but in an array 'inDegVec', which has an entry corresponding - * to each AVI element. - * An avi element with 0 in edges has the minimum time stamp among its neighbors - * and is therefore eligible for an update - * It is assumed that AVI elements have unique integer id's 0..numElements-1, and - * the id is used to index into inDegVec - * - */ - -//#define USE_LC_GRAPH - -class AVIodgExplicit: public AVIabstractMain { - -protected: - static const bool DEBUG = false; - -#ifdef USE_LC_GRAPH - typedef Galois::Graph::LC_CSR_Graph Graph; - typedef Graph::GraphNode GNode; -#else - typedef Galois::Graph::FirstGraph Graph; - typedef Graph::GraphNode GNode; -#endif - - - Graph graph; - - virtual const std::string getVersion () const { - return "ODG explicit, abstract locks on ODG nodes"; - } - - /** - * Generate element adjacency graph, where nodes are elements - * in the mesh, and there is an edge between the nodes if their - * corresponding elements share a vertex in the mesh - * - * @param meshInit - * @param g - */ - void genElemAdjGraph (const MeshInit& meshInit, const GlobalVec& g) { - -#ifdef USE_LC_GRAPH - typedef Galois::Graph::FirstGraph MGraph; - typedef MGraph::GraphNode MNode; - - MGraph mgraph; -#else - Graph& mgraph = graph; - typedef GNode MNode; -#endif - - - std::vector aviAdjNodes; - - const std::vector& aviList = meshInit.getAVIVec (); - - for (std::vector::const_iterator i = aviList.begin (), e = aviList.end (); i != e; ++i) { - AVI* avi = *i; - MNode gn = mgraph.createNode (avi); - mgraph.addNode (gn); - - aviAdjNodes.push_back (gn); - } - - - - // map where - // key is node id - // value is a list of avi elements that share this node - std::vector< std::vector > nodeSharers(meshInit.getNumNodes ()); - - // for (int i = 0; i < nodeSharers.size (); ++i) { - // nodeSharers[i] = new ArrayList> (); - // } - - for (std::vector::const_iterator i = aviAdjNodes.begin (), ei = aviAdjNodes.end (); i != ei; ++i) { - MNode aviAdjN = *i; - AVI* avi = mgraph.getData (aviAdjN, Galois::MethodFlag::NONE); - const std::vector& conn = avi->getGeometry ().getConnectivity (); - - for (std::vector::const_iterator j = conn.begin (), ej = conn.end (); j != ej; ++j) { - GlobalNodalIndex n = *j; - nodeSharers[n].push_back (aviAdjN); - } - - } - - int numEdges = 0; - - for (std::vector< std::vector >::const_iterator it = nodeSharers.begin (), ei = nodeSharers.end (); - it != ei; ++it) { - - const std::vector& adjElms = *it; - - // adjElms is the list of elements who share the node with id == current index pos in the array - // and therefore form a clique among themselves - for (size_t i = 0; i < adjElms.size (); ++i) { - // populate the upper triangle of the adj matrix - for (size_t j = i + 1; j < adjElms.size (); ++j) { -// if (!adjElms[i].hasNeighbor (adjElms[j])) { - if (mgraph.findEdge(adjElms[i], adjElms[j]) == mgraph.edge_end(adjElms[i])) { - ++numEdges; - } - mgraph.addEdge (adjElms[i], adjElms[j]); - } - } - - } - -#ifdef USE_LC_GRAPH - graph.copyFromGraph (mgraph); -#endif - - printf ("Graph created with %u nodes and %d edges\n", graph.size (), numEdges); - } - - virtual void initRemaining (const MeshInit& meshInit, const GlobalVec& g) { - - Galois::StatTimer t_graph ("Time spent in creating the graph: "); - - t_graph.start (); - genElemAdjGraph (meshInit, g); - t_graph.stop (); - } - - //! 
Functor for loop body - struct Process { - Graph& graph; - std::vector& inDegVec; - MeshInit& meshInit; - GlobalVec& g; - Galois::Runtime::PerThreadStorage& perIterLocalVec; - bool createSyncFiles; - IterCounter& iter; - - Process ( - Graph& graph, - std::vector& inDegVec, - MeshInit& meshInit, - GlobalVec& g, - Galois::Runtime::PerThreadStorage& perIterLocalVec, - bool createSyncFiles, - IterCounter& iter): - - graph (graph), - inDegVec (inDegVec), - meshInit (meshInit), - g (g), - perIterLocalVec (perIterLocalVec), - createSyncFiles (createSyncFiles), - iter (iter) {} - - - /** - * Loop body - * - * The loop body uses one-shot optimization, where we grab abstract locks on the node - * and its neighbors before performing the udpates. This removes the need for saving - * and performing undo operations. - * - * - * @param src is active elemtn - * @param lwl is the worklist handle - */ - template - void operator () (GNode& src, ContextTy& lwl) { - // one-shot optimization: acquire abstract locks on active node and - // neighbors (all its neighbors, in this case) before performing any modifications - - AVI* srcAVI = graph.getData (src, Galois::MethodFlag::CHECK_CONFLICT); - - for (Graph::edge_iterator e = graph.edge_begin (src, Galois::MethodFlag::CHECK_CONFLICT) - , ende = graph.edge_end (src, Galois::MethodFlag::CHECK_CONFLICT); e != ende; ++e) { - } - - - // past the fail-safe point now - - - int inDeg = inDegVec[srcAVI->getGlobalIndex ()]; - // assert inDeg == 0 : String.format ("active node %s with inDeg = %d\n", srcAVI, inDeg); - - // // TODO: DEBUG - // std::cout << "Processing element: " << srcAVI->toString() << std::endl; - - assert (inDeg == 0); - - - LocalVec& l = *perIterLocalVec.getLocal(); - - AVIabstractMain::simulate(srcAVI, meshInit, g, l, createSyncFiles); - - - // update the inEdges count and determine - // which neighbor is at local minimum and needs to be added to the worklist - - for (Graph::edge_iterator e = graph.edge_begin (src, Galois::MethodFlag::NONE) - , ende = graph.edge_end (src, Galois::MethodFlag::NONE); e != ende; ++e) { - - const GNode& dst = graph.getEdgeDst (e); - AVI* dstAVI = graph.getData (dst, Galois::MethodFlag::NONE); - - if (AVIComparator::compare (srcAVI, dstAVI) > 0) { - // if srcAVI has a higher time stamp that dstAVI - - ++inDegVec[srcAVI->getGlobalIndex ()]; - - int din = (--inDegVec[dstAVI->getGlobalIndex ()] ); - - if (din == 0) { - // dstAVI has become minimum among its neighbors - if (dstAVI->getNextTimeStamp () < meshInit.getSimEndTime ()) { - lwl.push (dst); - } - } - } - - } // end for - - if (inDegVec[srcAVI->getGlobalIndex ()] == 0) { - // srcAVI is still the minimum among its neighbors - if (srcAVI->getNextTimeStamp () < meshInit.getSimEndTime ()) { - lwl.push (src); - } - } - - - iter += 1; - - // if (iter.get () == 5000) { - // meshInit.writeMesh (); - // meshInit.plotMeshCenters (); - // } - - } - }; - - template - void initWorkList (std::vector& initWL, std::vector& inDegVec) { - - Galois::StatTimer t_wl ("Time to populate the worklist"); - t_wl.start (); - - for (Graph::iterator i = graph.begin (), e = graph.end (); i != e; ++i) { - const GNode& src = *i; - AVI* srcAVI = graph.getData (src, Galois::MethodFlag::NONE); - - // calculate the in degree of src by comparing it against its neighbors - for (Graph::edge_iterator e = graph.edge_begin (src, Galois::MethodFlag::NONE), - ende = graph.edge_end (src, Galois::MethodFlag::NONE); e != ende; ++e) { - - GNode dst = graph.getEdgeDst (e); - AVI* dstAVI = graph.getData (dst, 
Galois::MethodFlag::NONE); - if (AVIComparator::compare (srcAVI, dstAVI) > 0) { - ++inDegVec[srcAVI->getGlobalIndex ()]; - } - } - - // if src is less than all its neighbors then add to initWL - if (inDegVec[srcAVI->getGlobalIndex ()] == 0) { - initWL.push_back (src); - } - } - - t_wl.stop (); - printf ("Initial worklist contains %zd elements\n", initWL.size ()); - - } -public: - - virtual void runLoop (MeshInit& meshInit, GlobalVec& g, bool createSyncFiles) { - ///////////////////////////////////////////////////////////////// - // populate an initial worklist - ///////////////////////////////////////////////////////////////// - std::vector inDegVec(meshInit.getNumElements (), 0); - - std::vector initWL; - - initWorkList (initWL, inDegVec); - - -// // TODO: DEBUG -// std::cout << "Initial Worklist = " << std::endl; -// for (size_t i = 0; i < initWL.size (); ++i) { -// std::cout << graph.getData (initWL[i], Galois::MethodFlag::NONE)->toString () << ", "; -// } -// std::cout << std::endl; - - ///////////////////////////////////////////////////////////////// - // perform the simulation - ///////////////////////////////////////////////////////////////// - - // uncomment to plot the mesh - meshInit.writeMesh (); - // meshInit.plotMeshCenters (); - writeAdjacencyGraph (meshInit, graph); - - // temporary matrices - size_t nrows = meshInit.getSpatialDim (); - size_t ncols = meshInit.getNodesPerElem(); - - Galois::Runtime::PerThreadStorage perIterLocalVec; - for (unsigned int i = 0; i < perIterLocalVec.size(); ++i) - *perIterLocalVec.getRemote(i) = LocalVec(nrows, ncols); - - IterCounter iter; - - Process p(graph, inDegVec, meshInit, g, perIterLocalVec, createSyncFiles, iter); - - Galois::for_each(initWL.begin (), initWL.end (), p, Galois::wl()); - - printf ("iterations = %zd\n", iter.reduce ()); - - } - - - static void writeAdjacencyGraph (const MeshInit& meshInit, Graph& graph, - const char* nodesFileName="mesh-nodes.csv", const char* edgesFileName="mesh-edges.csv") { - - if (meshInit.getSpatialDim () != 2) { - std::cerr << "implemented for 2D elements only" << std::endl; - abort (); - } - - - FILE* nodesFile = fopen (nodesFileName, "w"); - if (nodesFile == NULL) { abort (); } - - fprintf (nodesFile, "nodeId, inDeg, outDeg, centerX, centerY, timeStamp\n"); - - // a set of edges computed by picking outgoing edges for each node - std::vector > outEdges; - - std::vector center (meshInit.getSpatialDim(), 0.0); - - for (Graph::iterator i = graph.begin (), e = graph.end (); i != e; ++i) { - const GNode& src = *i; - AVI* srcAVI = graph.getData (src, Galois::MethodFlag::NONE); - - size_t inDeg = 0; - // calculate the in degree of src by comparing it against its neighbors - for (Graph::edge_iterator e = graph.edge_begin (src, Galois::MethodFlag::NONE) - , ende = graph.edge_end (src, Galois::MethodFlag::NONE); e != ende; ++e) { - - - GNode dst = graph.getEdgeDst (e); - AVI* dstAVI = graph.getData (dst, Galois::MethodFlag::NONE); - if (AVIComparator::compare (srcAVI, dstAVI) > 0) { - ++inDeg; - - } else { // is an out-going edge - outEdges.push_back (std::make_pair(src, dst)); - } - } - - // size_t outDeg = graph.neighborsSize(src, Galois::MethodFlag::NONE) - inDeg; - size_t outDeg = std::distance (graph.edge_begin (src, Galois::MethodFlag::NONE), graph.edge_end (src, Galois::MethodFlag::NONE)); - - std::fill (center.begin (), center.end (), 0.0); - srcAVI->getElement ().getGeometry ().computeCenter (center); - - fprintf (nodesFile, "%zd, %zd, %zd, %g, %g, %g\n", - srcAVI->getGlobalIndex(), inDeg, outDeg, 
center[0], center[1], srcAVI->getNextTimeStamp()); - - } - - fclose (nodesFile); - - FILE* edgesFile = fopen (edgesFileName, "w"); - if (edgesFile == NULL) { abort (); } - - - fprintf (edgesFile, "srcId, dstId\n"); - for (std::vector >::const_iterator i = outEdges.begin(), ei = outEdges.end(); - i != ei; ++i) { - size_t srcId = graph.getData (i->first, Galois::MethodFlag::NONE)->getGlobalIndex (); - size_t dstId = graph.getData (i->second, Galois::MethodFlag::NONE)->getGlobalIndex (); - - fprintf (edgesFile, "%zd, %zd\n", srcId, dstId); - - } - - fclose (edgesFile); - - } - -}; - -#endif diff --git a/maxflow/galois/apps/avi/main/AVIodgExplicitNoLock.cpp b/maxflow/galois/apps/avi/main/AVIodgExplicitNoLock.cpp deleted file mode 100644 index 8871f5c..0000000 --- a/maxflow/galois/apps/avi/main/AVIodgExplicitNoLock.cpp +++ /dev/null @@ -1,15 +0,0 @@ -/* - * AVIodgExplicitNoLock.cpp - * - * Created on: Jun 21, 2011 - * Author: amber - */ - -#include "AVIodgExplicitNoLock.h" - -int main (int argc, char* argv[]) { - AVIodgExplicitNoLock um; - um.run (argc, argv); - return 0; -} - diff --git a/maxflow/galois/apps/avi/main/AVIodgExplicitNoLock.h b/maxflow/galois/apps/avi/main/AVIodgExplicitNoLock.h deleted file mode 100644 index 07dc2e1..0000000 --- a/maxflow/galois/apps/avi/main/AVIodgExplicitNoLock.h +++ /dev/null @@ -1,254 +0,0 @@ -/** AVI unordered algorithm with no abstract locks -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. 
Amber Hassaan - */ - -#ifndef AVI_UNORDERED_NO_LOCK_H_ -#define AVI_UNORDERED_NO_LOCK_H_ - - -#include "Galois/Graph/Graph.h" -#include "Galois/Graph/FileGraph.h" - -#include "Galois/Galois.h" -#include "Galois/Atomic.h" - -#include "Galois/Runtime/PerThreadStorage.h" - -#include -#include -#include -#include -#include -#include - -#include - -#include "AuxDefs.h" -#include "AVI.h" -#include "Element.h" - -#include "AVIabstractMain.h" -#include "AVIodgExplicit.h" - -/** - * AVI unordered algorithm that uses atomic integers - * and no abstract locks - */ -class AVIodgExplicitNoLock: public AVIodgExplicit { - typedef Galois::GAtomicPadded AtomicInteger; - -protected: - - virtual const std::string getVersion () const { - return "ODG explicit, no abstract locks"; - } - - /** - * Functor for loop body - */ - struct Process { - - Graph& graph; - std::vector& inDegVec; - MeshInit& meshInit; - GlobalVec& g; - Galois::Runtime::PerThreadStorage& perIterLocalVec; - bool createSyncFiles; - IterCounter& iter; - - Process ( - Graph& graph, - std::vector& inDegVec, - MeshInit& meshInit, - GlobalVec& g, - Galois::Runtime::PerThreadStorage& perIterLocalVec, - bool createSyncFiles, - IterCounter& iter): - - graph (graph), - inDegVec (inDegVec), - meshInit (meshInit), - g (g), - perIterLocalVec (perIterLocalVec), - createSyncFiles (createSyncFiles), - iter (iter) {} - - /** - * Loop body - * - * The key condition is that a node and its neighbor cannot be active at the same time, - * and it must be impossible for two of them to be processed in parallel - * Therefore a node can add its newly active neighbors to the workset as the last step only when - * it has finished performing all other updates. *(As per current semantics of for_each, adds to worklist - * happen on commit. If this is not the case, then each thread should - * accumulate adds in a temp vec and add to the worklist all together in the - * end) - * For the same reason, active node src must update its own in degree before updating the - * indegree of any of the neighbors. Imagine the alternative, where active node updates its in - * degree and that of it's neighbor in the same loop. For example A is current active node and - * has a neighbor B. A > B, therefore A increments its own in degree and decrements that of B to - * 1. Another active node C is neighbor of B but not of A, and C decreases in degree of B to 0 - * and adds B to the workset while A is not finished yet. 
This violates our key condition - * mentioned above - * - * @param gn is active elemtn - * @param lwl is the worklist handle - * @param avi is the avi object - */ - template - GALOIS_ATTRIBUTE_PROF_NOINLINE void addToWL (C& lwl, const GNode& gn, AVI* avi) { - assert (graph.getData (gn, Galois::MethodFlag::NONE) == avi); - - if (avi->getNextTimeStamp () < meshInit.getSimEndTime ()) { - lwl.push (gn); - } - } - - template - GALOIS_ATTRIBUTE_PROF_NOINLINE void updateODG (const GNode& src, AVI* srcAVI, C& lwl) { - unsigned addAmt = 0; - - for (Graph::edge_iterator e = graph.edge_begin (src, Galois::MethodFlag::NONE) - , ende = graph.edge_end (src, Galois::MethodFlag::NONE); e != ende; ++e) { - - GNode dst = graph.getEdgeDst (e); - AVI* dstAVI = graph.getData (dst, Galois::MethodFlag::NONE); - - if (AVIComparator::compare (srcAVI, dstAVI) > 0) { - ++addAmt; - } - - } - - - // may be the active node is still at the local minimum - // and no updates to neighbors are necessary - if (addAmt == 0) { - addToWL (lwl, src, srcAVI); - } - else { - inDegVec[srcAVI->getGlobalIndex ()] += addAmt; - - for (Graph::edge_iterator e = graph.edge_begin (src, Galois::MethodFlag::NONE) - , ende = graph.edge_end (src, Galois::MethodFlag::NONE); e != ende; ++e) { - - GNode dst = graph.getEdgeDst (e); - AVI* dstAVI = graph.getData (dst, Galois::MethodFlag::NONE); - - if (AVIComparator::compare (srcAVI, dstAVI) > 0) { - int din = --inDegVec[dstAVI->getGlobalIndex ()]; - - assert (din >= 0); - - if (din == 0) { - addToWL (lwl, dst, dstAVI); - // // TODO: DEBUG - // std::cout << "Adding: " << dstAVI->toString () << std::endl; - } - } - - } // end for - - } // end else - } - - template - void operator () (const GNode& src, ContextTy& lwl) { - AVI* srcAVI = graph.getData (src, Galois::MethodFlag::NONE); - - int inDeg = (int)inDegVec[srcAVI->getGlobalIndex ()]; - // assert inDeg == 0 : String.format ("active node %s with inDeg = %d\n", srcAVI, inDeg); - -// // TODO: DEBUG -// std::cout << "Processing element: " << srcAVI->toString() << std::endl; - - assert (inDeg == 0); - - LocalVec& l = *perIterLocalVec.getLocal(); - - AVIabstractMain::simulate(srcAVI, meshInit, g, l, createSyncFiles); - - - // update the inEdges count and determine - // which neighbor is at local minimum and needs to be added to the worklist - updateODG (src, srcAVI, lwl); - - - // for debugging, remove later - iter += 1; - - - } - }; - -public: - - /** - * For the in degree vector, we use a vector of atomic integers - * This along with other changes in the loop body allow us to - * no use abstract locks. 
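// A minimal sketch of the lock-free ODG rule that the comment and updateODG
// above describe: after element src has been updated, first add the number of
// neighbors that are now earlier than src to src's own atomic in-degree, and
// only then decrement those neighbors' counters; a neighbor whose counter
// reaches zero has become locally minimal and can be scheduled. Plain double
// time stamps and a generic push callback are assumptions for illustration;
// the real code compares AVI objects (with an id tie-break via AVIComparator)
// and pushes graph nodes onto a Galois worklist.
#include <atomic>
#include <cstddef>
#include <vector>

namespace odg_sketch {

struct ODG {
  std::vector<double> timeStamp;                 // per-element next time stamp
  std::vector<std::vector<std::size_t> > adj;    // elements sharing a mesh node
  std::vector<std::atomic<int> > inDeg;          // in-degree per element
};

// called after element `src` has been simulated and its time stamp advanced
template <typename PushFn>
void updateODG(ODG& g, std::size_t src, PushFn push) {
  int addAmt = 0;
  for (std::size_t k = 0; k < g.adj[src].size(); ++k) {
    if (g.timeStamp[src] > g.timeStamp[g.adj[src][k]]) { ++addAmt; }
  }
  if (addAmt == 0) {            // src is still the local minimum
    push(src);
    return;
  }
  g.inDeg[src] += addAmt;       // update own counter BEFORE touching neighbors
  for (std::size_t k = 0; k < g.adj[src].size(); ++k) {
    std::size_t dst = g.adj[src][k];
    if (g.timeStamp[src] > g.timeStamp[dst]) {
      if (--g.inDeg[dst] == 0) { push(dst); }    // dst became locally minimal
    }
  }
}

} // namespace odg_sketch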
@see Process - */ - virtual void runLoop (MeshInit& meshInit, GlobalVec& g, bool createSyncFiles) { - ///////////////////////////////////////////////////////////////// - // populate an initial worklist - ///////////////////////////////////////////////////////////////// - std::vector inDegVec(meshInit.getNumElements (), AtomicInteger (0)); - std::vector initWL; - - initWorkList (initWL, inDegVec); - -// // TODO: DEBUG -// std::cout << "Initial Worklist = " << std::endl; -// for (size_t i = 0; i < initWL.size (); ++i) { -// std::cout << graph.getData (initWL[i], Galois::MethodFlag::NONE)->toString () << ", "; -// } -// std::cout << std::endl; - - ///////////////////////////////////////////////////////////////// - // perform the simulation - ///////////////////////////////////////////////////////////////// - - // temporary matrices - size_t nrows = meshInit.getSpatialDim (); - size_t ncols = meshInit.getNodesPerElem(); - - Galois::Runtime::PerThreadStorage perIterLocalVec; - for (unsigned int i = 0; i < perIterLocalVec.size(); ++i) - *perIterLocalVec.getRemote(i) = LocalVec(nrows, ncols); - - - IterCounter iter; - - Process p (graph, inDegVec, meshInit, g, perIterLocalVec, createSyncFiles, iter); - - Galois::for_each(initWL.begin (), initWL.end (), p, Galois::wl()); - - printf ("iterations = %zd\n", iter.reduce ()); - - } - - - -}; - -#endif - diff --git a/maxflow/galois/apps/avi/main/AVIodgOrdered.cpp b/maxflow/galois/apps/avi/main/AVIodgOrdered.cpp deleted file mode 100644 index a5bd810..0000000 --- a/maxflow/galois/apps/avi/main/AVIodgOrdered.cpp +++ /dev/null @@ -1,13 +0,0 @@ -/* - * Created on: Jun 21, 2011 - * Author: amber - */ - -#include "AVIodgOrdered.h" - -int main (int argc, char* argv[]) { - AVIodgOrdered um; - um.run(argc, argv); - return 0; -} - diff --git a/maxflow/galois/apps/avi/main/AVIodgOrdered.h b/maxflow/galois/apps/avi/main/AVIodgOrdered.h deleted file mode 100644 index 8bde870..0000000 --- a/maxflow/galois/apps/avi/main/AVIodgOrdered.h +++ /dev/null @@ -1,210 +0,0 @@ -/** AVI a version without explicit ODG using deterministic infrastructure -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. 
Amber Hassaan - */ - -#ifndef AVI_ODG_ORDERED_H -#define AVI_ODG_ORDERED_H - -#include "Galois/Galois.h" -#include "Galois/Runtime/PerThreadStorage.h" -#include "Galois/WorkList/WorkList.h" - -#include - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "AuxDefs.h" -#include "AVI.h" -#include "Element.h" - -#include "AVIabstractMain.h" - - -enum ExecType { - useAddRem, - useTwoPhase, -}; - -static cll::opt execType ( - cll::desc ("Ordered Executor Type:"), - cll::values ( - clEnumVal(useAddRem, "Use Add-Remove executor"), - clEnumVal(useTwoPhase, "Use Two-Phase executor"), - clEnumValEnd), - cll::init (useAddRem)); - -class AVIodgOrdered: public AVIabstractMain { -protected: - typedef Galois::Graph::FirstGraph Graph; - typedef Graph::GraphNode Lockable; - typedef std::vector Locks; - - Graph graph; - Locks locks; - - virtual const std::string getVersion() const { - return "Parallel version, ODG automatically managed"; - } - - virtual void initRemaining(const MeshInit& meshInit, const GlobalVec& g) { - assert(locks.empty()); - locks.reserve(meshInit.getNumNodes()); - for (int i = 0; i < meshInit.getNumNodes(); ++i) { - locks.push_back(graph.createNode(nullptr)); - } - } - - struct Update { - AVI* avi; - double ts; - Update(AVI* a, double t): avi(a), ts(t) { } - - Update updatedCopy () const { - return Update (avi, avi->getNextTimeStamp ()); - } - - friend std::ostream& operator << (std::ostream& out, const Update& up) { - return (out << "(id:" << up.avi->getGlobalIndex() << ", ts:" << up.ts << ")"); - } - }; - struct Comparator { - - bool operator() (const Update& a, const Update& b) const { - int c = DoubleComparator::compare (a.ts, b.ts); - if (c == 0) { - c = a.avi->getGlobalIndex () - b.avi->getGlobalIndex (); - } - return (c < 0); - } - }; - - - struct MakeUpdate: public std::unary_function { - Update operator()(AVI* avi) const { return Update(avi, avi->getNextTimeStamp ()); } - }; - - struct NhoodVisit { - Graph& graph; - Locks& locks; - - NhoodVisit(Graph& g, Locks& l): graph(g), locks(l) { } - - template - void operator()(const Update& item, C&) { - typedef std::vector V; - - const V& conn = item.avi->getGeometry().getConnectivity(); - - for (V::const_iterator ii = conn.begin(), ei = conn.end(); ii != ei; ++ii) { - graph.getData(locks[*ii]); - } - } - }; - - struct NhoodVisitAddRem: public NhoodVisit { - typedef int tt_has_fixed_neighborhood; - NhoodVisitAddRem (Graph& g, Locks& l): NhoodVisit (g, l) {} - }; - - struct Process { - MeshInit& meshInit; - GlobalVec& g; - Galois::Runtime::PerThreadStorage& perIterLocalVec; - bool createSyncFiles; - IterCounter& niter; - - Process( - MeshInit& meshInit, - GlobalVec& g, - Galois::Runtime::PerThreadStorage& perIterLocalVec, - bool createSyncFiles, - IterCounter& niter): - meshInit(meshInit), - g(g), - perIterLocalVec(perIterLocalVec), - createSyncFiles(createSyncFiles), - niter(niter) { } - - void operator()(const Update& item, Galois::UserContext& ctx) { - // for debugging, remove later - niter += 1; - - LocalVec& l = *perIterLocalVec.getLocal(); - - AVIabstractMain::simulate(item.avi, meshInit, g, l, createSyncFiles); - - if (item.avi->getNextTimeStamp() < meshInit.getSimEndTime()) { - ctx.push(item.updatedCopy ()); - } - } - }; - - -public: - virtual void runLoop(MeshInit& meshInit, GlobalVec& g, bool createSyncFiles) { - const size_t nrows = meshInit.getSpatialDim(); - const size_t ncols = meshInit.getNodesPerElem(); - - Galois::Runtime::PerThreadStorage perIterLocalVec; - for (unsigned int i 
= 0; i < perIterLocalVec.size(); ++i) - *perIterLocalVec.getRemote(i) = LocalVec(nrows, ncols); - - IterCounter niter; - - NhoodVisit nhVisitor(graph, locks); - NhoodVisitAddRem nhVisitorAddRem (graph, locks); - Process p(meshInit, g, perIterLocalVec, createSyncFiles, niter); - - const std::vector& elems = meshInit.getAVIVec(); - - switch (execType) { - case useAddRem: - Galois::for_each_ordered ( - boost::make_transform_iterator(elems.begin(), MakeUpdate()), - boost::make_transform_iterator(elems.end(), MakeUpdate()), - Comparator(), nhVisitorAddRem, p); - break; - case useTwoPhase: - Galois::for_each_ordered ( - boost::make_transform_iterator(elems.begin(), MakeUpdate()), - boost::make_transform_iterator(elems.end(), MakeUpdate()), - Comparator(), nhVisitor, p); - break; - default: - GALOIS_ERROR(true, "Unknown executor type"); - break; - } - - - printf("iterations = %lu\n", niter.reduce()); - } -}; - -#endif diff --git a/maxflow/galois/apps/avi/main/AVIorderedSerial.cpp b/maxflow/galois/apps/avi/main/AVIorderedSerial.cpp deleted file mode 100644 index 25abe38..0000000 --- a/maxflow/galois/apps/avi/main/AVIorderedSerial.cpp +++ /dev/null @@ -1,14 +0,0 @@ -/* - * AVIorderedSerial.cpp - * - * Created on: Jun 21, 2011 - * Author: amber - */ -#include "AVIabstractMain.h" - -int main (int argc, char* argv[]) { - AVIorderedSerial serial; - serial.run (argc, argv); - return 0; -} - diff --git a/maxflow/galois/apps/avi/main/GlobalVec.h b/maxflow/galois/apps/avi/main/GlobalVec.h deleted file mode 100644 index 5b2c203..0000000 --- a/maxflow/galois/apps/avi/main/GlobalVec.h +++ /dev/null @@ -1,117 +0,0 @@ -/** Global vectors for functions being computed over the mesh -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * Created on: Jun 20, 2011 - * @author M. Amber Hassaan - */ - - -#ifndef GLOBALVEC_H_ -#define GLOBALVEC_H_ - -#include "AuxDefs.h" - -#include - -#include -#include - -struct GlobalVec { - - - //! Global vectors computed for each mesh node - //! Q is displacement - //! V is velocity, V_b is half step value - //! T is time - VecDouble vecQ; - VecDouble vecV; - VecDouble vecV_b; - VecDouble vecT; - VecDouble vecLUpdate; - - //! 
@param totalNDOF is total number of Mesh nodes times the dimensionality - GlobalVec (unsigned int totalNDOF) { - vecQ = VecDouble (totalNDOF, 0.0); - - vecV = VecDouble (vecQ); - vecV_b = VecDouble (vecQ); - vecT = VecDouble (vecQ); - vecLUpdate = VecDouble (vecQ); - } - -private: - static bool computeDiff (const VecDouble& vecA, const char* nameA, const VecDouble& vecB, const char* nameB, bool printDiff) { - bool result = false; - if (vecA.size () != vecB.size ()) { - if (printDiff) { - fprintf (stderr, "Arrays of different length %s.size () = %zd, %s.size () = %zd\n", nameA, vecA.size (), nameB, vecB.size ()); - } - result = false; - } - else { - result = true; // start optimistically :) - for (size_t i = 0; i < vecA.size (); ++i) { - double diff = fabs (vecA[i] - vecB[i]); - if ( diff > TOLERANCE) { - result = false; - if (printDiff) { - fprintf (stderr, "(%s[%zd] = %g) != (%s[%zd] = %g), diff=%g\n", - nameA, i, vecA[i], nameB, i, vecB[i], diff); - } - else { - break; // no use continuing on if not printing diff; - } - } - } - } - - return result; - } - - bool computeDiffInternal (const GlobalVec& that, bool printDiff) const { - return true - && computeDiff (this->vecQ, "this->vecQ", that.vecQ, "that.vecQ", printDiff) - && computeDiff (this->vecV, "this->vecV", that.vecV, "that.vecV", printDiff) - && computeDiff (this->vecV_b, "this->vecV_b", that.vecV_b, "that.vecV_b", printDiff) - && computeDiff (this->vecT, "this->vecT", that.vecT, "that.vecT", printDiff) - && computeDiff (this->vecLUpdate, "this->vecLUpdate", that.vecLUpdate, "that.vecLUpdate", printDiff); - } - -public: - /** - * compare the values of global vectors element by element - * - * @param that - */ - bool cmpState (const GlobalVec& that) const { - return computeDiffInternal (that, false); - } - - /** compare the values of global vector element by element - * and print the differences - * - * @param that - */ - void printDiff (const GlobalVec& that) const { - computeDiffInternal (that, true); - } -}; - -#endif /* GLOBALVEC_H_ */ diff --git a/maxflow/galois/apps/avi/main/LocalVec.h b/maxflow/galois/apps/avi/main/LocalVec.h deleted file mode 100644 index 01aa2bb..0000000 --- a/maxflow/galois/apps/avi/main/LocalVec.h +++ /dev/null @@ -1,83 +0,0 @@ -/** Per AVI element local vectors -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * Created on: Jun 20, 2011 - * @author M. 
Amber Hassaan - */ - -#ifndef LOCALVEC_H_ -#define LOCALVEC_H_ - -#include "AuxDefs.h" -#include "StandardAVI.h" - -#include - -struct LocalVec { - typedef StandardAVI::BCImposedType BCImposedType; - - - //! initial state as read from GlobalVec using gather - MatDouble q; - MatDouble v; - MatDouble vb; - MatDouble ti; - - //! updated state computed using initial state - MatDouble qnew; - MatDouble vnew; - MatDouble vbnew; - MatDouble vbinit; - MatDouble tnew; - - //! some temporaries so that we don't need to allocate memory in every iteration - MatDouble forcefield; - MatDouble funcval; - MatDouble deltaV; - - - /** - * - * @param nrows - * @param ncols - */ - LocalVec (size_t nrows=0, size_t ncols=0) { - q = MatDouble (nrows, VecDouble (ncols, 0.0)); - - v = MatDouble (q); - vb = MatDouble (q); - ti = MatDouble (q); - qnew = MatDouble (q); - vnew = MatDouble (q); - vbnew = MatDouble (q); - vbinit = MatDouble (q); - tnew = MatDouble (q); - - - forcefield = MatDouble (q); - funcval = MatDouble (q); - deltaV = MatDouble (q); - - } - - - -}; -#endif /* LOCALVEC_H_ */ diff --git a/maxflow/galois/apps/avi/util/AuxDefs.h b/maxflow/galois/apps/avi/util/AuxDefs.h deleted file mode 100644 index 777fe1e..0000000 --- a/maxflow/galois/apps/avi/util/AuxDefs.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * AuxDefs.h: Common definitions - * DG++ - * - * Created by Adrian Lew on 9/4/06. - * - * Copyright (c) 2006 Adrian Lew - * - * Permission is hereby granted, free of charge, to any person obtaining - * a copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sublicense, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject - * to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY - * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -#ifndef _AUXDEFS_H_ -#define _AUXDEFS_H_ - -#include -#include -#include - -#include - -//! Nodal indices, starting at 0. : amber -typedef size_t GlobalNodalIndex; - -//! Degree of freedom indices, starting at 0. -typedef size_t GlobalDofIndex; - -//! Element indices, starting at 0. -typedef size_t GlobalElementIndex; - - -//! commonly used vector and vector -typedef std::vector VecDouble; -typedef std::vector< std::vector > MatDouble; -typedef std::vector< std::vector < std::vector < std::vector > > > FourDVecDouble; - -typedef std::vector VecBool; -typedef std::vector< std::vector< bool> > MatBool; - - -//! 
constants -const double TOLERANCE = 1e-20; - -struct DoubleComparator { - static inline int compare (double left, double right) { - double tdiff = left - right; - - if (fabs (tdiff) < TOLERANCE) { - return 0; - - } else if (tdiff > 0.0) { - return 1; - - } else if (tdiff < 0.0) { - return -1; - - } else { - abort (); // shouldn't reach here - return 0; - } - - } -}; - - -#endif diff --git a/maxflow/galois/apps/avi/util/util.h b/maxflow/galois/apps/avi/util/util.h deleted file mode 100644 index be94498..0000000 --- a/maxflow/galois/apps/avi/util/util.h +++ /dev/null @@ -1,50 +0,0 @@ -/** Some debug utilities etc. -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#ifndef UTIL_H -#define UTIL_H - -#include -#include - -template -std::ostream& operator << (std::ostream& out, const std::vector& v) { - out << "{ "; - for (typename std::vector::const_iterator i = v.begin(); i != v.end(); ++i) { - out << *i << ", "; - } - out << "}"; - - return out; -} - -template -void printIter (std::ostream& out, I begin, I end) { - out << "{ "; - for (I i = begin; i != end; ++i) { - out << *i << ", "; - } - out << "}" << std::endl; -} - -#endif diff --git a/maxflow/galois/apps/barneshut/Barneshut.cpp b/maxflow/galois/apps/barneshut/Barneshut.cpp deleted file mode 100644 index 1ace903..0000000 --- a/maxflow/galois/apps/barneshut/Barneshut.cpp +++ /dev/null @@ -1,581 +0,0 @@ -/** Barnes-hut application -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. 
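// A tiny self-contained illustration of how the tolerance-based
// DoubleComparator in AuxDefs.h above is typically combined with a secondary
// key: it reports two doubles closer than TOLERANCE as equal, so callers that
// need a strict weak ordering (e.g. the Comparator in AVIodgOrdered earlier,
// which tie-breaks on the element's global index) add such a tie-break. The
// Item type here is hypothetical.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

namespace cmp_sketch {

const double TOL = 1e-20;

inline int compareWithTol(double a, double b) {
  double d = a - b;
  if (std::fabs(d) < TOL) { return 0; }
  return (d > 0.0) ? 1 : -1;
}

struct Item { std::size_t id; double ts; };

struct ByTimeThenId {
  bool operator()(const Item& a, const Item& b) const {
    int c = compareWithTol(a.ts, b.ts);
    if (c == 0) { return a.id < b.id; }   // tie-break keeps the ordering strict
    return c < 0;
  }
};

inline void sortByTime(std::vector<Item>& v) {
  std::sort(v.begin(), v.end(), ByTimeThenId());
}

} // namespace cmp_sketch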
- * - * @author Martin Burtscher - * @author Donald Nguyen - */ -#include "Galois/config.h" -#include "Galois/Galois.h" -#include "Galois/Statistic.h" -#include "Galois/Bag.h" -#include "llvm/Support/CommandLine.h" -#include "Lonestar/BoilerPlate.h" - -#include -#include - -#include GALOIS_CXX11_STD_HEADER(array) -#include -#include -#include -#include -#include GALOIS_CXX11_STD_HEADER(deque) - -#include "Point.h" - -const char* name = "Barnshut N-Body Simulator"; -const char* desc = - "Simulates gravitational forces in a galactic cluster using the " - "Barnes-Hut n-body algorithm"; -const char* url = "barneshut"; - -static llvm::cl::opt nbodies("n", llvm::cl::desc("Number of bodies"), llvm::cl::init(10000)); -static llvm::cl::opt ntimesteps("steps", llvm::cl::desc("Number of steps"), llvm::cl::init(1)); -static llvm::cl::opt seed("seed", llvm::cl::desc("Random seed"), llvm::cl::init(7)); - -struct Node { - Point pos; - double mass; - bool Leaf; -}; - -struct Body : public Node { - Point vel; - Point acc; -}; - -/** - * A node in an octree is either an internal node or a leaf. - */ -struct Octree : public Node { - std::array, 8> child; - char cLeafs; - char nChildren; - - Octree(const Point& p) { - Node::pos = p; - Node::Leaf = false; - cLeafs = 0; - nChildren = 0; - } -}; - -std::ostream& operator<<(std::ostream& os, const Body& b) { - os << "(pos:" << b.pos - << " vel:" << b.vel - << " acc:" << b.acc - << " mass:" << b.mass << ")"; - return os; -} - -struct BoundingBox { - Point min; - Point max; - explicit BoundingBox(const Point& p) : min(p), max(p) { } - BoundingBox() : - min(std::numeric_limits::max()), - max(std::numeric_limits::min()) { } - - void merge(const BoundingBox& other) { - min.pairMin(other.min); - max.pairMax(other.max); - } - - void merge(const Point& other) { - min.pairMin(other); - max.pairMax(other); - } - - double diameter() const { return (max - min).minDim(); } - double radius() const { return diameter() * 0.5; } - Point center() const { return (min + max) * 0.5; } -}; - -std::ostream& operator<<(std::ostream& os, const BoundingBox& b) { - os << "(min:" << b.min << " max:" << b.max << ")"; - return os; -} - -struct Config { - const double dtime; // length of one time step - const double eps; // potential softening parameter - const double tol; // tolerance for stopping recursion, <0.57 to bound error - const double dthf, epssq, itolsq; - Config(): - dtime(0.5), - eps(0.05), - tol(0.05), //0.025), - dthf(dtime * 0.5), - epssq(eps * eps), - itolsq(1.0 / (tol * tol)) { } -}; - -std::ostream& operator<<(std::ostream& os, const Config& c) { - os << "Barnes-Hut configuration:" - << " dtime: " << c.dtime - << " eps: " << c.eps - << " tol: " << c.tol; - return os; -} - -Config config; - -inline int getIndex(const Point& a, const Point& b) { - int index = 0; - for (int i = 0; i < 3; ++i) - if (a[i] < b[i]) - index += (1 << i); - return index; -} - -inline Point updateCenter(Point v, int index, double radius) { - for (int i = 0; i < 3; i++) - v[i] += (index & (1 << i)) > 0 ? 
radius : -radius; - return v; -} - -typedef Galois::InsertBag Bodies; -typedef Galois::InsertBag BodyPtrs; -//FIXME: reclaim memory for multiple steps -typedef Galois::InsertBag Tree; - -struct BuildOctree { - Octree* root; - Tree& T; - double root_radius; - - BuildOctree(Octree* _root, Tree& _t, double radius) - : root(_root), T(_t), root_radius(radius) { } - - void operator()(Body* b) { insert(b, root, root_radius); } - - void insert(Body* b, Octree* node, double radius) { - int index = getIndex(node->pos, b->pos); - Node* child = node->child[index].getValue(); - - //go through the tree lock-free while we can - if (child && !child->Leaf) { - insert(b, static_cast(child), radius); - return; - } - - node->child[index].lock(); - child = node->child[index].getValue(); - - if (child == NULL) { - node->child[index].unlock_and_set(b); - return; - } - - radius *= 0.5; - if (child->Leaf) { - // Expand leaf - Octree* new_node = &T.emplace(updateCenter(node->pos, index, radius)); - assert(node->pos != b->pos); - //node->child[index].unlock_and_set(new_node); - insert(b, new_node, radius); - insert(static_cast(child), new_node, radius); - node->child[index].unlock_and_set(new_node); - } else { - node->child[index].unlock(); - insert(b, static_cast(child), radius); - } - } -}; - -unsigned computeCenterOfMass(Octree* node) { - double mass = 0.0; - Point accum; - unsigned num = 1; - - //Reorganize leaves to be dense - //remove copies values - int index = 0; - for (int i = 0; i < 8; ++i) - if (node->child[i].getValue()) - node->child[index++].setValue(node->child[i].getValue()); - for (int i = index; i < 8; ++i) - node->child[i].setValue(NULL); - node->nChildren = index; - - for (int i = 0; i < index; i++) { - Node* child = node->child[i].getValue(); - if (!child->Leaf) { - num += computeCenterOfMass(static_cast(child)); - } else { - node->cLeafs |= (1 << i); - ++num; - } - mass += child->mass; - accum += child->pos * child->mass; - } - - node->mass = mass; - - if (mass > 0.0) - node->pos = accum / mass; - return num; -} - -/* -void printRec(std::ofstream& file, Node* node, unsigned level) { - static const char* ct[] = { - "blue", "cyan", "aquamarine", "chartreuse", - "darkorchid", "darkorange", - "deeppink", "gold", "chocolate" - }; - if (!node) return; - file << "\"" << node << "\" [color=" << ct[node->owner / 4] << (node->owner % 4 + 1) << (level ? "" : " style=filled") << " label = \"" << (node->Leaf ? 
"L" : "N") << "\"];\n"; - if (!node->Leaf) { - Octree* node2 = static_cast(node); - for (int i = 0; i < 8 && node2->child[i]; ++i) { - if (level == 3 || level == 6) - file << "subgraph cluster_" << level << "_" << i << " {\n"; - file << "\"" << node << "\" -> \"" << node2->child[i] << "\" [weight=0.01]\n"; - printRec(file, node2->child[i], level + 1); - if (level == 3 || level == 6) - file << "}\n"; - } - } -} - -void printTree(Octree* node) { - std::ofstream file("out.txt"); - file << "digraph octree {\n"; - file << "ranksep = 2\n"; - file << "root = \"" << node << "\"\n"; - // file << "overlap = scale\n"; - printRec(file, node, 0); - file << "}\n"; -} -*/ - -Point updateForce(Point delta, double psq, double mass) { - // Computing force += delta * mass * (|delta|^2 + eps^2)^{-3/2} - double idr = 1 / sqrt((float) (psq + config.epssq)); - double scale = mass * idr * idr * idr; - return delta * scale; -} - -struct ComputeForces { - // Optimize runtime for no conflict case - typedef int tt_does_not_need_aborts; - typedef int tt_needs_per_iter_alloc; - typedef int tt_does_not_need_push; - - Octree* top; - double diameter; - double root_dsq; - - ComputeForces(Octree* _top, double _diameter) : - top(_top), - diameter(_diameter) { - root_dsq = diameter * diameter * config.itolsq; - } - - template - void operator()(Body* b, Context& cnx) { - Point p = b->acc; - b->acc = Point(0.0, 0.0, 0.0); - iterate(*b, root_dsq, cnx); - b->vel += (b->acc - p) * config.dthf; - } - - struct Frame { - double dsq; - Octree* node; - Frame(Octree* _node, double _dsq) : dsq(_dsq), node(_node) { } - }; - - template - void iterate(Body& b, double root_dsq, Context& cnx) { - std::deque::other> stack(cnx.getPerIterAlloc()); - stack.push_back(Frame(top, root_dsq)); - - while (!stack.empty()) { - const Frame f = stack.back(); - stack.pop_back(); - - Point p = b.pos - f.node->pos; - double psq = p.dist2(); - - // Node is far enough away, summarize contribution - if (psq >= f.dsq) { - b.acc += updateForce(p, psq, f.node->mass); - continue; - } - - double dsq = f.dsq * 0.25; - for (int i = 0; i < f.node->nChildren; i++) { - Node* n = f.node->child[i].getValue(); - assert(n); - if (f.node->cLeafs & (1 << i)) { - assert(n->Leaf); - if (static_cast(&b) != n) { - Point p = b.pos - n->pos; - b.acc += updateForce(p, p.dist2(), n->mass); - } - } else { -#ifndef GALOIS_CXX11_DEQUE_HAS_NO_EMPLACE - stack.emplace_back(static_cast(n), dsq); -#else - stack.push_back(Frame(static_cast(n), dsq)); -#endif - __builtin_prefetch(n); - } - } - } - } -}; - -struct AdvanceBodies { - // Optimize runtime for no conflict case - typedef int tt_does_not_need_aborts; - - AdvanceBodies() { } - - template - void operator()(Body* b, Context&) { - operator()(b); - } - - void operator()(Body* b) { - Point dvel(b->acc); - dvel *= config.dthf; - Point velh(b->vel); - velh += dvel; - b->pos += velh * config.dtime; - b->vel = velh + dvel; - } -}; - -struct ReduceBoxes { - // NB: only correct when run sequentially or tree-like reduction - typedef int tt_does_not_need_stats; - BoundingBox initial; - - void operator()(const Body* b) { - initial.merge(b->pos); - } -}; - -struct mergeBox { - void operator()(ReduceBoxes& lhs, ReduceBoxes& rhs) { - return lhs.initial.merge(rhs.initial); - } -}; - -double nextDouble() { - return rand() / (double) RAND_MAX; -} - -struct InsertBody { - BodyPtrs& pBodies; - Bodies& bodies; - InsertBody(BodyPtrs& pb, Bodies& b): pBodies(pb), bodies(b) { } - void operator()(const Body& b) { - //Body b2 = b; - //b2.owner = 
Galois::Runtime::LL::getTID(); - pBodies.push_back(&(bodies.push_back(b))); - } -}; - -struct centerXCmp { - template - bool operator()(const T& lhs, const T& rhs) const { - return lhs.pos[0] < rhs.pos[0]; - } -}; - -struct centerYCmp { - template - bool operator()(const T& lhs, const T& rhs) const { - return lhs.pos[1] < rhs.pos[1]; - } -}; - -struct centerYCmpInv { - template - bool operator()(const T& lhs, const T& rhs) const { - return rhs.pos[1] < lhs.pos[1]; - } -}; - - -template -void divide(const Iter& b, const Iter& e) { - if (std::distance(b,e) > 32) { - std::sort(b,e, centerXCmp()); - Iter m = Galois::split_range(b,e); - std::sort(b,m, centerYCmpInv()); - std::sort(m,e, centerYCmp()); - divide(b, Galois::split_range(b,m)); - divide(Galois::split_range(b,m), m); - divide(m,Galois::split_range(m,e)); - divide(Galois::split_range(m,e), e); - } else { - std::random_shuffle(b,e); - } -} - -/** - * Generates random input according to the Plummer model, which is more - * realistic but perhaps not so much so according to astrophysicists - */ -void generateInput(Bodies& bodies, BodyPtrs& pBodies, int nbodies, int seed) { - double v, sq, scale; - Point p; - double PI = boost::math::constants::pi(); - - srand(seed); - - double rsc = (3 * PI) / 16; - double vsc = sqrt(1.0 / rsc); - - std::vector tmp; - - for (int body = 0; body < nbodies; body++) { - double r = 1.0 / sqrt(pow(nextDouble() * 0.999, -2.0 / 3.0) - 1); - do { - for (int i = 0; i < 3; i++) - p[i] = nextDouble() * 2.0 - 1.0; - sq = p.dist2(); - } while (sq > 1.0); - scale = rsc * r / sqrt(sq); - - Body b; - b.mass = 1.0 / nbodies; - b.pos = p * scale; - do { - p[0] = nextDouble(); - p[1] = nextDouble() * 0.1; - } while (p[1] > p[0] * p[0] * pow(1 - p[0] * p[0], 3.5)); - v = p[0] * sqrt(2.0 / sqrt(1 + r * r)); - do { - for (int i = 0; i < 3; i++) - p[i] = nextDouble() * 2.0 - 1.0; - sq = p.dist2(); - } while (sq > 1.0); - scale = vsc * v / sqrt(sq); - b.vel = p * scale; - b.Leaf = true; - tmp.push_back(b); - //pBodies.push_back(&bodies.push_back(b)); - } - - //sort and copy out - divide(tmp.begin(), tmp.end()); - Galois::do_all(tmp.begin(), tmp.end(), InsertBody(pBodies, bodies)); -} - -struct CheckAllPairs { - Bodies& bodies; - - CheckAllPairs(Bodies& b): bodies(b) { } - - double operator()(const Body& body) { - const Body* me = &body; - Point acc; - for (Bodies::iterator ii = bodies.begin(), ei = bodies.end(); ii != ei; ++ii) { - Body* b = &*ii; - if (me == b) - continue; - Point delta = me->pos - b->pos; - double psq = delta.dist2(); - acc += updateForce(delta, psq, b->mass); - } - - double dist2 = acc.dist2(); - acc -= me->acc; - double retval = acc.dist2() / dist2; - return retval; - } -}; - -double checkAllPairs(Bodies& bodies, int N) { - Bodies::iterator end(bodies.begin()); - std::advance(end, N); - - return Galois::ParallelSTL::map_reduce(bodies.begin(), end, - CheckAllPairs(bodies), - 0.0, - std::plus()) / N; -} - -void run(Bodies& bodies, BodyPtrs& pBodies) { - typedef Galois::WorkList::dChunkedLIFO<256> WL_; - typedef Galois::WorkList::AltChunkedLIFO<32> WL; - typedef Galois::WorkList::StableIterator WLL; - - for (int step = 0; step < ntimesteps; step++) { - // Do tree building sequentially - BoundingBox box = Galois::Runtime::do_all_impl(Galois::Runtime::makeLocalRange(pBodies), ReduceBoxes(), mergeBox(), "reduceBoxes", true).initial; - //std::for_each(bodies.begin(), bodies.end(), ReduceBoxes(box)); - - Tree t; - Octree& top = t.emplace(box.center()); - - Galois::StatTimer T_build("BuildTime"); - T_build.start(); - 
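// Annotation on the tree-build step that follows (a summary of BuildOctree::insert above,
// not a line of the original file): each body descends the octree lock-free for as long as
// it keeps meeting internal nodes. Only when the target child slot is empty or holds a leaf
// does it take that slot's lock and re-read it: an empty slot is claimed directly with
// unlock_and_set(b); a leaf is expanded by allocating a new internal cell centered at
// updateCenter(node->pos, index, radius) with the radius halved, re-inserting both bodies
// into it, and only then publishing the new cell with unlock_and_set. Bodies racing on the
// same subtree therefore serialize only on the single slot they actually touch.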
Galois::do_all_local(pBodies, BuildOctree(&top, t, box.radius()), Galois::loopname("BuildTree")); - T_build.stop(); - - //update centers of mass in tree - unsigned size = computeCenterOfMass(&top); - //printTree(&top); - std::cout << "Tree Size: " << size << "\n"; - - Galois::StatTimer T_compute("ComputeTime"); - T_compute.start(); - Galois::for_each_local(pBodies, ComputeForces(&top, box.diameter()), Galois::loopname("compute"), Galois::wl()); - T_compute.stop(); - - if (!skipVerify) { - std::cout << "MSE (sampled) " << checkAllPairs(bodies, std::min((int) nbodies, 100)) << "\n"; - } - //Done in compute forces - Galois::do_all_local(pBodies, AdvanceBodies(), Galois::loopname("advance")); - - std::cout << "Timestep " << step << " Center of Mass = "; - std::ios::fmtflags flags = - std::cout.setf(std::ios::showpos|std::ios::right|std::ios::scientific|std::ios::showpoint); - std::cout << top.pos; - std::cout.flags(flags); - std::cout << "\n"; - } -} - -int main(int argc, char** argv) { - Galois::StatManager M; - LonestarStart(argc, argv, name, desc, url); - - std::cout << config << "\n"; - std::cout << nbodies << " bodies, " - << ntimesteps << " time steps\n"; - - Bodies bodies; - BodyPtrs pBodies; - generateInput(bodies, pBodies, nbodies, seed); - - Galois::StatTimer T; - T.start(); - run(bodies, pBodies); - T.stop(); -} diff --git a/maxflow/galois/apps/barneshut/CMakeLists.txt b/maxflow/galois/apps/barneshut/CMakeLists.txt deleted file mode 100644 index ffeb708..0000000 --- a/maxflow/galois/apps/barneshut/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -if(CMAKE_COMPILER_IS_GNUCC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") -endif() -app(barneshut Barneshut.cpp) diff --git a/maxflow/galois/apps/barneshut/Point.h b/maxflow/galois/apps/barneshut/Point.h deleted file mode 100644 index b976e61..0000000 --- a/maxflow/galois/apps/barneshut/Point.h +++ /dev/null @@ -1,102 +0,0 @@ - -struct Point { - double val[3]; - Point() { val[0] = val[1] = val[2] = 0.0; } - //Point(double _x, double _y, double _z) : val{_x,_y,_z} {} - Point(double _x, double _y, double _z) { val[0] = _x; val[1] = _y; val[2] = _z; } - //explicit Point(double v) : val{v,v,v} {} - explicit Point(double v) { val[0] = v; val[1] = v; val[2] = v; } - - double operator[](const int index) const { - return val[index]; - } - - double& operator[](const int index) { - return val[index]; - } - - double x() const { - return val[0]; - } - - double y() const { - return val[1]; - } - - double z() const { - return val[2]; - } - - bool operator==(const Point& other) const { - return val[0] == other.val[0] && val[1] == other.val[1] && val[2] == other.val[2]; - } - - bool operator!=(const Point& other) const { - return !operator==(other); - } - - Point& operator+=(const Point& other) { - for (int i = 0; i < 3; ++i) - val[i] += other.val[i]; - return *this; - } - - Point& operator-=(const Point& other) { - for (int i = 0; i < 3; ++i) - val[i] -= other.val[i]; - return *this; - } - - Point& operator*=(double value) { - for (int i = 0; i < 3; ++i) - val[i] *= value; - return *this; - } - - Point operator-(const Point& other) const { - return Point(val[0] - other.val[0], val[1] - other.val[1], val[2] - other.val[2]); - } - - Point operator+(const Point& other) const { - return Point(val[0] + other.val[0], val[1] + other.val[1], val[2] + other.val[2]); - } - - Point operator*(double d) const { - return Point(val[0] * d, val[1] * d, val[2] * d); - } - - Point operator/(double d) const { - return Point(val[0] / d, val[1] / d, val[2] / d); - } - - 
double dist2() const { - return dot(*this); - } - - double dot(const Point& p2) const { - return val[0] * p2.val[0] + val[1] * p2.val[1] + val[2] * p2.val[2]; - } - - void pairMin(const Point& p2) { - for (int i = 0; i < 3; ++i) - if (p2.val[i] < val[i]) - val[i] = p2.val[i]; - } - - void pairMax(const Point& p2) { - for (int i = 0; i < 3; ++i) - if (p2.val[i] > val[i]) - val[i] = p2.val[i]; - } - - double minDim() const { - return std::min(val[0], std::min(val[1], val[2])); - } - -}; - -std::ostream& operator<<(std::ostream& os, const Point& p) { - os << "(" << p[0] << "," << p[1] << "," << p[2] << ")"; - return os; -} - diff --git a/maxflow/galois/apps/betweennesscentrality/BetweennessCentralityInner.cpp b/maxflow/galois/apps/betweennesscentrality/BetweennessCentralityInner.cpp deleted file mode 100644 index 381a36d..0000000 --- a/maxflow/galois/apps/betweennesscentrality/BetweennessCentralityInner.cpp +++ /dev/null @@ -1,487 +0,0 @@ -/** Betweenness Centrality -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Betweenness centrality. 
Implementation based on Ligra - * - * @author Andrew Lenharth - */ -#include "Galois/config.h" -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" -#include "Galois/Timer.h" -#include "Galois/Statistic.h" -#include "Galois/Graph/LCGraph.h" -#include "llvm/Support/CommandLine.h" -#include "Lonestar/BoilerPlate.h" -#include "Galois/Graph/GraphNodeBag.h" -#include "HybridBFS.h" - -#include GALOIS_CXX11_STD_HEADER(atomic) -#include -#include -#include -#include - -static const char* name = "Betweenness Centrality"; -static const char* desc = 0; -static const char* url = 0; - -enum Algo { - async, - leveled -}; - -namespace cll = llvm::cl; -static cll::opt filename(cll::Positional, cll::desc(""), cll::Required); -static cll::opt transposeGraphName("graphTranspose", cll::desc("Transpose of input graph")); -static cll::opt symmetricGraph("symmetricGraph", cll::desc("Input graph is symmetric")); -static cll::opt startNode("startNode", cll::desc("Node to start search from"), cll::init(0)); -static cll::opt algo("algo", cll::desc("Choose an algorithm:"), - cll::values( - clEnumValN(Algo::async, "async", "Async Algorithm"), - clEnumValN(Algo::leveled, "leveled", "Leveled Algorithm"), - clEnumValEnd), cll::init(Algo::async)); - -template -void initialize(Algo& algo, - typename Algo::Graph& graph, - typename Algo::Graph::GraphNode& source) { - - algo.readGraph(graph); - std::cout << "Read " << graph.size() << " nodes\n"; - - if (startNode >= graph.size()) { - std::cerr << "failed to set source: " << startNode << "\n"; - assert(0); - abort(); - } - - typename Algo::Graph::iterator it = graph.begin(); - std::advance(it, startNode); - source = *it; -} - -template -void readInOutGraph(Graph& graph) { - using namespace Galois::Graph; - if (symmetricGraph) { - Galois::Graph::readGraph(graph, filename); - } else if (transposeGraphName.size()) { - Galois::Graph::readGraph(graph, filename, transposeGraphName); - } else { - GALOIS_DIE("Graph type not supported"); - } -} - -static const int ChunkSize = 128; -static const int bfsChunkSize = 32+64; - -struct AsyncAlgo { - struct SNode { - float numPaths; - float dependencies; - int dist; - SNode() :numPaths(-std::numeric_limits::max()), dependencies(-std::numeric_limits::max()), dist(std::numeric_limits::max()) { } - }; - - typedef Galois::Graph::LC_CSR_Graph - ::with_no_lockable::type - ::with_numa_alloc::type InnerGraph; - typedef Galois::Graph::LC_InOut_Graph Graph; -//typedef Galois::Graph::LC_CSR_Graph Graph; - typedef Graph::GraphNode GNode; - - std::string name() const { return "async"; } - - void readGraph(Graph& graph) { - readInOutGraph(graph); - } - - struct Initialize { - Graph& g; - Initialize(Graph& g): g(g) { } - void operator()(Graph::GraphNode n) { - SNode& data = g.getData(n, Galois::MethodFlag::NONE); - data.numPaths = -std::numeric_limits::max(); - data.dependencies = -std::numeric_limits::max(); - data.dist = std::numeric_limits::max(); - } - }; - - struct BFS { - typedef int tt_does_not_need_aborts; - typedef std::pair WorkItem; - - struct Indexer: public std::unary_function { - int operator()(const WorkItem& val) const { - return val.second; - } - }; - - typedef Galois::WorkList::OrderedByIntegerMetric > OBIM; - - Graph& g; - BFS(Graph& g) :g(g) {} - - void operator()(WorkItem& item, Galois::UserContext& ctx) const { - GNode n = item.first; - int newDist = item.second; - if (newDist > g.getData(n).dist + 1) - return; - - for (Graph::edge_iterator ii = g.edge_begin(n, Galois::MethodFlag::NONE), - ei = g.edge_end(n, 
Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = g.getEdgeDst(ii); - SNode& ddata = g.getData(dst, Galois::MethodFlag::NONE); - - int oldDist; - while (true) { - oldDist = ddata.dist; - if (oldDist <= newDist) - break; - if (__sync_bool_compare_and_swap(&ddata.dist, oldDist, newDist)) { - ctx.push(WorkItem(dst, newDist + 1)); - break; - } - } - } - } - }; - - struct CountPaths { - typedef int tt_does_not_need_aborts; - - struct Indexer: public std::unary_function { - static Graph* g; - int operator()(const GNode& val) const { - // //use out edges as that signifies how many people will wait on this node - // auto ii = g->edge_begin(val, Galois::MethodFlag::NONE); - // auto ee = g->edge_end(val, Galois::MethodFlag::NONE); - // bool big = Galois::safe_advance(ii, ee, 10) != ee; - // return 2 * g->getData(val, Galois::MethodFlag::NONE).dist + (big ? 0 : 1); - return g->getData(val, Galois::MethodFlag::NONE).dist; - } - }; - - typedef Galois::WorkList::OrderedByIntegerMetric > OBIM; - - Graph& g; - CountPaths(Graph& g) :g(g) { Indexer::g = &g; } - - void operator()(GNode& n, Galois::UserContext& ctx) const { - SNode& sdata = g.getData(n, Galois::MethodFlag::NONE); - while (sdata.numPaths == -std::numeric_limits::max()) { - unsigned long np = 0; - bool allready = true; - for (Graph::in_edge_iterator ii = g.in_edge_begin(n, Galois::MethodFlag::NONE), - ee = g.in_edge_end(n, Galois::MethodFlag::NONE); ii != ee; ++ii) { - GNode dst = g.getInEdgeDst(ii); - SNode& ddata = g.getData(dst, Galois::MethodFlag::NONE); - if (ddata.dist + 1 == sdata.dist) { - if (ddata.numPaths != -std::numeric_limits::max()) { - np += ddata.numPaths; - } else { - allready = false; - // ctx.push(n); - // return; - } - } - } - if (allready) - sdata.numPaths = np; - } - } - }; - - struct ComputeDep { - typedef int tt_does_not_need_aborts; - - struct Indexer: public std::unary_function { - static Graph* g; - int operator()(const GNode& val) const { - // //use in edges as that signifies how many people will wait on this node - // auto ii = g->in_edge_begin(val, Galois::MethodFlag::NONE); - // auto ee = g->in_edge_end(val, Galois::MethodFlag::NONE); - // bool big = Galois::safe_advance(ii, ee, 10) != ee; - // return std::numeric_limits::max() - 2 * g->getData(val, Galois::MethodFlag::NONE).dist + (big ? 
0 : 1); - return std::numeric_limits::max() - g->getData(val, Galois::MethodFlag::NONE).dist; - } - }; - - typedef Galois::WorkList::OrderedByIntegerMetric > OBIM; - - Graph& g; - ComputeDep(Graph& g) :g(g) { Indexer::g = &g; } - - void operator()(GNode& n, Galois::UserContext& ctx) const { - SNode& sdata = g.getData(n, Galois::MethodFlag::NONE); - while (sdata.dependencies == -std::numeric_limits::max()) { - float newDep = 0.0; - bool allready = true; - for (Graph::edge_iterator ii = g.edge_begin(n, Galois::MethodFlag::NONE), - ei = g.edge_end(n, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = g.getEdgeDst(ii); - SNode& ddata = g.getData(dst, Galois::MethodFlag::NONE); - if (ddata.dist == sdata.dist + 1) { - if (ddata.dependencies != -std::numeric_limits::max()) { - newDep += ((float)sdata.numPaths / (float)ddata.numPaths) * (1 + ddata.dependencies); - } else { - allready = false; - // ctx.push(n); - // return; - } - } - } - if (allready) - sdata.dependencies = newDep; - } - } - }; - - void operator()(Graph& graph, GNode source) { - Galois::StatTimer Tinit("InitTime"), Tlevel("LevelTime"), Tbfs("BFSTime"), Tcount("CountTime"), Tdep("DepTime"); - Tinit.start(); - Galois::do_all_local(graph, Initialize(graph), Galois::loopname("INIT")); - Tinit.stop(); - std::cout << "INIT DONE " << Tinit.get() << "\n"; - Tbfs.start(); - graph.getData(source).dist = 0; - //Galois::for_each(BFS::WorkItem(source, 1), BFS(graph), Galois::loopname("BFS"), Galois::wl()); - HybridBFS H; - H(graph,source); - Tbfs.stop(); - std::cout << "BFS DONE " << Tbfs.get() << "\n"; - Tcount.start(); - graph.getData(source).numPaths = 1; - Galois::for_each_local(graph, CountPaths(graph), Galois::loopname("COUNT"), Galois::wl()); - Tcount.stop(); - std::cout << "COUNT DONE " << Tcount.get() << "\n"; - Tdep.start(); - graph.getData(source).dependencies = 0.0; - Galois::for_each(graph.begin(), graph.end(), ComputeDep(graph), Galois::loopname("DEP"), Galois::wl()); - Tdep.stop(); - std::cout << "DEP DONE " << Tdep.get() << "\n"; - } -}; - -AsyncAlgo::Graph* AsyncAlgo::CountPaths::Indexer::g; -AsyncAlgo::Graph* AsyncAlgo::ComputeDep::Indexer::g; - -struct LeveledAlgo { - struct SNode { - std::atomic numPaths; - float dependencies; - std::atomic dist; - SNode() :numPaths(~0UL), dependencies(-std::numeric_limits::max()), dist(std::numeric_limits::max()) { } - }; - - typedef Galois::Graph::LC_CSR_Graph - ::with_no_lockable::type - ::with_numa_alloc::type InnerGraph; - typedef Galois::Graph::LC_InOut_Graph Graph; -//typedef Galois::Graph::LC_CSR_Graph Graph; - typedef Graph::GraphNode GNode; - typedef Galois::InsertBag Bag; - - std::string name() const { return "Leveled"; } - - void readGraph(Graph& graph) { - readInOutGraph(graph); - } - - struct Initialize { - Graph& g; - Initialize(Graph& g): g(g) { } - void operator()(Graph::GraphNode n) { - SNode& data = g.getData(n, Galois::MethodFlag::NONE); - data.numPaths = 0; - data.dependencies = 0.0; //std::numeric_limits::lowest(); - data.dist = std::numeric_limits::max(); - } - }; - - //push based - template - struct BFS { - typedef int tt_does_not_need_aborts; - - Graph& g; - Bag& b; - BFS(Graph& g, Bag& b) :g(g), b(b) {} - - void operator()(GNode& n) const { - auto& sdata = g.getData(n, Galois::MethodFlag::NONE); - for (Graph::edge_iterator ii = g.edge_begin(n, Galois::MethodFlag::NONE), - ei = g.edge_end(n, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = g.getEdgeDst(ii); - SNode& ddata = g.getData(dst, Galois::MethodFlag::NONE); - if 
(ddata.dist.load(std::memory_order_relaxed) == std::numeric_limits::max()) { - if (std::numeric_limits::max() == ddata.dist.exchange(sdata.dist + 1)) - b.push_back(dst); - if (doCount) - ddata.numPaths = ddata.numPaths + sdata.numPaths; - } else if (ddata.dist == sdata.dist + 1) { - if (doCount) - ddata.numPaths = ddata.numPaths + sdata.numPaths; - } - } - // for (Graph::in_edge_iterator ii = g.in_edge_begin(n, Galois::MethodFlag::NONE), - // ee = g.in_edge_end(n, Galois::MethodFlag::NONE); ii != ee; ++ii) { - // GNode dst = g.getInEdgeDst(ii); - // SNode& ddata = g.getData(dst, Galois::MethodFlag::NONE); - // if (ddata.dist + 1 == sdata.dist) - // sdata.numPaths += ddata.numPaths; - // } - } - }; - - //pull based - struct Counter { - typedef int tt_does_not_need_aborts; - - Graph& g; - Counter(Graph& g) :g(g) {} - - void operator()(GNode& n) const { - auto& sdata = g.getData(n, Galois::MethodFlag::NONE); - unsigned long np = 0; - for (Graph::in_edge_iterator ii = g.in_edge_begin(n, Galois::MethodFlag::NONE), - ee = g.in_edge_end(n, Galois::MethodFlag::NONE); ii != ee; ++ii) { - GNode dst = g.getInEdgeDst(ii); - SNode& ddata = g.getData(dst, Galois::MethodFlag::NONE); - if (ddata.dist + 1 == sdata.dist) - np += ddata.numPaths; - } - sdata.numPaths = sdata.numPaths + np; - } - }; - - //pull based - struct ComputeDep { - Graph& g; - ComputeDep(Graph& g) :g(g) {} - - void operator()(GNode& n) const { - SNode& sdata = g.getData(n, Galois::MethodFlag::NONE); - for (Graph::edge_iterator ii = g.edge_begin(n, Galois::MethodFlag::NONE), - ei = g.edge_end(n, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = g.getEdgeDst(ii); - SNode& ddata = g.getData(dst, Galois::MethodFlag::NONE); - if (ddata.dist == sdata.dist + 1) - sdata.dependencies += ((float)sdata.numPaths / (float)ddata.numPaths) * (1 + ddata.dependencies); - } - } - }; - - void operator()(Graph& graph, GNode source) { - Galois::StatTimer - Tinit("InitTime"), Tlevel("LevelTime"), Tbfs("BFSTime"), - Tcount("CountTime"), Tdep("DepTime"); - Tinit.start(); - Galois::do_all_local(graph, Initialize(graph), Galois::loopname("INIT")); - Tinit.stop(); - std::cout << "INIT DONE " << Tinit.get() << "\n"; - - Tbfs.start(); - std::deque levels; - levels.push_back(new Bag()); - levels[0]->push_back(source); - graph.getData(source).dist = 0; - graph.getData(source).numPaths = 1; - while (!levels.back()->empty()) { - Bag* b = levels.back(); - levels.push_back(new Bag()); - Galois::do_all_local(*b, BFS<>(graph, *levels.back()), Galois::loopname("BFS"), Galois::do_all_steal(true)); - //Galois::do_all_local(*levels.back(), Counter(graph), "COUNTER", true); - } - delete levels.back(); - levels.pop_back(); - Tbfs.stop(); - std::cout << "BFS DONE " << Tbfs.get() << " with " << levels.size() << " levels\n"; - - Tdep.start(); - for (int i = levels.size() - 1; i > 0; --i) - Galois::do_all_local(*levels[i-1], ComputeDep(graph), Galois::loopname("DEPS"), Galois::do_all_steal(true)); - Tdep.stop(); - std::cout << "DEP DONE " << Tdep.get() << "\n"; - while (!levels.empty()) { - delete levels.back(); - levels.pop_back(); - } - } -}; - - -template -void run() { - typedef typename Algo::Graph Graph; - typedef typename Graph::GraphNode GNode; - - Algo algo; - Graph graph; - GNode source; - - initialize(algo, graph, source); - - Galois::reportPageAlloc("MeminfoPre"); - Galois::preAlloc(numThreads + (3*graph.size() * sizeof(typename Graph::node_data_type)) / Galois::Runtime::MM::pageSize); - Galois::reportPageAlloc("MeminfoMid"); - - Galois::StatTimer T; - std::cout 
<< "Running " << algo.name() << " version\n"; - T.start(); - algo(graph, source); - T.stop(); - - Galois::reportPageAlloc("MeminfoPost"); - - if (!skipVerify) { - int count = 0; - for (typename Graph::iterator ii = graph.begin(), ei = graph.end(); ii != ei && count < 20; ++ii, ++count) { - std::cout << count << ": " - << std::setiosflags(std::ios::fixed) << std::setprecision(6) - << graph.getData(*ii).dependencies - << " " << graph.getData(*ii).numPaths - << " " << graph.getData(*ii).dist - << "\n"; - } - count = 0; - // for (typename Graph::iterator ii = graph.begin(), ei = graph.end(); ii != ei; ++ii, ++count) - // std::cout << ((count % 128 == 0) ? "\n" : " ") << graph.getData(*ii).numPaths; - std::cout << "\n"; - } -} - -int main(int argc, char **argv) { - Galois::StatManager statManager; - LonestarStart(argc, argv, name, desc, url); - - Galois::StatTimer T("TotalTime"); - T.start(); - switch (algo) { - case Algo::async: run(); break; - case Algo::leveled: run(); break; - } - T.stop(); - - return 0; -} diff --git a/maxflow/galois/apps/betweennesscentrality/BetweennessCentralityOuter.cpp b/maxflow/galois/apps/betweennesscentrality/BetweennessCentralityOuter.cpp deleted file mode 100644 index 8de7901..0000000 --- a/maxflow/galois/apps/betweennesscentrality/BetweennessCentralityOuter.cpp +++ /dev/null @@ -1,277 +0,0 @@ -/** Betweenness centrality application -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. 
- * - * @author Dimitrios Prountzos - */ -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" -#include "Galois/Statistic.h" -#include "Galois/UserContext.h" -#include "Galois/Graph/LCGraph.h" - -#include "llvm/Support/CommandLine.h" -#include "Lonestar/BoilerPlate.h" - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -static const char* name = "Betweenness Centrality"; -static const char* desc = "Computes the betweenness centrality of all nodes in a graph"; -static const char* url = "betweenness_centrality"; - -static llvm::cl::opt filename(llvm::cl::Positional, llvm::cl::desc(""), llvm::cl::Required); -static llvm::cl::opt iterLimit("limit", llvm::cl::desc("Limit number of iterations to value (0 is all nodes)"), llvm::cl::init(0)); -static llvm::cl::opt startNode("startNode", llvm::cl::desc("Node to start search from"), llvm::cl::init(0)); -static llvm::cl::opt forceVerify("forceVerify", llvm::cl::desc("Abort if not verified, only makes sense for torus graphs")); -static llvm::cl::opt printAll("printAll", llvm::cl::desc("Print betweenness values for all nodes")); - -typedef Galois::Graph::LC_CSR_Graph::with_no_lockable::type - ::with_numa_alloc::type Graph; -typedef Graph::GraphNode GNode; - -Graph* G; -int NumNodes; - -Galois::Runtime::PerThreadStorage CB; -Galois::Runtime::PerThreadStorage perThreadSigma; -Galois::Runtime::PerThreadStorage perThreadD; -Galois::Runtime::PerThreadStorage perThreadDelta; -Galois::Runtime::PerThreadStorage*> perThreadSucc; - -template -struct PerIt { - typedef typename Galois::PerIterAllocTy::rebind::other Ty; -}; - -struct process { - typedef int tt_does_not_need_aborts; - //typedef int tt_needs_per_iter_alloc; - typedef int tt_does_not_need_push; - - void operator()(GNode& _req, Galois::UserContext& lwl) { - Galois::gdeque SQ; - double* sigma = *perThreadSigma.getLocal(); - int* d = *perThreadD.getLocal(); - double* delta = *perThreadDelta.getLocal(); - Galois::gdeque* succ = *perThreadSucc.getLocal(); - -#if 0 - std::deque::Ty> sigma(NumNodes, 0.0, lwl.getPerIterAlloc()); - std::deque::Ty> d(NumNodes, 0, lwl.getPerIterAlloc()); - std::deque::Ty> delta(NumNodes, 0.0, lwl.getPerIterAlloc()); - std::deque::Ty> succ(NumNodes, GNdeque(lwl.getPerIterAlloc()), lwl.getPerIterAlloc()); -#endif - unsigned int QAt = 0; - - int req = _req; - - sigma[req] = 1; - d[req] = 1; - - SQ.push_back(_req); - for (auto qq = SQ.begin(), eq = SQ.end(); qq != eq; ++qq) { - GNode _v = *qq; - int v = _v; - for (Graph::edge_iterator - ii = G->edge_begin(_v, Galois::MethodFlag::NONE), - ee = G->edge_end(_v, Galois::MethodFlag::NONE); ii != ee; ++ii) { - GNode _w = G->getEdgeDst(ii); - int w = _w; - if (!d[w]) { - SQ.push_back(_w); - d[w] = d[v] + 1; - } - if (d[w] == d[v] + 1) { - sigma[w] = sigma[w] + sigma[v]; - succ[v].push_back(w); - } - } - } - while (SQ.size() > 1) { - int w = SQ.back(); - SQ.pop_back(); - - double sigma_w = sigma[w]; - double delta_w = delta[w]; - auto& slist = succ[w]; - for (auto ii = slist.begin(), ee = slist.end(); ii != ee; ++ii) { - //std::cerr << "Processing node " << w << std::endl; - GNode v = *ii; - delta_w += (sigma_w/sigma[v])*(1.0 + delta[v]); - } - delta[w] = delta_w; - } - double* Vec = *CB.getLocal(); - for (unsigned int i = 0; i < NumNodes; ++i) { - Vec[i] += delta[i]; - delta[i] = 0; - sigma[i] = 0; - d[i] = 0; - succ[i].clear(); - } - } -}; - -// Verification for reference torus graph inputs. -// All nodes should have the same betweenness value. 
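Before the verification helpers, a minimal sequential restatement of what `process` computes for one source: Brandes' forward path counting followed by reverse-order dependency accumulation. This is an illustrative sketch using plain std:: containers in place of the per-thread sigma/d/delta/succ arrays and the Galois gdeque; the function and variable names here are invented for the example.

#include <cstddef>
#include <vector>

// Illustrative sketch, not the Galois code: one Brandes source iteration.
// adj is an out-adjacency list; CB accumulates betweenness contributions.
void accumulateFromSource(const std::vector<std::vector<int>>& adj, int s,
                          std::vector<double>& CB) {
  const std::size_t n = adj.size();
  std::vector<double> sigma(n, 0.0), delta(n, 0.0);
  std::vector<int> d(n, 0);                        // 1-based depths, as in `process`
  std::vector<std::vector<int>> succ(n);
  std::vector<int> order;                          // BFS (non-decreasing depth) order

  sigma[s] = 1.0;
  d[s] = 1;
  order.push_back(s);
  for (std::size_t qq = 0; qq < order.size(); ++qq) {
    int v = order[qq];
    for (int w : adj[v]) {
      if (!d[w]) { d[w] = d[v] + 1; order.push_back(w); }
      if (d[w] == d[v] + 1) { sigma[w] += sigma[v]; succ[v].push_back(w); }
    }
  }
  // Reverse BFS order: every node folds in the dependency of its (deeper) successors.
  for (std::size_t i = order.size(); i-- > 1; ) {
    int w = order[i];
    for (int v : succ[w])
      delta[w] += (sigma[w] / sigma[v]) * (1.0 + delta[v]);
    CB[w] += delta[w];                             // the diffed code adds into the per-thread Vec
  }
}

Parallelism in the diffed code comes from running many such source iterations concurrently, each with thread-local scratch arrays, and summing the per-thread CB vectors at the end.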
-void verify() { - double sampleBC = 0.0; - bool firstTime = true; - for (int i=0; i { - Graph* graph; - HasOut(Graph* g): graph(g) { } - bool operator()(const GNode& n) const { - return graph->edge_begin(n) != graph->edge_end(n); - } -}; - -struct InitializeLocal { - template - void initArray(T** addr) { - T* a = new T[NumNodes](); - *addr = a; - } - void operator()(unsigned, unsigned) { - initArray(CB.getLocal()); - initArray(perThreadSigma.getLocal()); - initArray(perThreadD.getLocal()); - initArray(perThreadDelta.getLocal()); - initArray(perThreadSucc.getLocal()); - } -}; - -struct DeleteLocal { - template - void deleteArray(T** addr) { - delete [] *addr; - } - void operator()(unsigned, unsigned) { - deleteArray(CB.getLocal()); - deleteArray(perThreadSigma.getLocal()); - deleteArray(perThreadD.getLocal()); - deleteArray(perThreadDelta.getLocal()); - deleteArray(perThreadSucc.getLocal()); - } -}; - -int main(int argc, char** argv) { - Galois::StatManager M; - LonestarStart(argc, argv, name, desc, url); - - Graph g; - G = &g; - Galois::Graph::readGraph(*G, filename); - - NumNodes = G->size(); - - Galois::on_each(InitializeLocal()); - - Galois::reportPageAlloc("MeminfoPre"); - Galois::preAlloc(numThreads * NumNodes / 1650); - Galois::reportPageAlloc("MeminfoMid"); - - boost::filter_iterator - begin = boost::make_filter_iterator(HasOut(G), g.begin(), g.end()), - end = boost::make_filter_iterator(HasOut(G), g.end(), g.end()); - - boost::filter_iterator begin2 = - iterLimit ? Galois::safe_advance(begin, end, (int)iterLimit) : end; - - size_t iterations = std::distance(begin, begin2); - - std::vector v(begin, begin2); - - std::cout - << "NumNodes: " << NumNodes - << " Start Node: " << startNode - << " Iterations: " << iterations << "\n"; - - typedef Galois::WorkList::StableIterator< std::vector::iterator, true> WLL; - Galois::StatTimer T; - T.start(); - Galois::for_each(v.begin(), v.end(), process(), Galois::wl()); - T.stop(); - - if (!skipVerify) { - for (int i=0; i<10; ++i) { - double bc = (*CB.getRemote(0))[i]; - for (unsigned int j = 1; j < Galois::getActiveThreads(); ++j) - bc += (*CB.getRemote(j))[i]; - std::cout << i << ": " - << std::setiosflags(std::ios::fixed) << std::setprecision(6) << bc << "\n"; - } - } - if (printAll) - printBCcertificate(); - - Galois::reportPageAlloc("MeminfoPost"); - - if (forceVerify || !skipVerify) { - verify(); - } - - // XXX(ddn): Could use unique_ptr but not supported on all our platforms :( - Galois::on_each(DeleteLocal()); - - return 0; -} diff --git a/maxflow/galois/apps/betweennesscentrality/CMakeLists.txt b/maxflow/galois/apps/betweennesscentrality/CMakeLists.txt deleted file mode 100644 index 9e67b4c..0000000 --- a/maxflow/galois/apps/betweennesscentrality/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -app(betweennesscentrality-outer BetweennessCentralityOuter.cpp) -include_directories(../bfs) -app(betweennesscentrality-inner BetweennessCentralityInner.cpp) diff --git a/maxflow/galois/apps/bfs/BFS.h b/maxflow/galois/apps/bfs/BFS.h deleted file mode 100644 index 1c3db8e..0000000 --- a/maxflow/galois/apps/bfs/BFS.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef APPS_BFS_BFS_H -#define APPS_BFS_BFS_H - -#include "llvm/Support/CommandLine.h" - -typedef unsigned int Dist; -static const Dist DIST_INFINITY = std::numeric_limits::max() - 1; - -//! 
Standard data type on nodes -struct SNode { - Dist dist; -}; - -template -void readInOutGraph(Graph& graph); - -extern llvm::cl::opt memoryLimit; - -#endif diff --git a/maxflow/galois/apps/bfs/CMakeLists.txt b/maxflow/galois/apps/bfs/CMakeLists.txt deleted file mode 100644 index 9f19fd5..0000000 --- a/maxflow/galois/apps/bfs/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -if(USE_EXP) - include_directories(../../exp/apps/bfs .) -endif() -app(bfs bfs.cpp) -if(NOT CMAKE_CXX_COMPILER_ID MATCHES "XL") - app(diameter Diameter.cpp) -endif() diff --git a/maxflow/galois/apps/bfs/Diameter.cpp b/maxflow/galois/apps/bfs/Diameter.cpp deleted file mode 100644 index 4aa4419..0000000 --- a/maxflow/galois/apps/bfs/Diameter.cpp +++ /dev/null @@ -1,464 +0,0 @@ -/** Computing/Estimating diameter of a graph -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Algorithms for estimating the diameter (longest shortest path) of a graph. 
- * - * @author Donald Nguyen - */ -#include "Galois/config.h" -#include "Galois/Accumulator.h" -#include "Galois/Bag.h" -#include "Galois/Galois.h" -#include "Galois/Statistic.h" -#include "Galois/Timer.h" -#include "Galois/Graph/LCGraph.h" -#include "Galois/ParallelSTL/ParallelSTL.h" -#include "llvm/Support/CommandLine.h" -#include "Lonestar/BoilerPlate.h" - -#include GALOIS_CXX11_STD_HEADER(random) -#include -#include -#include -#include - -#include "HybridBFS.h" -#ifdef GALOIS_USE_EXP -#include "LigraAlgo.h" -#include "GraphLabAlgo.h" -#endif -#include "BFS.h" - -static const char* name = "Diameter Estimation"; -static const char* desc = "Estimates the diameter of a graph"; -static const char* url = 0; - -//****** Command Line Options ****** -enum Algo { - graphlab, - ligra, - ligraChi, - pickK, - simple -}; - -namespace cll = llvm::cl; -static cll::opt filename(cll::Positional, cll::desc(""), cll::Required); -static cll::opt transposeGraphName("graphTranspose", cll::desc("Transpose of input graph")); -static cll::opt symmetricGraph("symmetricGraph", cll::desc("Input graph is symmetric")); -static cll::opt startNode("startNode", cll::desc("Node to start search from"), cll::init(0)); -static cll::opt numCandidates("numCandidates", cll::desc("Number of candidates to use for pickK algorithm"), cll::init(5)); -cll::opt memoryLimit("memoryLimit", - cll::desc("Memory limit for out-of-core algorithms (in MB)"), cll::init(~0U)); -static cll::opt algo("algo", cll::desc("Choose an algorithm:"), - cll::values( - clEnumValN(Algo::simple, "simple", "Simple pseudo-peripheral algorithm (default)"), - clEnumValN(Algo::pickK, "pickK", "Pick K candidates"), -#ifdef USE_EXP - clEnumValN(Algo::ligra, "ligra", "Use Ligra programming model"), - clEnumValN(Algo::ligraChi, "ligraChi", "Use Ligra and GraphChi programming model"), - clEnumValN(Algo::graphlab, "graphlab", "Use GraphLab programming model"), -#endif - clEnumValEnd), cll::init(Algo::simple)); - -template -struct min_degree { - typedef typename Graph::GraphNode GNode; - Graph& graph; - min_degree(Graph& g): graph(g) { } - - Galois::optional operator()(const Galois::optional& a, const Galois::optional& b) const { - if (!a) return b; - if (!b) return a; - if (std::distance(graph.edge_begin(*a), graph.edge_end(*a)) - < std::distance(graph.edge_begin(*b), graph.edge_end(*b))) - return a; - else - return b; - } -}; - -template -struct order_by_degree { - typedef typename Graph::GraphNode GNode; - Graph& graph; - order_by_degree(Graph& g): graph(g) { } - - bool operator()(const GNode& a, const GNode& b) const { - return std::distance(graph.edge_begin(a), graph.edge_end(a)) - < std::distance(graph.edge_begin(b), graph.edge_end(b)); - } -}; - -//! 
Collect nodes with dist == d -template -struct collect_nodes_with_dist { - typedef typename Graph::GraphNode GNode; - Graph& graph; - Galois::InsertBag& bag; - Dist dist; - collect_nodes_with_dist(Graph& g, Galois::InsertBag& b, Dist d): graph(g), bag(b), dist(d) { } - - void operator()(const GNode& n) { - if (graph.getData(n).dist == dist) - bag.push(n); - } -}; - -template -struct has_dist { - typedef typename Graph::GraphNode GNode; - Graph& graph; - Dist dist; - has_dist(Graph& g, Dist d): graph(g), dist(d) { } - Galois::optional operator()(const GNode& a) const { - if (graph.getData(a).dist == dist) - return Galois::optional(a); - return Galois::optional(); - } -}; - -template -struct CountLevels { - Graph& graph; - std::deque counts; - - CountLevels(Graph& g): graph(g) { } - - void operator()(typename Graph::GraphNode n) { - Dist d = graph.getData(n).dist; - if (d == DIST_INFINITY) - return; - if (counts.size() <= d) - counts.resize(d + 1); - ++counts[d]; - } - - // Reduce function - template - void operator()(CountLevels& a, CountLevels& b) { - if (a.counts.size() < b.counts.size()) - a.counts.resize(b.counts.size()); - std::transform(b.counts.begin(), b.counts.end(), a.counts.begin(), a.counts.begin(), std::plus()); - } - - std::deque count() { - return Galois::Runtime::do_all_impl(Galois::Runtime::makeLocalRange(graph), *this, *this).counts; - } -}; - -template -void resetGraph(typename Algo::Graph& g) { - Galois::do_all_local(g, typename Algo::Initialize(g)); -} - -template -void readInOutGraph(Graph& graph) { - using namespace Galois::Graph; - if (symmetricGraph) { - Galois::Graph::readGraph(graph, filename); - } else if (transposeGraphName.size()) { - Galois::Graph::readGraph(graph, filename, transposeGraphName); - } else { - GALOIS_DIE("Graph type not supported"); - } -} - -/** - * The eccentricity of vertex v, ecc(v), is the greatest distance from v to any vertex. - * A peripheral vertex v is one whose distance from some other vertex u is the - * diameter of the graph: \exists u : dist(v, u) = D. A pseudo-peripheral vertex is a - * vertex v that satisfies: \forall u : dist(v, u) = ecc(v) ==> ecc(v) = ecc(u). - * - * Simple pseudo-peripheral algorithm: - * 1. Choose v - * 2. Among the vertices dist(v, u) = ecc(v), select u with minimal degree - * 3. 
If ecc(u) > ecc(v) then - * v = u and go to step 2 - * otherwise - * u is a pseudo-peripheral vertex - */ -struct SimpleAlgo { - typedef HybridBFS BFS; - typedef BFS::Graph Graph; - typedef Graph::GraphNode GNode; - typedef std::pair Result; - - void readGraph(Graph& graph) { readInOutGraph(graph); } - - struct Initialize { - Graph& graph; - Initialize(Graph& g): graph(g) { } - void operator()(GNode n) { - graph.getData(n).dist = DIST_INFINITY; - } - }; - - Result search(Graph& graph, GNode start) { - BFS bfs; - - bfs(graph, start); - CountLevels cl(graph); - std::deque counts = cl.count(); - - size_t ecc = counts.size() - 1; - //size_t maxWidth = *std::max_element(counts.begin(), counts.end()); - GNode candidate = *Galois::ParallelSTL::map_reduce(graph.begin(), graph.end(), - has_dist(graph, ecc), Galois::optional(), min_degree(graph)); - resetGraph(graph); - return Result(ecc, candidate); - } - - size_t operator()(Graph& graph, GNode source) { - Result v = search(graph, source); - while (true) { - Result u = search(graph, v.second); - std::cout << "ecc(v) = " << v.first << " ecc(u) = " << u.first << "\n"; - bool better = u.first > v.first; - if (!better) - break; - v = u; - } - return v.first; - } -}; - -/** - * A more complicated pseudo-peripheral algorithm. Designed for finding pairs - * of nodes with small maximum width between them, which is useful for matrix - * reordering. Include it here for completeness. - * - * Let the width of vertex v be the maximum number of nodes with the same - * distance from v. - * - * Unlike the simple one, instead of picking a minimal degree candidate u, - * select among some number of candidates U. Here, we select the top n - * lowest degree nodes who do not share neighborhoods. - * - * If there exists a vertex u such that ecc(u) > ecc(v) proceed as in the - * simple algorithm. - * - * Otherwise, select the u that has least maximum width. 
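For reference, the simple variant described above restated sequentially; an illustrative sketch with plain std:: containers, whereas the diffed SimpleAlgo reuses HybridBFS, CountLevels and a map_reduce over has_dist/min_degree to pick the minimum-degree vertex on the last level.

#include <queue>
#include <utility>
#include <vector>

// Illustrative sketch, not the Galois code: one BFS sweep returning
// (eccentricity of start, a minimum-degree vertex on the deepest level).
static std::pair<int, int> sweep(const std::vector<std::vector<int>>& adj, int start) {
  std::vector<int> dist(adj.size(), -1);
  std::queue<int> q;
  dist[start] = 0;
  q.push(start);
  int ecc = 0, candidate = start;
  while (!q.empty()) {
    int v = q.front(); q.pop();
    if (dist[v] > ecc) { ecc = dist[v]; candidate = v; }
    else if (dist[v] == ecc && adj[v].size() < adj[candidate].size()) candidate = v;
    for (int w : adj[v])
      if (dist[w] < 0) { dist[w] = dist[v] + 1; q.push(w); }
  }
  return { ecc, candidate };
}

// Repeat sweeps while the eccentricity keeps growing (steps 1-3 above); the
// final eccentricity is a lower bound on the true diameter.
static int estimateDiameter(const std::vector<std::vector<int>>& adj, int source) {
  std::pair<int, int> v = sweep(adj, source);
  while (true) {
    std::pair<int, int> u = sweep(adj, v.second);
    if (u.first <= v.first) break;
    v = u;
  }
  return v.first;
}

The PickKAlgo below differs only in step 2: instead of a single minimum-degree vertex it examines several low-degree candidates that do not share neighborhoods and also tracks their level widths.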
- */ -struct PickKAlgo { - struct LNode: public SNode { - bool done; - }; - - typedef HybridBFS BFS; - typedef BFS::Graph Graph; - typedef Graph::GraphNode GNode; - - void readGraph(Graph& graph) { readInOutGraph(graph); } - - struct Initialize { - Graph& graph; - Initialize(Graph& g): graph(g) { } - void operator()(GNode n) { - graph.getData(n).dist = DIST_INFINITY; - graph.getData(n).done = false; - } - }; - - std::deque select(Graph& graph, unsigned topn, size_t dist) { - Galois::InsertBag bag; - Galois::do_all_local(graph, collect_nodes_with_dist(graph, bag, dist)); - - // Incrementally sort nodes until we find least N who are not neighbors - // of each other - std::deque nodes; - std::deque result; - std::copy(bag.begin(), bag.end(), std::back_inserter(nodes)); - size_t cur = 0; - size_t size = nodes.size(); - size_t delta = topn * 5; - - for (std::deque::iterator ii = nodes.begin(), ei = nodes.end(); ii != ei; ) { - std::deque::iterator mi = ii; - if (cur + delta < size) { - std::advance(mi, delta); - cur += delta; - } else { - mi = ei; - cur = size; - } - - std::partial_sort(ii, mi, ei, order_by_degree(graph)); - - for (std::deque::iterator jj = ii; jj != mi; ++jj) { - GNode n = *jj; - - // Ignore marked neighbors - if (graph.getData(n).done) - continue; - - result.push_back(n); - - if (result.size() == topn) { - return result; - } - - // Mark neighbors - for (Graph::edge_iterator nn = graph.edge_begin(n), en = graph.edge_end(n); nn != en; ++nn) - graph.getData(graph.getEdgeDst(nn)).done = true; - } - - ii = mi; - } - - return result; - } - - struct Result { - GNode source; - std::deque candidates; - size_t maxWidth; - size_t ecc; - }; - - Result search(Graph& graph, const GNode& start, size_t limit, bool computeCandidates) { - BFS bfs; - Result res; - - bfs(graph, start); - CountLevels cl(graph); - std::deque counts = cl.count(); - - res.source = start; - res.ecc = counts.size() - 1; - res.maxWidth = *std::max_element(counts.begin(), counts.end()); - - if (limit == static_cast(-1) || res.maxWidth < limit) { - if (computeCandidates) - res.candidates = select(graph, numCandidates, res.ecc); - } - - resetGraph(graph); - return res; - } - - size_t operator()(Graph& graph, GNode source) { - Galois::optional terminal; - - Result v = search(graph, source, ~0, true); - - while (true) { - std::cout - << "(ecc(v), max_width) =" - << " (" << v.ecc << ", " << v.maxWidth << ")" - << " (ecc(u), max_width(u)) ="; - - size_t last = ~0; - for (auto ii = v.candidates.begin(), ei = v.candidates.end(); ii != ei; ++ii) { - Result u = search(graph, *ii, last, false); - - std::cout << " (" << u.ecc << ", " << u.maxWidth << ")"; - - if (u.maxWidth >= last) { - continue; - } else if (u.ecc > v.ecc) { - v = u; - terminal = Galois::optional(); - break; - } else if (u.maxWidth < last) { - last = u.maxWidth; - terminal = Galois::optional(u.ecc); - } - } - - std::cout << "\n"; - - if (terminal) - break; - v = search(graph, v.source, ~0, true); - } - - return *terminal; - } -}; - -template -void initialize(Algo& algo, - typename Algo::Graph& graph, - typename Algo::Graph::GraphNode& source) { - - algo.readGraph(graph); - std::cout << "Read " << graph.size() << " nodes\n"; - - if (startNode >= graph.size()) { - std::cerr - << "failed to set source: " << startNode << "\n"; - assert(0); - abort(); - } - - typename Algo::Graph::iterator it = graph.begin(); - std::advance(it, startNode); - source = *it; -} - - -template -void run() { - typedef typename Algo::Graph Graph; - typedef typename Graph::GraphNode GNode; - - 
Algo algo; - Graph graph; - GNode source; - - initialize(algo, graph, source); - - //Galois::preAlloc((numThreads + (graph.size() * sizeof(SNode) * 2) / Galois::Runtime::MM::pageSize)*8); - Galois::reportPageAlloc("MeminfoPre"); - - Galois::StatTimer T; - T.start(); - resetGraph(graph); - size_t diameter = algo(graph, source); - T.stop(); - - Galois::reportPageAlloc("MeminfoPost"); - - std::cout << "Estimated diameter: " << diameter << "\n"; -} - -int main(int argc, char **argv) { - Galois::StatManager statManager; - LonestarStart(argc, argv, name, desc, url); - - Galois::StatTimer T("TotalTime"); - T.start(); - switch (algo) { - case Algo::simple: run(); break; - case Algo::pickK: run(); break; -#ifdef GALOIS_USE_EXP - case Algo::ligra: run >(); break; - case Algo::ligraChi: run >(); break; - case Algo::graphlab: run >(); break; -#endif - default: std::cerr << "Unknown algorithm\n"; abort(); - } - T.stop(); - - return 0; -} diff --git a/maxflow/galois/apps/bfs/HybridBFS.h b/maxflow/galois/apps/bfs/HybridBFS.h deleted file mode 100644 index 07fa415..0000000 --- a/maxflow/galois/apps/bfs/HybridBFS.h +++ /dev/null @@ -1,188 +0,0 @@ -#ifndef APPS_BFS_HYBRIDBFS_H -#define APPS_BFS_HYBRIDBFS_H - -#include "Galois/Galois.h" -#include "Galois/Graph/LCGraph.h" - -template -struct HybridBFS { - typedef typename Galois::Graph::LC_CSR_Graph - ::template with_no_lockable::type - ::template with_numa_alloc::type - InnerGraph; - typedef typename Galois::Graph::LC_InOut_Graph Graph; - typedef typename Graph::GraphNode GNode; - - typedef std::pair WorkItem; - typedef Galois::InsertBag NodeBag; - typedef Galois::InsertBag WorkItemBag; - - Galois::GAccumulator count; - NodeBag bags[2]; - - struct ForwardProcess { - typedef int tt_does_not_need_aborts; - - Graph& graph; - HybridBFS* self; - NodeBag* nextBag; - Dist newDist; - - ForwardProcess(Graph& g, HybridBFS* s, NodeBag* n = 0, int d = 0): - graph(g), self(s), nextBag(n), newDist(d) { } - - void operator()(const GNode& n, Galois::UserContext&) { - for (typename Graph::edge_iterator ii = graph.edge_begin(n, Galois::MethodFlag::NONE), - ei = graph.edge_end(n, Galois::MethodFlag::NONE); ii != ei; ++ii) { - processBS(ii, newDist, *nextBag); - } - } - - void operator()(const typename Graph::edge_iterator& ii, Galois::UserContext&) { - processBS(ii, newDist, *nextBag); - } - - void operator()(const WorkItem& item, Galois::UserContext& ctx) { - GNode n = item.first; - for (typename Graph::edge_iterator ii = graph.edge_begin(n, Galois::MethodFlag::NONE), - ei = graph.edge_end(n, Galois::MethodFlag::NONE); ii != ei; ++ii) { - processAsync(ii, item.second, ctx); - } - } - - void processBS(const typename Graph::edge_iterator& ii, Dist nextDist, NodeBag& next) { - GNode dst = graph.getEdgeDst(ii); - NodeData& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - - Dist oldDist; - while (true) { - oldDist = ddata.dist; - if (oldDist <= nextDist) - return; - if (__sync_bool_compare_and_swap(&ddata.dist, oldDist, nextDist)) { - next.push(dst); - self->count += 1 - + std::distance(graph.edge_begin(dst, Galois::MethodFlag::NONE), - graph.edge_end(dst, Galois::MethodFlag::NONE)); - break; - } - } - } - - void processAsync(const typename Graph::edge_iterator& ii, Dist nextDist, Galois::UserContext& next) { - GNode dst = graph.getEdgeDst(ii); - NodeData& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - - Dist oldDist; - while (true) { - oldDist = ddata.dist; - if (oldDist <= nextDist) - return; - if (__sync_bool_compare_and_swap(&ddata.dist, oldDist, nextDist)) { - 
next.push(WorkItem(dst, nextDist + 1)); - break; - } - } - } - }; - - struct BackwardProcess { - typedef int tt_does_not_need_aborts; - typedef int tt_does_not_need_push; - - Graph& graph; - HybridBFS* self; - NodeBag* nextBag; - Dist newDist; - BackwardProcess(Graph& g, HybridBFS* s, NodeBag* n, int d): graph(g), self(s), nextBag(n), newDist(d) { } - - void operator()(const GNode& n, Galois::UserContext&) { - (*this)(n); - } - - void operator()(const GNode& n) { - NodeData& sdata = graph.getData(n, Galois::MethodFlag::NONE); - if (sdata.dist <= newDist) - return; - - for (typename Graph::in_edge_iterator ii = graph.in_edge_begin(n, Galois::MethodFlag::NONE), - ei = graph.in_edge_end(n, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getInEdgeDst(ii); - NodeData& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - - if (ddata.dist + 1 == newDist) { - sdata.dist = newDist; - nextBag->push(n); - self->count += 1 - + std::distance(graph.edge_begin(n, Galois::MethodFlag::NONE), - graph.edge_end(n, Galois::MethodFlag::NONE)); - break; - } - } - } - }; - - struct PopulateAsync { - typedef int tt_does_not_need_aborts; - typedef int tt_does_not_need_push; - - WorkItemBag& bag; - Dist newDist; - PopulateAsync(WorkItemBag& b, Dist d): bag(b), newDist(d) { } - void operator()(const GNode& n, Galois::UserContext&) { - (*this)(n); - } - - void operator()(const GNode& n) { - bag.push(WorkItem(n, newDist)); - } - }; - - void operator()(Graph& graph, const GNode& source) { - using namespace Galois::WorkList; - typedef dChunkedLIFO<256> WL; - typedef BulkSynchronous > BSWL; - - int next = 0; - Dist newDist = 1; - int numForward = 0; - int numBackward = 0; - - graph.getData(source).dist = 0; - if (std::distance(graph.edge_begin(source), graph.edge_end(source)) + 1 > (long) graph.sizeEdges() / 20) { - Galois::do_all_local(graph, BackwardProcess(graph, this, &bags[next], newDist)); - numBackward += 1; - } else { - Galois::for_each(graph.out_edges(source, Galois::MethodFlag::NONE).begin(), - graph.out_edges(source, Galois::MethodFlag::NONE).end(), - ForwardProcess(graph, this, &bags[next], newDist)); - numForward += 1; - } - - while (!bags[next].empty()) { - size_t nextSize = count.reduce(); - count.reset(); - int cur = next; - next = (cur + 1) & 1; - newDist++; - if (nextSize > graph.sizeEdges() / 20) { - //std::cout << "Dense " << nextSize << "\n"; - Galois::do_all_local(graph, BackwardProcess(graph, this, &bags[next], newDist)); - numBackward += 1; - } else if (numForward < 10 && numBackward == 0) { - //std::cout << "Sparse " << nextSize << "\n"; - Galois::for_each_local(bags[cur], ForwardProcess(graph, this, &bags[next], newDist), Galois::wl()); - numForward += 1; - } else { - //std::cout << "Async " << nextSize << "\n"; - WorkItemBag asyncBag; - Galois::for_each_local(bags[cur], PopulateAsync(asyncBag, newDist), Galois::wl()); - Galois::for_each_local(asyncBag, ForwardProcess(graph, this), Galois::wl()); - break; - } - bags[cur].clear(); - } - } -}; - -#endif diff --git a/maxflow/galois/apps/bfs/bfs.cpp b/maxflow/galois/apps/bfs/bfs.cpp deleted file mode 100644 index 7d8c784..0000000 --- a/maxflow/galois/apps/bfs/bfs.cpp +++ /dev/null @@ -1,718 +0,0 @@ -/** Breadth-first search -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. 
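The relaxation loops in processBS and processAsync above, like AsyncAlgo::Process further down in bfs.cpp, all follow one monotone-update pattern: keep retrying a compare-and-swap until either the stored distance is already no larger than the proposed one or the swap succeeds. An illustrative restatement with C++11 atomics (the diffed code uses the GCC __sync builtin on a plain unsigned field):

#include <atomic>

typedef unsigned int Dist;   // matches the Dist typedef in BFS.h

// Illustrative sketch, not the Galois code. Returns true if this thread
// lowered the distance (and should therefore push the neighbor).
inline bool relaxMin(std::atomic<Dist>& stored, Dist newDist) {
  Dist old = stored.load(std::memory_order_relaxed);
  while (old > newDist) {
    if (stored.compare_exchange_weak(old, newDist, std::memory_order_relaxed))
      return true;
    // on failure compare_exchange_weak reloads `old`; retry unless it is already <= newDist
  }
  return false;
}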
- * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Breadth-first search. - * - * @author Andrew Lenharth - * @author Donald Nguyen - */ -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" -#include "Galois/Bag.h" -#include "Galois/Statistic.h" -#include "Galois/Timer.h" -#include "Galois/Graph/LCGraph.h" -#include "Galois/Graph/TypeTraits.h" -#include "Galois/ParallelSTL/ParallelSTL.h" -#ifdef GALOIS_USE_EXP -#include "Galois/Runtime/ParallelWorkInline.h" -#endif -#include "llvm/Support/CommandLine.h" -#include "Lonestar/BoilerPlate.h" - -#include -#include -#include -#include -#include - -#include "HybridBFS.h" -#ifdef GALOIS_USE_EXP -#include "LigraAlgo.h" -#include "GraphLabAlgo.h" -#endif -#include "BFS.h" - -static const char* name = "Breadth-first Search"; -static const char* desc = - "Computes the shortest path from a source node to all nodes in a directed " - "graph using a modified Bellman-Ford algorithm"; -static const char* url = "breadth_first_search"; - -//****** Command Line Options ****** -enum Algo { - async, - barrier, - barrierWithCas, - barrierWithInline, - deterministic, - deterministicDisjoint, - graphlab, - highCentrality, - hybrid, - ligra, - ligraChi, - serial -}; - -enum DetAlgo { - none, - base, - disjoint -}; - -namespace cll = llvm::cl; -static cll::opt filename(cll::Positional, cll::desc(""), cll::Required); -static cll::opt transposeGraphName("graphTranspose", cll::desc("Transpose of input graph")); -static cll::opt symmetricGraph("symmetricGraph", cll::desc("Input graph is symmetric")); -static cll::opt useDetBase("detBase", cll::desc("Deterministic")); -static cll::opt useDetDisjoint("detDisjoint", cll::desc("Deterministic with disjoint optimization")); -static cll::opt startNode("startNode", cll::desc("Node to start search from"), cll::init(0)); -static cll::opt reportNode("reportNode", cll::desc("Node to report distance to"), cll::init(1)); -cll::opt memoryLimit("memoryLimit", - cll::desc("Memory limit for out-of-core algorithms (in MB)"), cll::init(~0U)); -static cll::opt algo("algo", cll::desc("Choose an algorithm:"), - cll::values( - clEnumValN(Algo::async, "async", "Asynchronous"), - clEnumValN(Algo::barrier, "barrier", "Parallel optimized with barrier (default)"), - clEnumValN(Algo::barrierWithCas, "barrierWithCas", "Use compare-and-swap to update nodes"), - clEnumValN(Algo::deterministic, "detBase", "Deterministic"), - clEnumValN(Algo::deterministicDisjoint, "detDisjoint", "Deterministic with disjoint optimization"), - clEnumValN(Algo::highCentrality, "highCentrality", "Optimization for graphs with many shortest paths"), - clEnumValN(Algo::hybrid, "hybrid", "Hybrid of barrier and high centrality algorithms"), - clEnumValN(Algo::serial, 
"serial", "Serial"), -#ifdef GALOIS_USE_EXP - clEnumValN(Algo::barrierWithInline, "barrierWithInline", "Optimized with inlined workset"), - clEnumValN(Algo::graphlab, "graphlab", "Use GraphLab programming model"), - clEnumValN(Algo::ligraChi, "ligraChi", "Use Ligra and GraphChi programming model"), - clEnumValN(Algo::ligra, "ligra", "Use Ligra programming model"), -#endif - clEnumValEnd), cll::init(Algo::barrier)); - -template -struct not_consistent { - not_consistent(Graph& g) { } - - bool operator()(typename Graph::GraphNode n) const { return false; } -}; - -template -struct not_consistent::value>::type> { - Graph& g; - not_consistent(Graph& g): g(g) { } - - bool operator()(typename Graph::GraphNode n) const { - Dist dist = g.getData(n).dist; - if (dist == DIST_INFINITY) - return false; - - for (typename Graph::edge_iterator ii = g.edge_begin(n), ee = g.edge_end(n); ii != ee; ++ii) { - Dist ddist = g.getData(g.getEdgeDst(ii)).dist; - if (ddist > dist + 1) { - return true; - } - } - return false; - } -}; - -template -struct not_visited { - Graph& g; - - not_visited(Graph& g): g(g) { } - - bool operator()(typename Graph::GraphNode n) const { - return g.getData(n).dist >= DIST_INFINITY; - } -}; - -template -struct max_dist { - Graph& g; - Galois::GReduceMax& m; - - max_dist(Graph& g, Galois::GReduceMax& m): g(g), m(m) { } - - void operator()(typename Graph::GraphNode n) const { - Dist d = g.getData(n).dist; - if (d == DIST_INFINITY) - return; - m.update(d); - } -}; - -template -bool verify(Graph& graph, typename Graph::GraphNode source) { - if (graph.getData(source).dist != 0) { - std::cerr << "source has non-zero dist value\n"; - return false; - } - namespace pstl = Galois::ParallelSTL; - - size_t notVisited = pstl::count_if(graph.begin(), graph.end(), not_visited(graph)); - if (notVisited) { - std::cerr << notVisited << " unvisited nodes; this is an error if the graph is strongly connected\n"; - } - - bool consistent = pstl::find_if(graph.begin(), graph.end(), not_consistent(graph)) == graph.end(); - if (!consistent) { - std::cerr << "node found with incorrect distance\n"; - return false; - } - - Galois::GReduceMax m; - Galois::do_all(graph.begin(), graph.end(), max_dist(graph, m)); - std::cout << "max dist: " << m.reduce() << "\n"; - - return true; -} - -template -struct Initialize { - Graph& g; - Initialize(Graph& g): g(g) { } - void operator()(typename Graph::GraphNode n) { - g.getData(n).dist = DIST_INFINITY; - } -}; - -template -void initialize(Algo& algo, - typename Algo::Graph& graph, - typename Algo::Graph::GraphNode& source, - typename Algo::Graph::GraphNode& report) { - - algo.readGraph(graph); - std::cout << "Read " << graph.size() << " nodes\n"; - - if (startNode >= graph.size() || reportNode >= graph.size()) { - std::cerr - << "failed to set report: " << reportNode - << "or failed to set source: " << startNode << "\n"; - assert(0); - abort(); - } - - typename Algo::Graph::iterator it = graph.begin(); - std::advance(it, startNode); - source = *it; - it = graph.begin(); - std::advance(it, reportNode); - report = *it; -} - -template -void readInOutGraph(Graph& graph) { - using namespace Galois::Graph; - if (symmetricGraph) { - Galois::Graph::readGraph(graph, filename); - } else if (transposeGraphName.size()) { - Galois::Graph::readGraph(graph, filename, transposeGraphName); - } else { - GALOIS_DIE("Graph type not supported"); - } -} - -//! 
Serial BFS using optimized flags based off asynchronous algo -struct SerialAlgo { - typedef Galois::Graph::LC_CSR_Graph - ::with_no_lockable::type Graph; - typedef Graph::GraphNode GNode; - - std::string name() const { return "Serial"; } - void readGraph(Graph& graph) { Galois::Graph::readGraph(graph, filename); } - - void operator()(Graph& graph, const GNode source) const { - std::deque wl; - graph.getData(source).dist = 0; - wl.push_back(source); - - while (!wl.empty()) { - GNode n = wl.front(); - wl.pop_front(); - - SNode& data = graph.getData(n, Galois::MethodFlag::NONE); - - Dist newDist = data.dist + 1; - - for (Graph::edge_iterator ii = graph.edge_begin(n, Galois::MethodFlag::NONE), - ei = graph.edge_end(n, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - SNode& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - - if (newDist < ddata.dist) { - ddata.dist = newDist; - wl.push_back(dst); - } - } - } - } -}; - -//! Galois BFS using optimized flags -struct AsyncAlgo { - typedef Galois::Graph::LC_CSR_Graph - ::with_no_lockable::type - ::with_numa_alloc::type Graph; - typedef Graph::GraphNode GNode; - - std::string name() const { return "Asynchronous"; } - void readGraph(Graph& graph) { Galois::Graph::readGraph(graph, filename); } - - typedef std::pair WorkItem; - - struct Indexer: public std::unary_function { - Dist operator()(const WorkItem& val) const { - return val.second; - } - }; - - struct Process { - typedef int tt_does_not_need_aborts; - - Graph& graph; - Process(Graph& g): graph(g) { } - - void operator()(WorkItem& item, Galois::UserContext& ctx) const { - GNode n = item.first; - - Dist newDist = item.second; - - for (Graph::edge_iterator ii = graph.edge_begin(n, Galois::MethodFlag::NONE), - ei = graph.edge_end(n, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - SNode& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - - Dist oldDist; - while (true) { - oldDist = ddata.dist; - if (oldDist <= newDist) - break; - if (__sync_bool_compare_and_swap(&ddata.dist, oldDist, newDist)) { - ctx.push(WorkItem(dst, newDist + 1)); - break; - } - } - } - } - }; - - void operator()(Graph& graph, const GNode& source) const { - using namespace Galois::WorkList; - typedef dChunkedFIFO<64> dChunk; - //typedef ChunkedFIFO<64> Chunk; - typedef OrderedByIntegerMetric OBIM; - - graph.getData(source).dist = 0; - - Galois::for_each(WorkItem(source, 1), Process(graph), Galois::wl()); - } -}; - -/** - * Alternate between processing outgoing edges or incoming edges. Best for - * graphs that have many redundant shortest paths. - * - * S. Beamer, K. Asanovic and D. Patterson. Direction-optimizing breadth-first - * search. In Supercomputing. 2012. 
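As a note on the direction-optimizing BFS cited in the comment above (Beamer et al.): the hybrid code in this file switches between a sparse top-down step and a dense bottom-up step depending on how much of the graph the current frontier touches (the sizeEdges()/20 test in HybridBFS). The following stand-alone sketch illustrates that switch only; it assumes a plain undirected adjacency-list graph (std::vector of neighbour lists) and is not the Galois implementation.

    #include <cstddef>
    #include <cstdint>
    #include <limits>
    #include <vector>

    // Minimal direction-optimizing BFS sketch (sequential, illustrative only).
    // graph[u] lists the neighbours of u; the same adjacency serves both the
    // top-down and the bottom-up step because the graph is assumed undirected.
    std::vector<uint32_t> hybridBFS(const std::vector<std::vector<int>>& graph, int source) {
      const uint32_t INF = std::numeric_limits<uint32_t>::max();
      std::size_t totalEdges = 0;
      for (const auto& adj : graph) totalEdges += adj.size();

      std::vector<uint32_t> dist(graph.size(), INF);
      std::vector<int> frontier{source};
      dist[source] = 0;

      uint32_t level = 0;
      while (!frontier.empty()) {
        // Estimate the next step's work as the number of edges leaving the frontier.
        std::size_t frontierEdges = 0;
        for (int u : frontier) frontierEdges += graph[u].size();

        std::vector<int> next;
        if (frontierEdges > totalEdges / 20) {
          // Dense/bottom-up step: every unvisited vertex looks for a parent in the frontier.
          for (int v = 0; v < (int)graph.size(); ++v) {
            if (dist[v] != INF) continue;
            for (int u : graph[v]) {
              if (dist[u] == level) { dist[v] = level + 1; next.push_back(v); break; }
            }
          }
        } else {
          // Sparse/top-down step: frontier vertices push to their unvisited neighbours.
          for (int u : frontier) {
            for (int v : graph[u]) {
              if (dist[v] == INF) { dist[v] = level + 1; next.push_back(v); }
            }
          }
        }
        frontier.swap(next);
        ++level;
      }
      return dist;
    }

The payoff of the bottom-up step is that a vertex can stop scanning as soon as it finds any parent on the frontier, which is why it wins when the frontier covers a large fraction of the graph.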
- */ -struct HighCentralityAlgo { - typedef Galois::Graph::LC_CSR_Graph - ::with_no_lockable::type - ::with_numa_alloc::type InnerGraph; - typedef Galois::Graph::LC_InOut_Graph Graph; - typedef Graph::GraphNode GNode; - - std::string name() const { return "High Centrality"; } - - void readGraph(Graph& graph) { readInOutGraph(graph); } - - struct CountingBag { - Galois::InsertBag wl; - Galois::GAccumulator count; - - void clear() { - wl.clear(); - count.reset(); - } - - bool empty() { return wl.empty(); } - size_t size() { return count.reduce(); } - }; - - CountingBag bags[2]; - - struct ForwardProcess { - typedef int tt_does_not_need_aborts; - typedef int tt_does_not_need_push; - - Graph& graph; - CountingBag* next; - Dist newDist; - ForwardProcess(Graph& g, CountingBag* n, int d): graph(g), next(n), newDist(d) { } - - void operator()(const GNode& n, Galois::UserContext&) { - (*this)(n); - } - - void operator()(const Graph::edge_iterator& it, Galois::UserContext&) { - (*this)(it); - } - - void operator()(const Graph::edge_iterator& ii) { - GNode dst = graph.getEdgeDst(ii); - SNode& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - - Dist oldDist; - while (true) { - oldDist = ddata.dist; - if (oldDist <= newDist) - return; - if (__sync_bool_compare_and_swap(&ddata.dist, oldDist, newDist)) { - next->wl.push(dst); - next->count += 1 - + std::distance(graph.edge_begin(dst, Galois::MethodFlag::NONE), - graph.edge_end(dst, Galois::MethodFlag::NONE)); - break; - } - } - } - - void operator()(const GNode& n) { - for (Graph::edge_iterator ii = graph.edge_begin(n, Galois::MethodFlag::NONE), - ei = graph.edge_end(n, Galois::MethodFlag::NONE); ii != ei; ++ii) { - (*this)(ii); - } - } - }; - - struct BackwardProcess { - typedef int tt_does_not_need_aborts; - typedef int tt_does_not_need_push; - - Graph& graph; - CountingBag* next; - Dist newDist; - BackwardProcess(Graph& g, CountingBag* n, int d): graph(g), next(n), newDist(d) { } - - void operator()(const GNode& n, Galois::UserContext&) { - (*this)(n); - } - - void operator()(const GNode& n) { - SNode& sdata = graph.getData(n, Galois::MethodFlag::NONE); - if (sdata.dist <= newDist) - return; - - for (Graph::in_edge_iterator ii = graph.in_edge_begin(n, Galois::MethodFlag::NONE), - ei = graph.in_edge_end(n, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getInEdgeDst(ii); - SNode& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - - if (ddata.dist + 1 == newDist) { - sdata.dist = newDist; - next->wl.push(n); - next->count += 1 - + std::distance(graph.edge_begin(n, Galois::MethodFlag::NONE), - graph.edge_end(n, Galois::MethodFlag::NONE)); - break; - } - } - } - }; - - void operator()(Graph& graph, const GNode& source) { - using namespace Galois::WorkList; - typedef dChunkedLIFO<256> WL; - int next = 0; - Dist newDist = 1; - graph.getData(source).dist = 0; - Galois::for_each(graph.out_edges(source, Galois::MethodFlag::NONE).begin(), - graph.out_edges(source, Galois::MethodFlag::NONE).end(), - ForwardProcess(graph, &bags[next], newDist)); - while (!bags[next].empty()) { - size_t nextSize = bags[next].size(); - int cur = next; - next = (cur + 1) & 1; - newDist++; - std::cout << nextSize << " " << (nextSize > graph.sizeEdges() / 20) << "\n"; - if (nextSize > graph.sizeEdges() / 20) - Galois::do_all_local(graph, BackwardProcess(graph, &bags[next], newDist)); - else - Galois::for_each_local(bags[cur].wl, ForwardProcess(graph, &bags[next], newDist), Galois::wl()); - bags[cur].clear(); - } - } -}; - -//! 
BFS using optimized flags and barrier scheduling -template -struct BarrierAlgo { - typedef Galois::Graph::LC_CSR_Graph - ::template with_numa_alloc::type - ::template with_no_lockable::type - Graph; - typedef Graph::GraphNode GNode; - typedef std::pair WorkItem; - - std::string name() const { return "Barrier"; } - void readGraph(Graph& graph) { Galois::Graph::readGraph(graph, filename); } - - struct Process { - typedef int tt_does_not_need_aborts; - - Graph& graph; - Process(Graph& g): graph(g) { } - - void operator()(const WorkItem& item, Galois::UserContext& ctx) const { - GNode n = item.first; - - Dist newDist = item.second; - - for (Graph::edge_iterator ii = graph.edge_begin(n, Galois::MethodFlag::NONE), - ei = graph.edge_end(n, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - SNode& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - - Dist oldDist; - while (true) { - oldDist = ddata.dist; - if (oldDist <= newDist) - break; - if (!useCas || __sync_bool_compare_and_swap(&ddata.dist, oldDist, newDist)) { - if (!useCas) - ddata.dist = newDist; - ctx.push(WorkItem(dst, newDist + 1)); - break; - } - } - } - } - }; - - void operator()(Graph& graph, const GNode& source) const { - graph.getData(source).dist = 0; - Galois::for_each(WorkItem(source, 1), Process(graph), Galois::wl()); - } -}; - -struct HybridAlgo: public HybridBFS { - std::string name() const { return "Hybrid"; } - - void readGraph(Graph& graph) { readInOutGraph(graph); } -}; - -template -struct DeterministicAlgo { - typedef Galois::Graph::LC_CSR_Graph - ::template with_numa_alloc::type Graph; - typedef Graph::GraphNode GNode; - - std::string name() const { return "Deterministic"; } - void readGraph(Graph& graph) { Galois::Graph::readGraph(graph, filename); } - - typedef std::pair WorkItem; - - struct Process { - typedef int tt_needs_per_iter_alloc; // For LocalState - static_assert(Galois::needs_per_iter_alloc::value, "Oops"); - - Graph& graph; - - Process(Graph& g): graph(g) { } - - struct LocalState { - typedef typename Galois::PerIterAllocTy::rebind::other Alloc; - typedef std::deque Pending; - Pending pending; - LocalState(Process& self, Galois::PerIterAllocTy& alloc): pending(alloc) { } - }; - typedef LocalState GaloisDeterministicLocalState; - static_assert(Galois::has_deterministic_local_state::value, "Oops"); - - uintptr_t galoisDeterministicId(const WorkItem& item) const { - return item.first; - } - static_assert(Galois::has_deterministic_id::value, "Oops"); - - void build(const WorkItem& item, typename LocalState::Pending* pending) const { - GNode n = item.first; - - Dist newDist = item.second; - - for (Graph::edge_iterator ii = graph.edge_begin(n, Galois::MethodFlag::NONE), - ei = graph.edge_end(n, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - SNode& ddata = graph.getData(dst, Galois::MethodFlag::ALL); - - Dist oldDist; - while (true) { - oldDist = ddata.dist; - if (oldDist <= newDist) - break; - pending->push_back(dst); - break; - } - } - } - - void modify(const WorkItem& item, Galois::UserContext& ctx, typename LocalState::Pending* ppending) const { - Dist newDist = item.second; - bool useCas = false; - - for (typename LocalState::Pending::iterator ii = ppending->begin(), ei = ppending->end(); ii != ei; ++ii) { - GNode dst = *ii; - SNode& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - - Dist oldDist; - while (true) { - oldDist = ddata.dist; - if (oldDist <= newDist) - break; - if (!useCas || __sync_bool_compare_and_swap(&ddata.dist, 
oldDist, newDist)) { - if (!useCas) - ddata.dist = newDist; - ctx.push(WorkItem(dst, newDist + 1)); - break; - } - } - } - } - - void operator()(const WorkItem& item, Galois::UserContext& ctx) const { - typename LocalState::Pending* ppending; - if (Version == DetAlgo::disjoint) { - bool used; - LocalState* localState = (LocalState*) ctx.getLocalState(used); - ppending = &localState->pending; - if (used) { - modify(item, ctx, ppending); - return; - } - } - if (Version == DetAlgo::disjoint) { - build(item, ppending); - } else { - typename LocalState::Pending pending(ctx.getPerIterAlloc()); - build(item, &pending); - graph.getData(item.first, Galois::MethodFlag::WRITE); // Failsafe point - modify(item, ctx, &pending); - } - } - }; - - void operator()(Graph& graph, const GNode& source) const { -#ifdef GALOIS_USE_EXP - typedef Galois::WorkList::BulkSynchronousInline<> WL; -#else - typedef Galois::WorkList::BulkSynchronous > WL; -#endif - graph.getData(source).dist = 0; - - switch (Version) { - case DetAlgo::none: Galois::for_each(WorkItem(source, 1), Process(graph),Galois::wl()); break; - case DetAlgo::base: Galois::for_each_det(WorkItem(source, 1), Process(graph)); break; - case DetAlgo::disjoint: Galois::for_each_det(WorkItem(source, 1), Process(graph)); break; - default: std::cerr << "Unknown algorithm " << int(Version) << "\n"; abort(); - } - } -}; - -template -void run() { - typedef typename Algo::Graph Graph; - typedef typename Graph::GraphNode GNode; - - Algo algo; - Graph graph; - GNode source, report; - - initialize(algo, graph, source, report); - - //Galois::preAlloc(numThreads + (3*graph.size() * sizeof(typename Graph::node_data_type)) / Galois::Runtime::MM::pageSize); - Galois::preAlloc(8*(numThreads + (graph.size() * sizeof(typename Graph::node_data_type)) / Galois::Runtime::MM::pageSize)); - - Galois::reportPageAlloc("MeminfoPre"); - - Galois::StatTimer T; - std::cout << "Running " << algo.name() << " version\n"; - T.start(); - Galois::do_all_local(graph, Initialize(graph)); - algo(graph, source); - T.stop(); - - Galois::reportPageAlloc("MeminfoPost"); - - std::cout << "Node " << reportNode << " has distance " << graph.getData(report).dist << "\n"; - - if (!skipVerify) { - if (verify(graph, source)) { - std::cout << "Verification successful.\n"; - } else { - std::cerr << "Verification failed.\n"; - assert(0 && "Verification failed"); - abort(); - } - } -} - -int main(int argc, char **argv) { - Galois::StatManager statManager; - LonestarStart(argc, argv, name, desc, url); - - using namespace Galois::WorkList; - typedef BulkSynchronous > BSWL; - -#ifdef GALOIS_USE_EXP - typedef BulkSynchronousInline<> BSInline; -#else - typedef BSWL BSInline; -#endif - if (useDetDisjoint) - algo = Algo::deterministicDisjoint; - else if (useDetBase) - algo = Algo::deterministic; - - Galois::StatTimer T("TotalTime"); - T.start(); - switch (algo) { - case Algo::serial: run(); break; - case Algo::async: run(); break; - case Algo::barrier: run >(); break; - case Algo::barrierWithCas: run >(); break; - case Algo::barrierWithInline: run >(); break; - case Algo::highCentrality: run(); break; - case Algo::hybrid: run(); break; -#ifdef GALOIS_USE_EXP - case Algo::graphlab: run(); break; - case Algo::ligraChi: run >(); break; - case Algo::ligra: run >(); break; -#endif - case Algo::deterministic: run >(); break; - case Algo::deterministicDisjoint: run >(); break; - default: std::cerr << "Unknown algorithm\n"; abort(); - } - T.stop(); - - return 0; -} diff --git a/maxflow/galois/apps/boruvka/Boruvka.cpp 
b/maxflow/galois/apps/boruvka/Boruvka.cpp deleted file mode 100644 index dde57c2..0000000 --- a/maxflow/galois/apps/boruvka/Boruvka.cpp +++ /dev/null @@ -1,472 +0,0 @@ -/** Spanning-tree application -*- C++ -*- - * @file - * - * A minimum spanning tree algorithm to demonstrate the Galois system. - * - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author Donald Nguyen - */ -#include "Galois/config.h" -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" -#include "Galois/Bag.h" -#include "Galois/Statistic.h" -#include "Galois/UnionFind.h" -#include "Galois/Graph/LCGraph.h" -#include "Galois/ParallelSTL/ParallelSTL.h" -#include "llvm/Support/CommandLine.h" - -#ifdef GALOIS_USE_EXP -#include "Galois/Runtime/BulkSynchronousWork.h" -#endif - -#include "Lonestar/BoilerPlate.h" - -#include GALOIS_CXX11_STD_HEADER(atomic) -#include -#include -#include - -namespace cll = llvm::cl; - -static const char* name = "Boruvka's Minimum Spanning Tree Algorithm"; -static const char* desc = "Computes the minimum spanning forest of a graph"; -static const char* url = "mst"; - -enum Algo { - parallel, - exp_parallel -}; - -static cll::opt inputFilename(cll::Positional, cll::desc(""), cll::Required); -static cll::opt symmetricGraph("symmetricGraph", cll::desc("Graph already symmetric"), cll::init(false)); -static cll::opt algo("algo", cll::desc("Choose an algorithm:"), - cll::values( - clEnumVal(parallel, "Parallel"), -#ifdef GALOIS_USE_EXP - clEnumVal(exp_parallel, "Parallel (exp)"), -#endif - clEnumValEnd), cll::init(parallel)); - -typedef int EdgeData; - -struct Node: public Galois::UnionFindNode { - std::atomic lightest; -}; - -typedef Galois::Graph::LC_CSR_Graph - ::with_numa_alloc::type - ::with_no_lockable::type Graph; - -typedef Graph::GraphNode GNode; - -Graph graph; - -std::ostream& operator<<(std::ostream& os, const Node& n) { - os << "[id: " << &n << ", c: " << n.find() << "]"; - return os; -} - -struct Edge { - GNode src; - GNode dst; - const EdgeData* weight; - Edge(const GNode& s, const GNode& d, const EdgeData* w): src(s), dst(d), weight(w) { } -}; - -Galois::InsertBag mst; -EdgeData inf; -EdgeData heaviest; - -/** - * Boruvka's algorithm. Implemented bulk-synchronously in order to avoid the - * need to merge edge lists. 
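For readers unfamiliar with the algorithm named in the comment above: in each Boruvka round every component selects its lightest outgoing edge, and all selected edges are contracted together, which is what makes a bulk-synchronous formulation natural. The sketch below is a sequential rendering on an edge list with a small union-find in the spirit of the deleted UnionFind.h; the names are illustrative and none of this is the Galois code.

    #include <numeric>
    #include <vector>

    struct EdgeT { int u, v, w; };

    // Iterative find with path compression.
    static int findRoot(std::vector<int>& parent, int x) {
      int root = x;
      while (parent[root] != root) root = parent[root];
      while (parent[x] != root) { int next = parent[x]; parent[x] = root; x = next; }
      return root;
    }

    // Classic Boruvka: repeat rounds of "each component picks its lightest
    // outgoing edge, then contract all picks". Assumes distinct (or consistently
    // tie-broken) edge weights, as the textbook formulation does.
    long long boruvkaWeight(int numNodes, const std::vector<EdgeT>& edges) {
      std::vector<int> parent(numNodes);
      std::iota(parent.begin(), parent.end(), 0);

      long long weight = 0;
      bool merged = true;
      while (merged) {
        merged = false;
        std::vector<int> best(numNodes, -1);   // lightest outgoing edge per component root
        for (int i = 0; i < (int)edges.size(); ++i) {
          int ru = findRoot(parent, edges[i].u), rv = findRoot(parent, edges[i].v);
          if (ru == rv) continue;              // edge is internal to a component
          if (best[ru] == -1 || edges[i].w < edges[best[ru]].w) best[ru] = i;
          if (best[rv] == -1 || edges[i].w < edges[best[rv]].w) best[rv] = i;
        }
        for (int c = 0; c < numNodes; ++c) {
          if (best[c] == -1) continue;
          const EdgeT& e = edges[best[c]];
          int ru = findRoot(parent, e.u), rv = findRoot(parent, e.v);
          if (ru == rv) continue;              // already merged earlier this round
          parent[ru] = rv;                     // contract the two components
          weight += e.w;
          merged = true;
        }
      }
      return weight;                           // weight of the minimum spanning forest
    }

Because every component finds its pick independently, the per-round work parallelizes cleanly, which is the structure the deleted Boruvka.cpp exploits with its Merge/Find phases.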
- */ -template -struct ParallelAlgo { - struct WorkItem { - Edge edge; - int cur; - WorkItem(const GNode& s, const GNode& d, const EdgeData* w, int c): edge(s, d, w), cur(c) { } - }; - - typedef Galois::InsertBag WL; - - WL wls[3]; - WL* current; - WL* next; - WL* pending; - EdgeData limit; - - /** - * Find lightest edge between components leaving a node and add it to the - * worklist. - */ - template - static void findLightest(ParallelAlgo* self, - const GNode& src, int cur, Context& ctx, Pending& pending) { - Node& sdata = graph.getData(src, Galois::MethodFlag::NONE); - Graph::edge_iterator ii = graph.edge_begin(src, Galois::MethodFlag::NONE); - Graph::edge_iterator ei = graph.edge_end(src, Galois::MethodFlag::NONE); - - std::advance(ii, cur); - - for (; ii != ei; ++ii, ++cur) { - GNode dst = graph.getEdgeDst(ii); - Node& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - EdgeData& weight = graph.getEdgeData(ii); - if (useLimit && weight > self->limit) { - pending.push(WorkItem(src, dst, &weight, cur)); - return; - } - Node* rep; - if ((rep = sdata.findAndCompress()) != ddata.findAndCompress()) { - //const EdgeData& weight = graph.getEdgeData(ii); - EdgeData* old; - ctx.push(WorkItem(src, dst, &weight, cur)); - while (weight < *(old = rep->lightest)) { - if (rep->lightest.compare_exchange_strong(old, &weight)) - break; - } - return; - } - } - } - - /** - * Merge step specialized for first round of the algorithm. - */ - struct Initialize { - ParallelAlgo* self; - - Initialize(ParallelAlgo* s): self(s) { } - - void operator()(const GNode& src) const { - (*this)(src, *self->next, *self->pending); - } - - template - void operator()(const GNode& src, Context& ctx) const { - (*this)(src, ctx, *self->pending); - } - - template - void operator()(const GNode& src, Context& ctx, Pending& pending) const { - Node& sdata = graph.getData(src, Galois::MethodFlag::NONE); - sdata.lightest = &inf; - findLightest(self, src, 0, ctx, pending); - } - }; - - struct Merge { - // NB: tells do_all_bs this operator implicitly calls ctx.push(x) for each - // call to (*this)(x); - typedef int tt_does_not_need_push; - - ParallelAlgo* self; - - Merge(ParallelAlgo* s): self(s) { } - - void operator()(const WorkItem& item) const { - (*this)(item, *self->next, *self->pending); - } - - template - void operator()(const WorkItem& item, Context& ctx) const { - (*this)(item, ctx, *self->pending); - } - - template - void operator()(const WorkItem& item, Context& ctx, Pending& pending) const { - GNode src = item.edge.src; - Node& sdata = graph.getData(src, Galois::MethodFlag::NONE); - Node* rep = sdata.findAndCompress(); - int cur = item.cur; - - if (rep->lightest == item.edge.weight) { - GNode dst = item.edge.dst; - Node& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - if ((rep = sdata.merge(&ddata))) { - rep->lightest = &inf; - mst.push(Edge(src, dst, item.edge.weight)); - } - ++cur; - } - } - }; - - struct Find { - ParallelAlgo* self; - - Find(ParallelAlgo* s): self(s) { } - - void operator()(const WorkItem& item) const { - (*this)(item, *self->next, *self->pending); - } - - template - void operator()(const WorkItem& item, Context& ctx) const { - (*this)(item, ctx, *self->pending); - } - - template - void operator()(const WorkItem& item, Context& ctx, Pending& pending) const { - findLightest(self, item.edge.src, item.cur, ctx, pending); - } - }; - - void init() { - current = &wls[0]; - next = &wls[1]; - pending = &wls[2]; - - EdgeData delta = std::max(heaviest / 5, 1); - limit = delta; - } - - void process() { - 
Galois::Statistic rounds("Rounds"); - - init(); - - Galois::do_all_local(graph, Initialize(this)); - while (true) { - while (true) { - rounds += 1; - - std::swap(current, next); - Galois::do_all_local(*current, Merge(this)); - Galois::do_all_local(*current, Find(this)); - current->clear(); - - if (next->empty()) - break; - } - - if (pending->empty()) - break; - - std::swap(next, pending); - - limit *= 2; - } - } - -#if defined(GALOIS_USE_EXP) && !defined(GALOIS_HAS_NO_BULKSYNCHRONOUS_EXECUTOR) - void processExp() { - typedef boost::fusion::vector Items; - - init(); - - Galois::do_all_bs_local(graph, - boost::fusion::make_vector(Merge(this), Find(this)), - Initialize(this)); - - while (!pending->empty()) { - std::swap(next, pending); - - Galois::do_all_bs_local(*next, - boost::fusion::make_vector(Merge(this), Find(this))); - - next->clear(); - - limit *= 2; - } - } -#else - void processExp() { GALOIS_DIE("not supported"); } -#endif - - void operator()() { - if (useExp) { - processExp(); - } else { - process(); - } - } -}; - -struct is_bad_graph { - bool operator()(const GNode& n) const { - Node& me = graph.getData(n); - for (Graph::edge_iterator ii = graph.edge_begin(n), ei = graph.edge_end(n); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Node& data = graph.getData(dst); - if (me.findAndCompress() != data.findAndCompress()) { - std::cerr << "not in same component: " << me << " and " << data << "\n"; - return true; - } - } - return false; - } -}; - -struct is_bad_mst { - bool operator()(const Edge& e) const { - return graph.getData(e.src).findAndCompress() != graph.getData(e.dst).findAndCompress(); - } -}; - -struct CheckAcyclic { - struct Accum { - Galois::GAccumulator roots; - }; - - Accum* accum; - - void operator()(const GNode& n) { - Node& data = graph.getData(n); - if (data.isRep()) - accum->roots += 1; - } - - bool operator()() { - Accum a; - accum = &a; - Galois::do_all_local(graph, *this); - unsigned numRoots = a.roots.reduce(); - unsigned numEdges = std::distance(mst.begin(), mst.end()); - if (graph.size() - numRoots != numEdges) { - std::cerr << "Generated graph is not a forest. 
" - << "Expected " << graph.size() - numRoots << " edges but " - << "found " << numEdges << "\n"; - return false; - } - - std::cout << "Num trees: " << numRoots << "\n"; - std::cout << "Tree edges: " << numEdges << "\n"; - return true; - } -}; - -struct SortEdges { - struct Accum { - Galois::GReduceMax heavy; - }; - - Accum* accum; - - void operator()(const GNode& src) { - graph.sortEdgesByEdgeData(src, std::less(), Galois::MethodFlag::NONE); - - Graph::edge_iterator ii = graph.edge_begin(src, Galois::MethodFlag::NONE); - Graph::edge_iterator ei = graph.edge_end(src, Galois::MethodFlag::NONE); - ptrdiff_t dist = std::distance(ii, ei); - if (dist == 0) - return; - std::advance(ii, dist - 1); - accum->heavy.update(graph.getEdgeData(ii)); - } - - EdgeData operator()() { - Accum a; - accum = &a; - Galois::do_all_local(graph, *this); - return a.heavy.reduce(); - } -}; - -struct get_weight { - EdgeData operator()(const Edge& e) const { return *e.weight; } -}; - -template -void run() { - Algo algo; - - return algo(); -} - -bool verify() { - if (Galois::ParallelSTL::find_if(graph.begin(), graph.end(), is_bad_graph()) == graph.end()) { - if (Galois::ParallelSTL::find_if(mst.begin(), mst.end(), is_bad_mst()) == mst.end()) { - CheckAcyclic c; - return c(); - } - } - return false; -} - -void initializeGraph() { - Galois::Graph::FileGraph origGraph; - Galois::Graph::FileGraph symGraph; - - origGraph.structureFromFileInterleaved(inputFilename); - if (!symmetricGraph) - Galois::Graph::makeSymmetric(origGraph, symGraph); - else - symGraph.swap(origGraph); - - Galois::Graph::readGraph(graph, symGraph); - - Galois::StatTimer Tsort("InitializeSortTime"); - Tsort.start(); - SortEdges sortEdges; - heaviest = sortEdges(); - if (heaviest == std::numeric_limits::max() || - heaviest == std::numeric_limits::min()) { - GALOIS_DIE("Edge weights of graph out of range"); - } - inf = heaviest + 1; - - Tsort.stop(); - - std::cout << "Nodes: " << graph.size() - << " edges: " << graph.sizeEdges() - << " heaviest edge: " << heaviest - << "\n"; -} - -int main(int argc, char** argv) { - Galois::StatManager statManager; - LonestarStart(argc, argv, name, desc, url); - - Galois::StatTimer Tinitial("InitializeTime"); - Tinitial.start(); - initializeGraph(); - Tinitial.stop(); - - Galois::preAlloc(Galois::Runtime::MM::numPageAllocTotal() * 10); - Galois::reportPageAlloc("MeminfoPre"); - Galois::StatTimer T; - T.start(); - switch (algo) { - case parallel: run >(); break; - case exp_parallel: run >(); break; - default: std::cerr << "Unknown algo: " << algo << "\n"; - } - T.stop(); - Galois::reportPageAlloc("MeminfoPost"); - - std::cout << "MST weight: " - << Galois::ParallelSTL::map_reduce(mst.begin(), mst.end(), - get_weight(), 0.0, std::plus()) - << " (" - << Galois::ParallelSTL::map_reduce(mst.begin(), mst.end(), - get_weight(), 0UL, std::plus()) - << ")\n"; - - if (!skipVerify && !verify()) { - GALOIS_DIE("verification failed"); - } - - return 0; -} - diff --git a/maxflow/galois/apps/boruvka/BoruvkaMerge.cpp b/maxflow/galois/apps/boruvka/BoruvkaMerge.cpp deleted file mode 100644 index 9c32b39..0000000 --- a/maxflow/galois/apps/boruvka/BoruvkaMerge.cpp +++ /dev/null @@ -1,523 +0,0 @@ -/** Boruvka application -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. 
- * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author Donald Nguyen - * @author Rashid Kaleem - */ - -#include "Galois/Statistic.h" -#include "Galois/Graph/Graph.h" -#include "Galois/Timer.h" -#include "Galois/Galois.h" -#include "Galois/Graph/LCGraph.h" - -#include "llvm/Support/CommandLine.h" -#include "Lonestar/BoilerPlate.h" -#ifdef GALOIS_USE_EXP -#include "Galois/PriorityScheduling.h" -#endif - -#include -#include -#include -#include -#include -#include -#include -#include - -#define BORUVKA_DEBUG 0 -#define COMPILE_STATISICS 0 -#if BORUVKA_DEBUG -#include "UnionFind.h" -#endif -#if COMPILE_STATISICS -#include -int BORUVKA_SAMPLE_FREQUENCY= 1000000; -#endif - -using namespace std; -namespace cll = llvm::cl; - -static const char* name = "Boruvka's Minimum Spanning Tree Algorithm"; -static const char* desc = "Computes a minimum weight spanning tree of a graph"; -static const char* url = "mst"; - -static cll::opt inputfile(cll::Positional, cll::desc(""), cll::Required); -static cll::opt use_weighted_rmat("wrmat",cll::desc("Weighted RMAT"), cll::Optional,cll::init(false)); -static cll::opt verify_via_kruskal("verify",cll::desc("Verify MST result via Serial Kruskal"), cll::Optional,cll::init(false)); -static int nodeID = 0; -/////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////// -struct Node { - //Do not include node data if not debugging since - //it is a useless overhead. Useful for debugging though. -#if BORUVKA_DEBUG - int id; - Node(int i=-1) : - id(i) { - } - std::string toString() { - std::ostringstream s; - s << "N(" << id << ")"; - return s.str(); - } -#else - Node(int){}; - Node(){}; -#endif -}; -std::ostream& operator<<(std::ostream& s, Node& n) { -#if BORUVKA_DEBUG - s << n.toString(); -#endif - return s; -} -/////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////// -typedef long NodeDataType; -typedef int EdgeDataType; - -typedef Galois::Graph::FirstGraph Graph; -typedef Graph::GraphNode GNode; -//The graph. 
-Graph graph; -/////////////////////////////////////////////////////////////////////////////////////// - -/////////////////////////////////////////////////////////////////////////////////////// -#if COMPILE_STATISICS -struct GraphStats{ - std::vector average_degrees; - std::vector time_vals; - std::vector max_degrees; - unsigned long counter; - GraphStats(){ - counter=0; - } - GraphStats & tick(){ - ++counter; - return *this; - } - void snap(){ - unsigned long num_nodes=0; - unsigned long num_edges=0; - unsigned long current_degree = 0; - unsigned long max_degree=0; - for(Graph::iterator it = graph.begin(), end_it = graph.end(); it!=end_it; ++it){ - ++num_nodes; - current_degree=0; - for(Graph::edge_iterator e_it = graph.edge_begin(*it, Galois::MethodFlag::NONE), e_it_end = graph.edge_end(*it, Galois::MethodFlag::NONE); e_it!=e_it_end; ++e_it){ - ++num_edges; - ++current_degree; - } - if(current_degree > max_degree) max_degree = current_degree; - } - time_vals.push_back(time(0)); - average_degrees.push_back((float)(num_edges)/(num_nodes)); - max_degrees.push_back(max_degree); - } - void dump(ostream & out){ - out<<"\nMax degrees,"; - for(size_t i=0;i=0); - Node & ddata = graph.getData(graph.getEdgeDst(dst)); - std::cout << "1) " << sdata << " => " << ddata << " [ " << w << " ] " << std::endl; - numEdges++; - } - } - std::cout << "Num edges " << numEdges << std::endl; -} -/////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////// -Galois::Runtime::PerThreadStorage MSTWeight; -struct process { - template - void operator()(GNode& src, ContextTy& lwl) { - if (graph.containsNode(src) == false) - return; - graph.getData(src, Galois::MethodFlag::ALL); - GNode * minNeighbor = 0; -#if BORUVKA_DEBUG - std::cout<<"Processing "<::max(); - //Acquire locks on neighborhood. - for (Graph::edge_iterator dst = graph.edge_begin(src, Galois::MethodFlag::ALL), edst = graph.edge_end(src, Galois::MethodFlag::ALL); dst != edst; ++dst) { - graph.getData(graph.getEdgeDst(dst)); - } - //Find minimum neighbor - for (Graph::edge_iterator e_it = graph.edge_begin(src, Galois::MethodFlag::NONE), edst = graph.edge_end(src, Galois::MethodFlag::NONE); e_it != edst; ++e_it) { - EdgeDataType w = graph.getEdgeData(e_it, Galois::MethodFlag::NONE); - assert(w>=0); - if (w < minEdgeWeight) { - minNeighbor = &((*e_it).first()); - minEdgeWeight = w; - } - } - //If there are no outgoing neighbors. - if (minEdgeWeight == std::numeric_limits::max()) { - graph.removeNode(src, Galois::MethodFlag::NONE); - return; - } -#if BORUVKA_DEBUG - std::cout << " Min edge from "<=0); - //update MST weight. 
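One small pattern worth calling out around the operator above: MSTWeight is per-thread storage, each iteration adds the weight of the edge it contracts to its own local slot, and the partial sums are reduced only once at the end of the run. A minimal sketch of that accumulate-then-reduce pattern using plain std::thread (no Galois; function and variable names here are illustrative):

    #include <cstddef>
    #include <numeric>
    #include <thread>
    #include <vector>

    // Each thread accumulates into its own slot to avoid contention on a shared
    // counter; the slots are combined once after all threads have joined.
    // A production version would pad each slot to a cache line to avoid false
    // sharing, which is part of what a per-thread-storage abstraction provides.
    long long parallelSum(const std::vector<int>& weights, unsigned numThreads) {
      std::vector<long long> partial(numThreads, 0);
      std::vector<std::thread> workers;
      for (unsigned t = 0; t < numThreads; ++t) {
        workers.emplace_back([&, t] {
          for (std::size_t i = t; i < weights.size(); i += numThreads)
            partial[t] += weights[i];          // thread-local slot, no synchronization needed
        });
      }
      for (auto& w : workers) w.join();
      return std::accumulate(partial.begin(), partial.end(), 0LL);  // final reduce
    }
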
- *MSTWeight.getLocal() += minEdgeWeight; - typedef std::pair EdgeData; - typedef std::set, Galois::PerIterAllocTy::rebind::other> edsetTy; - edsetTy toAdd(std::less(), Galois::PerIterAllocTy::rebind::other(lwl.getPerIterAlloc())); - for (Graph::edge_iterator mdst = graph.edge_begin(*minNeighbor, Galois::MethodFlag::NONE), medst = graph.edge_end(*minNeighbor, Galois::MethodFlag::NONE); mdst != medst; ++mdst) { - GNode dstNode = graph.getEdgeDst(mdst); - int edgeWeight = graph.getEdgeData(mdst,Galois::MethodFlag::NONE); - if (dstNode != src) { //Do not add the edge being contracted - Graph::edge_iterator dup_edge = graph.findEdge(src, dstNode, Galois::MethodFlag::NONE); - if (dup_edge != graph.edge_end(src, Galois::MethodFlag::NONE)) { - EdgeDataType dup_wt = graph.getEdgeData(dup_edge,Galois::MethodFlag::NONE); - graph.getEdgeData(dup_edge,Galois::MethodFlag::NONE) = std::min(edgeWeight, dup_wt); - assert(std::min(edgeWeight, dup_wt)>=0); - } else { - toAdd.insert(EdgeData(dstNode, edgeWeight)); - assert(edgeWeight>=0); - } - } - } - graph.removeNode(*minNeighbor, Galois::MethodFlag::NONE); - for (edsetTy::iterator it = toAdd.begin(), endIt = toAdd.end(); it != endIt; it++) { - graph.getEdgeData(graph.addEdge(src, it->first, Galois::MethodFlag::NONE)) = it->second; - } - lwl.push(src); -#if COMPILE_STATISICS - if(stat_collector.tick().counter%BORUVKA_SAMPLE_FREQUENCY==0) - stat_collector.snap(); -#endif - } -}; -/////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////// -struct Indexer: public std::unary_function { - unsigned operator()(const GNode& n) { - return std::distance(graph.edge_begin(n, Galois::MethodFlag::NONE), graph.edge_end(n, Galois::MethodFlag::NONE)); - } - static unsigned foo(const GNode& n) { - return std::distance(graph.edge_begin(n, Galois::MethodFlag::NONE), graph.edge_end(n, Galois::MethodFlag::NONE)); - } -}; -struct seq_less: public std::binary_function { - bool operator()(const GNode& lhs, const GNode& rhs) const { - if (Indexer::foo(lhs) < Indexer::foo(rhs)) - return true; - if (Indexer::foo(lhs) > Indexer::foo(rhs)) - return false; - return lhs < rhs; - } -}; -struct seq_gt: public std::binary_function { - bool operator()(const GNode& lhs, const GNode& rhs) const { - if (Indexer::foo(lhs) > Indexer::foo(rhs)) - return true; - if (Indexer::foo(lhs) < Indexer::foo(rhs)) - return false; - return lhs > rhs; - } -}; -//End body of for-each. -/////////////////////////////////////////////////////////////////////////////////////// -/////////////////////////////////////////////////////////////////////////////////////// -EdgeDataType runBodyParallel() { - using namespace Galois::WorkList; - typedef dChunkedFIFO<64> dChunk; - typedef ChunkedFIFO<64> Chunk; - typedef OrderedByIntegerMetric OBIM; - -#if BORUVKA_DEBUG - std::cout<<"Graph size "<::for_each(graph.begin(), graph.end(), process()); -#else - Galois::for_each_local(graph, process(), Galois::wl()); -#endif - T.stop(); - - EdgeDataType res = 0; - for (size_t i = 0; i < MSTWeight.size(); i++) { -#if BORUVKA_DEBUG - std::cout<<"MST +=" << *MSTWeight.getRemote(i)< nodes; - //Create local computation graph. - typedef Galois::Graph::LC_CSR_Graph InGraph; - typedef InGraph::GraphNode InGNode; - InGraph in_graph; - //Read graph from file. - Galois::Graph::readGraph(in_graph, input); - std::cout << "Read " << in_graph.size() << " nodes\n"; - //A node and a int is an element. 
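The contraction step just shown merges the lightest neighbour's adjacency into src and, whenever the two nodes already share an edge with the same far endpoint, keeps the smaller of the two weights (the dup_wt branch). The sketch below reproduces that merge on a map-based undirected adjacency; the types are illustrative stand-ins, not the Galois FirstGraph.

    #include <algorithm>
    #include <unordered_map>
    #include <vector>

    // adj[u] maps neighbour -> edge weight. Contract v into u: every edge v-x
    // becomes u-x, parallel edges keep the smaller weight, and the u-v edge vanishes.
    using Adjacency = std::vector<std::unordered_map<int, int>>;

    void contract(Adjacency& adj, int u, int v) {
      for (const auto& kv : adj[v]) {
        int x = kv.first, w = kv.second;
        if (x == u) continue;                        // drop the contracted edge itself
        auto it = adj[u].find(x);
        if (it == adj[u].end()) adj[u][x] = w;       // new neighbour of u
        else it->second = std::min(it->second, w);   // duplicate edge: keep the lighter one
        // keep the reverse direction consistent
        adj[x].erase(v);
        auto rit = adj[x].find(u);
        if (rit == adj[x].end()) adj[x][u] = adj[u][x];
        else rit->second = std::min(rit->second, adj[u][x]);
      }
      adj[u].erase(v);
      adj[v].clear();                                // v is now removed from the graph
    }
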
- typedef std::pair Element; - //A vector of element is 'Elements' - typedef std::vector Elements; - //A vector of 'Elements' is a 'Map' - typedef std::vector Map; - //'in_edges' is a vector of vector of pairs of nodes and int. - Map edges(in_graph.size()); - // - int numEdges = 0; - for (InGraph::iterator src = in_graph.begin(), esrc = in_graph.end(); src != esrc; ++src) { - for (InGraph::edge_iterator dst = in_graph.edge_begin(*src, Galois::MethodFlag::NONE), edst = in_graph.edge_end(*src, Galois::MethodFlag::NONE); dst != edst; ++dst) { - if (*src == *dst) { -#if BORUVKA_DEBUG - std::cout<<"ERR:: Self loop at "<<*src<begin(), ej = i->end(); j != ej; ++j) { - Graph::edge_iterator it = graph.findEdge(src, nodes[j->first], Galois::MethodFlag::NONE); - if (it != graph.edge_end(src, Galois::MethodFlag::NONE)) { - numDups++; - EdgeDataType w = (graph.getEdgeData(it)); - if (j->second < w) { - graph.getEdgeData(it) = j->second; - edge_sum += (j->second-w); - } - } else { - graph.getEdgeData(graph.addEdge(src, nodes[j->first], Galois::MethodFlag::NONE)) = j->second; - edge_sum += j->second; - } - numEdges++; - assert(edge_sum < std::numeric_limits::max()); - } - id++; - } -#if BORUVKA_DEBUG - std::cout << "Final num edges " << numEdges << " Dups " << numDups << " sum :" << edge_sum << std::endl; -#endif -} -////////////////////////////////////////////////////////////////////////////////////////// -//////////////////////////////////RMAT Reading function////////////////////////////////// -template -struct EdgeTuple{ - NdTy src; - NdTy dst; - EdTy wt; - EdgeTuple(NdTy s, NdTy d, EdTy w):src(s),dst(d),wt(w){}; - void print()const{ - cout << "[" << src << ", " << dst << " , " << wt << "]\n"; - } -}; -template -struct LessThanEdgeTuple{ -bool operator()(const EdgeTuple & o1, const EdgeTuple &o2){ - return (o1.wt==o2.wt)? 
o1.src < o2.src : o1.wt -std::ostream & operator << (std::ostream & out , const EdgeTuple & e ){ - e.print(); - return out; -} -static void readWeightedRMAT(const char* input) { - std::vector > et; - et.reserve(100000000); - ifstream inFile (input); - NodeDataType src, dst; - EdgeDataType wt; - char header[30]; - inFile.seekg(0, ios::beg); - inFile>>header; - NodeDataType max_id=0; - while(inFile.eof()==false){ - inFile>>src; - inFile>>dst; - inFile>>wt; - max_id = max(max_id, (src>dst?src:dst)); - et.push_back(EdgeTuple(src,dst,wt)); - } - - std::vector nodes; - - nodes.resize(max_id+1); - for (NodeDataType l = 0; l < max_id+1 ; ++l) { - Node n(nodeID); - GNode node = graph.createNode(n); - nodes[nodeID] = node; - nodeID++; - } - - long numEdges = 0; - EdgeDataType edge_sum = 0; - int numDups = 0; - for (std::vector >::iterator eIt = et.begin(), end = et.end(); eIt!=end; ++eIt) { - EdgeTuple e = *eIt; - Graph::edge_iterator it = graph.findEdge(nodes[e.src], nodes[e.dst], Galois::MethodFlag::NONE); - if (it != graph.edge_end(nodes[e.src], Galois::MethodFlag::NONE)) { - numDups++; - EdgeDataType w = (graph.getEdgeData(it)); - if (e.wt < w) { - graph.getEdgeData(it) = e.wt; - edge_sum += (e.wt-w); - } - } else { - graph.getEdgeData(graph.addEdge(nodes[e.src], nodes[e.dst], Galois::MethodFlag::NONE)) = e.wt; - edge_sum += e.wt; - } - numEdges++; - assert(edge_sum < std::numeric_limits::max()); - } -} - -////////////////////////////End READ WRMAT//////////////////////////////////////////////// -////////////////////////////Kruskal//////////////////////////////////////////////// -#if BORUVKA_DEBUG -typedef EdgeTuple KEdgeTuple; -typedef std::vector KruskalGraph; -KruskalGraph read_edges(Graph &g){ - KruskalGraph * ret = new KruskalGraph (); -for(Graph::iterator it = g.begin(), e = g.end(); it!=e; ++it){ - for(Graph::edge_iterator e_it = g.edge_begin(*it), e_end = g.edge_end(*it); e_it!=e_end; ++e_it){ - ret->push_back(KEdgeTuple(g.getData(*it).id,g.getData(g.getEdgeDst(e_it)).id, g.getEdgeData(e_it) )); - } -} -std::cout<<"Number of edge tuples " << ret->size() << "\n"; -return *ret; -} -EdgeDataType kruskal_impl(const size_t num_nodes, KruskalGraph kg){ - std::sort(kg.begin(), kg.end(), LessThanEdgeTuple()); - UnionFind uf(num_nodes); - size_t mst_size = 0; - EdgeDataType mst_sum = 0; - for(size_t i = 0; i < kg.size(); ++i){ - KEdgeTuple e = kg[i]; - NodeDataType src = uf.uf_find(e.src); - NodeDataType dst = uf.uf_find(e.dst); - if(src!=dst){ - uf.uf_union(src,dst); - mst_sum+=e.wt; - mst_size++; - if(mst_size>=num_nodes-1) - return mst_sum; - } - } - return -1; -} -EdgeDataType verify(Graph & g){ - return kruskal_impl(g.size(), read_edges(g)); -} -#endif -////////////////////////////////////////////////////////////////////////////////////////// -int main(int argc, char **argv) { - Galois::StatManager M; - LonestarStart(argc, argv, name, desc, url); - if(use_weighted_rmat) - readWeightedRMAT(inputfile.c_str()); - else - makeGraph(inputfile.c_str()); -#if BORUVKA_DEBUG - EdgeDataType kruskal_wt; - if(verify_via_kruskal){ - kruskal_wt= verify(graph); - cout<<"Kruskal MST Result is " << kruskal_wt <<"\n"; - } -#endif - cout << "Starting loop body\n"; - EdgeDataType mst_wt = runBodyParallel(); - cout<<"Boruvka MST Result is " << mst_wt <<"\n"; -#if BORUVKA_DEBUG - if(verify_via_kruskal){ - assert(kruskal_wt==mst_wt); - } -#endif - -#if COMPILE_STATISICS - cout<< " \n==================================================\n"; - stat_collector.dump(cout); - cout<< " 
\n==================================================\n"; -#endif - return 0; -} diff --git a/maxflow/galois/apps/boruvka/CMakeLists.txt b/maxflow/galois/apps/boruvka/CMakeLists.txt deleted file mode 100644 index 2e719be..0000000 --- a/maxflow/galois/apps/boruvka/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -app(boruvka Boruvka.cpp) -app(boruvka-merge BoruvkaMerge.cpp) diff --git a/maxflow/galois/apps/boruvka/UnionFind.h b/maxflow/galois/apps/boruvka/UnionFind.h deleted file mode 100644 index 89c098f..0000000 --- a/maxflow/galois/apps/boruvka/UnionFind.h +++ /dev/null @@ -1,39 +0,0 @@ - -#ifndef GALOIS_UNION_FIND -#define GALOIS_UNION_FIND - -template -struct UnionFind { - ElTy * parents; - const size_t size; - UnionFind(size_t sz) : - size(sz) { - parents = new ElTy [size]; - for (size_t s = 0; s < sz; s++) - parents[s] = initializer; - } - ElTy uf_find(ElTy e) { - if(parents[e]==initializer) return e; - ElTy tmp = e; - ElTy rep = initializer; - while (parents[tmp] != initializer) - tmp = parents[tmp]; - rep = tmp; - tmp = e; - while (parents[tmp] != initializer) { - parents[tmp] = rep; - tmp = parents[tmp]; - } - return rep; - } - void uf_union(ElTy e1, ElTy e2) { - parents[e1]=e2; - } - ~UnionFind() { - delete parents; - } -}; -void test_uf(){ - UnionFind sample(10000); -} -#endif // def GALOIS_UNION_FIND diff --git a/maxflow/galois/apps/clustering/AbstractNode.h b/maxflow/galois/apps/clustering/AbstractNode.h deleted file mode 100644 index 5b1ab51..0000000 --- a/maxflow/galois/apps/clustering/AbstractNode.h +++ /dev/null @@ -1,183 +0,0 @@ -/** Single source shortest paths -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Agglomerative Clustering. 
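The deleted UnionFind.h above implements find with path compression plus a trivial union, and the BORUVKA_DEBUG path uses it to cross-check the Boruvka result against a serial Kruskal run. A compact sketch of that verification oracle, independent of the Galois types (struct and function names here are illustrative):

    #include <algorithm>
    #include <numeric>
    #include <vector>

    struct WEdge { int src, dst, wt; };

    // Kruskal as a reference: sort edges by weight and greedily add any edge
    // whose endpoints are still in different components.
    long long kruskalWeight(int numNodes, std::vector<WEdge> edges) {
      std::vector<int> parent(numNodes);
      std::iota(parent.begin(), parent.end(), 0);
      auto find = [&](int x) {
        while (parent[x] != x) { parent[x] = parent[parent[x]]; x = parent[x]; }  // path halving
        return x;
      };

      std::sort(edges.begin(), edges.end(),
                [](const WEdge& a, const WEdge& b) { return a.wt < b.wt; });

      long long total = 0;
      for (const WEdge& e : edges) {
        int a = find(e.src), b = find(e.dst);
        if (a == b) continue;   // would create a cycle
        parent[a] = b;          // union
        total += e.wt;
      }
      return total;             // weight of a minimum spanning forest
    }

Checking that this total equals the Boruvka result is the same idea as the verify_via_kruskal option in the deleted BoruvkaMerge.cpp: both algorithms must agree on the forest weight even if they pick different edges under ties.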
- * - * @author Rashid Kaleem - */ -#ifndef ABSTRACTNODE_H_ -#define ABSTRACTNODE_H_ - -#include"Point3.h" -#include -#include -#include -#include -#include -using namespace std; -class AbstractNode { -public: - static int globalNumReps; - static vector*>repRandomNums; - static bool globalMultitime; -protected: - Point3 myLoc; - Point3 intensity; // Use r,g,b as x,y,z - int startTime, endTime; - vector timeVector; -public: - AbstractNode(double x, double y, double z) : - myLoc(x, y, z), intensity(0) { - startTime = -1; - } - AbstractNode() : - myLoc(0), intensity(0) { - startTime = -1; - } - virtual ~AbstractNode() { } - - double getScalarTotalIntensity() { - return (1.0f / 3.0f) * intensity.getSum(); - } - double getRelativeIntensity(int time) { - if (time < startTime || time > endTime) - return 0; - return timeVector[time - startTime]; - } - - void setIntensity(double inScaleFactor, int inTime) { - intensity.set(inScaleFactor); - if (inTime == -1) { - inTime = 0; - } - if (inTime >= 0) { - startTime = inTime; - endTime = inTime; - timeVector.clear(); - timeVector.push_back(1.0f); - } else { - //negative value used as signal that should be uniform across all time - int len = -inTime; - startTime = 0; - endTime = (int) (len - 1); - timeVector.clear(); - timeVector.resize(len); - for (int i = 0; i < len; i++) - timeVector[i] = 1.0f / len; - scaleIntensity(len); - } - } - void setSummedIntensity(AbstractNode &inA, AbstractNode &inB) { - intensity.set(inA.intensity); - intensity.add(inB.intensity); - startTime = inA.startTime < inB.startTime ? inA.startTime : inB.endTime; - endTime = inA.startTime < inB.startTime ? inB.startTime : inA.endTime; - - if (startTime != endTime) { - int len = endTime - startTime + 1; - if ((timeVector.size() == 0) || timeVector.size() < (unsigned int)len) { - timeVector.resize(len); - } else { - for (unsigned int i = 0; i < timeVector.size(); i++) { - timeVector[i] = 0; - } - } - double weightA = inA.getScalarTotalIntensity(); - double weightB = inB.getScalarTotalIntensity(); - double invDenom = 1.0f / (weightA + weightB); - weightA *= invDenom; - weightB *= invDenom; - for (int i = inA.startTime; i <= inA.endTime; i++) { - timeVector[i - startTime] += weightA * inA.timeVector[i - - inA.startTime]; - } - for (int i = inB.startTime; i <= inB.endTime; i++) { - timeVector[i - startTime] += weightB * inB.timeVector[i - - inB.startTime]; - } - } else { - timeVector.clear(); - timeVector.push_back(1.0f); - } - } - - //////////////////////////////////////////////////////// - void scaleIntensity(double inScale) { - intensity.scale(inScale); - } - - static void setGlobalNumReps() { - if (globalNumReps == 1) { - return; - } - //trees must be rebuilt for this to take effect - globalNumReps = 1; - double inc = 1.0f/1; - for (int i = 0; i < 256; i++) { - for (unsigned int i = 0; i < repRandomNums.size(); i++) { - vector * ranVec = new vector (1); - for (int j = ranVec->size()-1; j > 0; j++) { - int index = (int) (j + 1) * (inc*(double) rand()) - / (std::numeric_limits::max()); - if (index > j) { - GALOIS_DIE("Badness :", index); - } - double temp = (*ranVec)[j]; - (*ranVec)[j] = (*ranVec)[index]; - (*ranVec)[index] = temp; - } - if(AbstractNode::repRandomNums[i] !=NULL) - delete AbstractNode::repRandomNums[i]; - AbstractNode::repRandomNums[i] = ranVec; - } - } - } - - static void setGlobalMultitime() { - //trees must be rebuilt for this to take effect - globalMultitime = false; - } - - Point3 & getPoint() { - return myLoc; - } - - virtual bool isLeaf()=0; - - virtual int 
size()=0; - static void cleanup() { - for (unsigned int i = 0; i < repRandomNums.size(); i++) - delete AbstractNode::repRandomNums[i]; - } - friend ostream & operator<<(ostream & s, AbstractNode & pt); - -}; -ostream & operator<<(ostream & s, AbstractNode & pt) { - s << "Abs Node :: Loc " << pt.myLoc << " , Int ::" << pt.intensity - << " Time:: [" << pt.startTime << " - " << pt.endTime << "]"; - return s; -} -const int numRepRandomNums= 256; -vector*> AbstractNode::repRandomNums(256); -int AbstractNode::globalNumReps = -1; -bool AbstractNode::globalMultitime = false; -#endif /* ABSTRACTNODE_H_ */ diff --git a/maxflow/galois/apps/clustering/Box3d.h b/maxflow/galois/apps/clustering/Box3d.h deleted file mode 100644 index d19d448..0000000 --- a/maxflow/galois/apps/clustering/Box3d.h +++ /dev/null @@ -1,71 +0,0 @@ -/** Single source shortest paths -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Agglomerative Clustering. - * - * @author Rashid Kaleem - */ -#ifndef BOX3D_H_ -#define BOX3D_H_ -#include"Point3.h" -#include -using namespace std; -class Box3d { -protected: - Point3 min; - Point3 max; - bool initialized; - -public: - Box3d():min(std::numeric_limits::max()), max(-1*std::numeric_limits::max()){ - initialized= false; - } - void setBox(Point3 & pt){ - initialized=true; - min.set(pt); - max.set(pt); - } - void addPoint(Point3& pt){ - initialized=true; - min.setIfMin(pt); - max.setIfMax(pt); - } - void addBox(Box3d & b){ - initialized=true; - min.setIfMin(b.min); - max.setIfMax(b.max); - } - const Point3 & getMin()const{ - return min; - } - const Point3 & getMax()const { - return max; - } - bool isInitialized()const { - return initialized; - } - bool equals(const Box3d & other)const{ - return min.equals(other.min) && max.equals(other.max); - } -}; - -#endif /* BOX3D_H_ */ diff --git a/maxflow/galois/apps/clustering/CMakeLists.txt b/maxflow/galois/apps/clustering/CMakeLists.txt deleted file mode 100644 index 824c7a5..0000000 --- a/maxflow/galois/apps/clustering/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -app(clustering) diff --git a/maxflow/galois/apps/clustering/ClusterNode.h b/maxflow/galois/apps/clustering/ClusterNode.h deleted file mode 100644 index 75d6a7f..0000000 --- a/maxflow/galois/apps/clustering/ClusterNode.h +++ /dev/null @@ -1,237 +0,0 @@ -/** Single source shortest paths -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. 
- * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Agglomerative Clustering. - * - * @author Rashid Kaleem - */ -#ifndef CLUSTERNODE_H_ -#define CLUSTERNODE_H_ -#include"LeafNode.h" -#include"NodeWrapper.h" -#include -class ClusterNode : public AbstractNode{ -private : - AbstractNode *leftChild; - AbstractNode *rightChild; - vector reps; - Point3 boxRadius; - Point3 coneDirection; - double coneCos; - -public: - ClusterNode():boxRadius(0),coneDirection(0){ - } - virtual ~ClusterNode(){ -// cout<<"Clearing reps"< *ranVec = repRandomNums[(int) (repRandomNum * numRepRandomNums)]; - if (globalMultitime) { - assert(false&&"Should not have time true!"); -// int numReps = endTime - startTime + 1; -// if (reps == null || reps.length < numReps) { -// reps = new LeafNode[numReps]; -// } else { -// for (int j = numReps; j < reps.length; j++) { -// reps[j] = null; -// } //fill unused values will nulls -// } -// if (leftChild.isLeaf()) { -// LeafNode leftLeaf = (LeafNode) leftChild; -// if (rightChild.isLeaf()) { -// chooseRepsWithTime(reps, this, ranVec, leftLeaf, (LeafNode) rightChild); -// } else { -// chooseRepsWithTime(reps, this, ranVec, (ClusterNode) rightChild, leftLeaf); //note: operation is symmectric so we just interchange the children in the call -// } -// } else { -// ClusterNode leftClus = (ClusterNode) leftChild; -// if (rightChild.isLeaf()) { -// chooseRepsWithTime(reps, this, ranVec, leftClus, (LeafNode) rightChild); -// } else { -// chooseRepsWithTime(reps, this, ranVec, leftClus, (ClusterNode) rightChild); -// } -// } - } else - { - if (reps.size() == 0 || reps.size()!= (unsigned int)globalNumReps) { - reps.clear(); - reps.resize(globalNumReps); - } - if (leftChild->isLeaf()) { - LeafNode *leftLeaf = (LeafNode*) leftChild; - if (rightChild->isLeaf()) { - chooseRepsNoTime(reps, *this, ranVec, *leftLeaf, (LeafNode&) *rightChild); - } else { - chooseRepsNoTime(reps, *this, ranVec, (ClusterNode&) *rightChild, *leftLeaf); //note: operation is symmectric so we just interchange the children in the call - } - } else { - ClusterNode *leftClus = (ClusterNode*) leftChild; - if (rightChild->isLeaf()) { - chooseRepsNoTime(reps, *this, ranVec, *leftClus, (LeafNode&) *rightChild); - } else { - chooseRepsNoTime(reps, *this, ranVec, *leftClus, (ClusterNode&) *rightChild); - } - } - } - } - - static void chooseRepsNoTime(vector & repArr, AbstractNode & parent, vector * ranVec, LeafNode &left,LeafNode & right) { - double totalInten = parent.getScalarTotalIntensity(); - double leftInten = left.getScalarTotalIntensity(); - double nextTest = (*ranVec)[0] * totalInten; - for (unsigned int i = 0; i < repArr.size() - 1; i++) { 
- double test = nextTest; - nextTest = (*ranVec)[i + 1] * totalInten; - repArr[i] = (test < leftInten) ? &left : &right; - } - repArr[repArr.size() - 1] = (nextTest < leftInten) ? &left : &right; - } - - - static void chooseRepsNoTime(vector& repArr, AbstractNode &parent, vector *ranVec, ClusterNode &left, LeafNode &right) { - double totalInten = parent.getScalarTotalIntensity(); - double leftInten = left.getScalarTotalIntensity(); - double nextTest = (*ranVec)[0] * totalInten; - for (unsigned int i = 0; i < repArr.size() - 1; i++) { - double test = nextTest; - nextTest = (*ranVec)[i + 1] * totalInten; - repArr[i] = (test < leftInten) ? (left.reps[i]) : &right; - } - repArr[repArr.size() - 1] = (nextTest < leftInten) ? (left.reps[repArr.size() - 1]) : &right; - } - - static void chooseRepsNoTime(vector &repArr, AbstractNode &parent, vector *ranVec, - ClusterNode &left, ClusterNode &right) { - double totalInten = parent.getScalarTotalIntensity(); - double leftInten = left.getScalarTotalIntensity(); - double nextTest = (*ranVec)[0] * totalInten; - for (unsigned int i = 0; i < repArr.size() - 1; i++) { - double test = nextTest; - nextTest = (*ranVec)[i + 1] * totalInten; - repArr[i] = (test < leftInten) ? (left.reps[i]) : (right.reps[i]); - } - repArr[repArr.size() - 1] = (nextTest < leftInten) ? (left.reps[repArr.size() - 1]) - : (right.reps[repArr.size() - 1]); - } - - void setDirectionCone(double dirX, double dirY, double dirZ, double inConeCos) { - coneDirection.set(dirX,dirY,dirZ); - coneCos = inConeCos; - } - -// float getConeDirX() { -// return coneDirX; -// } -// -// public float getConeDirY() { -// return coneDirY; -// } -// -// public float getConeDirZ() { -// return coneDirZ; -// } - - float getConeCos() { - return coneCos; - } - /** - * - */ - void findConeDirsRecursive(vector * coordArr, vector & tempClusterArr){ - //TODO : Fix this. 
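// ----------------------------------------------------------------------------
// [Editorial sketch: not part of the original files] The chooseRepsNoTime
// overloads above all apply the same rule: each representative slot is filled
// from the left child with probability leftIntensity / totalIntensity and from
// the right child otherwise, driven by a precomputed vector of uniform random
// numbers in [0, 1). A minimal standalone version of that selection rule
// (hypothetical names; the staggered test/nextTest indexing of the original is
// simplified to a direct per-slot lookup):
#include <cstddef>
#include <vector>

template <typename Rep>
void pickWeightedReps(std::vector<Rep*>& out,
                      const std::vector<double>& uniformRandom, // values in [0, 1)
                      double totalIntensity, double leftIntensity,
                      Rep* left, Rep* right) {
    for (std::size_t i = 0; i < out.size(); ++i) {
        // Scale the draw by the parent's total intensity; the brighter child
        // therefore wins proportionally more representative slots.
        out[i] = (uniformRandom[i] * totalIntensity < leftIntensity) ? left : right;
    }
}
// [end of editorial sketch] ---------------------------------------------------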
NodeWrapper::CONE_RECURSE_DEPTH - 1 = 3 - findConeDirsRecursive(*leftChild, coordArr, 0, tempClusterArr, 3); - findConeDirsRecursive(*rightChild, coordArr, 0, tempClusterArr, 3); - - } - - static int findConeDirsRecursive(AbstractNode & node, vector *fArr, int numDirs, vector & cArr,int recurseDepth) { - if (!node.isLeaf()) { - ClusterNode & clus = (ClusterNode&) node; - if (clus.coneCos == 1.0) { - numDirs = addConeDir(fArr, numDirs, clus.coneDirection.getX(), clus.coneDirection.getY(), clus.coneDirection.getZ()); - } else if (recurseDepth <= 0) { - //find first empty slot and add this cluster there - for (int i = 0; ; i++) { - if (cArr[i] == NULL) { - cArr[i] = &clus; - if (cArr[i + 1] != NULL) { - assert(false); - } - break; - } - } - } else { - numDirs = findConeDirsRecursive(*(clus.leftChild), fArr, numDirs, cArr, recurseDepth - 1); - numDirs = findConeDirsRecursive(*(clus.rightChild), fArr, numDirs, cArr, recurseDepth - 1); - } - } else { - LeafNode &light = (LeafNode&) node; - numDirs = addConeDir(fArr, numDirs, light.getDirX(), light.getDirY(), light.getDirZ()); - } - return numDirs; - } - - static int addConeDir(vector *fArr, int numDirs, double x, double y, double z) { - //only add direction if it does not match any existing directions - for (int i = 0; i < 3 * numDirs; i++) { - if (((*fArr)[i] == x) && ((*fArr)[i + 1] == y) && ((*fArr)[i + 2] == z)) { - return numDirs; - } - } - int index = 3 * numDirs; - (*fArr)[index] = x; - (*fArr)[index + 1] = y; - (*fArr)[index + 2] = z; - return numDirs + 1; - } - - bool isLeaf() { - return false; - } - - int size() { - // only leafs are counted - return leftChild->size() + rightChild->size(); - } -}; - -#endif /* CLUSTERNODE_H_ */ diff --git a/maxflow/galois/apps/clustering/Clustering.cpp b/maxflow/galois/apps/clustering/Clustering.cpp deleted file mode 100644 index 7578e3f..0000000 --- a/maxflow/galois/apps/clustering/Clustering.cpp +++ /dev/null @@ -1,349 +0,0 @@ -/** Single source shortest paths -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Agglomerative Clustering. 
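// ----------------------------------------------------------------------------
// [Editorial sketch: not part of the original files] addConeDir in
// ClusterNode.h above stores cone directions as consecutive (x, y, z) triples
// in a flat array and only appends a new direction if it is not already
// present. A self-contained version of that de-duplicating append
// (hypothetical name; the scan here steps one triple at a time):
#include <vector>

int addConeDirection(std::vector<double>& dirs, int numDirs,
                     double x, double y, double z) {
    for (int i = 0; i < 3 * numDirs; i += 3) {
        if (dirs[i] == x && dirs[i + 1] == y && dirs[i + 2] == z)
            return numDirs;      // direction already recorded; count unchanged
    }
    int index = 3 * numDirs;     // append as the next (x, y, z) triple
    dirs[index]     = x;
    dirs[index + 1] = y;
    dirs[index + 2] = z;
    return numDirs + 1;
}
// [end of editorial sketch] ---------------------------------------------------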
- * - * @author Rashid Kaleem - */ -#include "Galois/Statistic.h" -#include "Galois/Graph/Graph.h" -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" - -#include "Lonestar/BoilerPlate.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "LeafNode.h" -#include "NodeWrapper.h" -#include "KdTree.h" -#include"ClusterNode.h" -#include -#include - -namespace cll = llvm::cl; - -static const char* name = "Unordered Agglomerative Clustering"; -static const char* desc = "Clusters data points using the well-known data-mining algorithm"; -static const char* url = "agglomerative_clustering"; - -static cll::opt numPoints("numPoints", cll::desc("Number of Points"), cll::init(1000)); - -#define DEBUG_CONSOLE 0 - -using namespace std; - -void loopBody(NodeWrapper * cluster, KdTree *kdTree, - std::vector * wl, std::vector *clusterArr, - std::vector * floatArr) ; -/////////////////////////////////////////// -void getRandomPoints(vector & lights, int numPoints){ - double dirX = 0; - double dirY = 0; - double dirZ = 1; - AbstractNode::setGlobalMultitime(); - AbstractNode::setGlobalNumReps(); - //generating random lights - for (int i = 0; i ::max()); - double y = ((double) rand())/(std::numeric_limits::max()); - double z = ((double) rand())/(std::numeric_limits::max()); - - LeafNode * l = new LeafNode(x, y, z, dirX, dirY, dirZ); -#if DEBUG_CONSOLE - cout<<"Created "<<*l< addedNodes; -struct FindMatching { - KdTree * tree; - Galois::InsertBag *newNodes; - Galois::InsertBag &allocs; - vector * coordinatesArray; - vector &clusterArray; - - FindMatching(KdTree * pT,Galois::InsertBag *&pNNodes, Galois::InsertBag &pAllocs, - vector * pCoordinatesArray,vector &pClusterArray): - tree(pT), newNodes(pNNodes), allocs(pAllocs),coordinatesArray(pCoordinatesArray), clusterArray(pClusterArray) - { - } - template - void operator()(NodeWrapper * nodeA, ContextTy& lwl) { - if (tree->contains(*nodeA)) { - NodeWrapper * nodeB = tree->findBestMatch((*nodeA)); - if (nodeB != NULL && tree->contains(*nodeB)) { - NodeWrapper * nodeBMatch = tree->findBestMatch((*nodeB)); - if (nodeBMatch!=NULL ){ - if(nodeA->equals(*nodeBMatch) && nodeA->equals(*nodeB)==false) { - //Create a new node here. 
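// ----------------------------------------------------------------------------
// [Editorial sketch: not part of the original files] The FindMatching functor
// above and the findMatch / clusterGalois / clusterSerial routines that follow
// all drive the same agglomeration rule: a pair (A, B) is merged only when each
// is the other's best match under the cluster-size metric; otherwise A is
// deferred to the next round, and the kd-tree is rebuilt from the survivors
// until fewer than two nodes remain. A simplified serial round, with
// hypothetical helper names (contains, findBestMatch, remove, and a two-node
// merge constructor) standing in for the kd-tree and NodeWrapper operations:
#include <vector>

template <typename Tree, typename Node>
std::vector<Node*> agglomerateOneRound(Tree& tree, const std::vector<Node*>& work) {
    std::vector<Node*> next;
    for (Node* a : work) {
        if (!tree.contains(*a))
            continue;                          // already consumed by an earlier merge
        Node* b = tree.findBestMatch(*a);
        if (b && b != a && tree.contains(*b) && tree.findBestMatch(*b) == a) {
            tree.remove(*a);                   // mutual best match: merge the pair
            tree.remove(*b);
            next.push_back(new Node(*a, *b));  // merged cluster replaces both nodes
        } else {
            next.push_back(a);                 // no mutual match yet; retry next round
        }
    }
    return next;                               // caller rebuilds the tree from `next`
}
// [end of editorial sketch] ---------------------------------------------------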
- if(nodeApush(newNode); - allocs.push(newNode); - addedNodes +=1; - } - } - else{ - addedNodes +=1; - newNodes->push(nodeA); - } - } - } - else{ - addedNodes +=1; - newNodes->push(nodeA); - } - } - } -}; - -//////////////////////////////////////////////////////////// -int findMatch(KdTree * tree, NodeWrapper * nodeA,Galois::InsertBag *&newNodes, Galois::InsertBag &allocs, - vector * coordinatesArray,vector &clusterArray ){ - int addCounter=0; - if (tree->contains(*nodeA)) { - NodeWrapper * nodeB = tree->findBestMatch((*nodeA)); - if (nodeB != NULL && tree->contains(*nodeB)) { - NodeWrapper * nodeBMatch = tree->findBestMatch((*nodeB)); - if (nodeBMatch!=NULL ){ -// cout<<" Match found "<<*nodeA<<" AND " << *nodeB<equals(*nodeBMatch) && nodeA->equals(*nodeB)==false) { -// cout<<" A is "<<*nodeA<<" A-Closes=B:: " << *nodeB<<" B-Closest "<<*nodeBMatch<push(newNode); - allocs.push(newNode); - addCounter++; - } - } - else{ - addCounter++; - newNodes->push(nodeA); -// cout<<" A is "<<*nodeA<<" A-Closes=B:: " << *nodeB<<" B-Closest "<<*nodeBMatch<push(nodeA); - } - } - return addCounter; -} - -/*********************************************************************************************/ -void clusterGalois(vector & lights) { - int tempSize = (1 << NodeWrapper::CONE_RECURSE_SIZE) + 1; - cout << "Temp size is " << tempSize << " coord. arr size should be "<< tempSize * 3 << endl; - vector * coordinatesArray = new vector (tempSize * 3); - vector initialWorklist(lights.size()); - vector clusterArray(tempSize); - for (unsigned int i = 0; i < lights.size(); i++) { - NodeWrapper * nw = new NodeWrapper(*(lights[i])); - initialWorklist[i] = nw; - } - KdTree * tree = (KdTree::createTree(initialWorklist)); -#if DEBUG_CONSOLE - cout<<"Tree created "<<*tree< workListOld(0); - KdTree::getAll(*tree, workListOld); - vector workList(0); - size_t size = 0; - for(unsigned int i=0;i *newNodes; - Galois::InsertBag allocs; - FindMatching findMatchingLambda(tree,newNodes, allocs,coordinatesArray,clusterArray); - Galois::StatTimer T; - T.start(); - - while(true){ - newNodes = new Galois::InsertBag(); - - addedNodes.reset(); - - findMatchingLambda.newNodes=newNodes; - findMatchingLambda.tree=tree; - - Galois::for_each(workList.begin(),workList.end(),findMatchingLambda); - - size += addedNodes.reduce(); - - workList.clear(); - for(Galois::InsertBag::iterator it = newNodes->begin(), itEnd = newNodes->end();it!=itEnd;it++) - workList.push_back(*it); - if(size<2) - break; - size=0; - KdCell::cleanupTree(tree); - tree = (KdTree::createTree(workList)); - delete newNodes; - } - T.stop(); -#if DEBUG_CONSOLE - cout<<"================================================================"<::iterator it = allocs.begin(), itEnd = allocs.end();it!=itEnd;it++) - delete *it; - delete coordinatesArray; - return; - -} - -/*********************************************************************************************/ -/////////////////////////////////////////// - -void clusterSerial(vector & lights) { - int tempSize = (1 << NodeWrapper::CONE_RECURSE_SIZE) + 1; - cout << "Temp size is " << tempSize << " coord. 
arr size should be "<< tempSize * 3 << endl; - vector * coordinatesArray = new vector (tempSize * 3); - vector initialWorklist(lights.size()); - vector clusterArray(tempSize); - for (unsigned int i = 0; i < lights.size(); i++) { - NodeWrapper * nw = new NodeWrapper(*(lights[i])); - initialWorklist[i] = nw; - } - KdTree * tree = (KdTree::createTree(initialWorklist)); -//#if DEBUG_CONSOLE - cout<<"Tree created "<<*tree< workListOld(0); - KdTree::getAll(*tree, workListOld); - vector workList(0); - for(unsigned int i=0;i newNodes; - vector allocs; - while(true){ - while (workList.size() > 1) { - cout << "===================Worklist size :: "<< workList.size() << "===============" << endl; - NodeWrapper * nodeA = workList.back(); - workList.pop_back(); - if (tree->contains(*nodeA)) { - NodeWrapper * nodeB = tree->findBestMatch((*nodeA)); - if (nodeB != NULL && tree->contains(*nodeB)) { - NodeWrapper * nodeBMatch = tree->findBestMatch((*nodeB)); - if (nodeBMatch!=NULL ){ - if(nodeA->equals(*nodeBMatch) && nodeA->equals(*nodeB)==false) { - if(nodeA::iterator it = newNodes.begin(), itEnd = newNodes.end();it!=itEnd;it++) - workList.push_back(*it); - cout<<"Newly added"< * lights =new vector(numPoints); - getRandomPoints(*lights, numPoints); -// clusterSerial(*lights); - clusterGalois(*lights); - //Cleaning up! - for(int i=0;iat(i); -#if DEBUG_CONSOLE - std::cout<<"deleted :: "<<*l<size()<<"]"< - */ -#ifndef KDCELL_H_ -#define KDCELL_H_ -#include -#include -#include -#include -#include"Point3.h" -#include -#include"NodeWrapper.h" -using namespace std; - -class KdCell { -public: - const static int LEAF; - const static int SPLIT_X; - const static int SPLIT_Y; - const static int SPLIT_Z; - const static int MAX_POINTS_IN_CELL; - bool removeFromTree; -protected: - Point3 min; - Point3 max; - const int splitType; - const double splitValue; - KdCell * leftChild; - KdCell * rightChild; - vector pointList; - -public: - KdCell() : - min(std::numeric_limits::max()), max(-1 * std::numeric_limits< - double>::max()), splitType(LEAF), splitValue(numeric_limits< - double>::max()) { - pointList.resize(MAX_POINTS_IN_CELL); - leftChild = NULL; - rightChild = NULL; - removeFromTree=false; - } - KdCell(int inSplitType, double inSplitValue) : - min(0), max(0), splitType(inSplitType), splitValue( - inSplitValue) { - if (splitType == LEAF) - pointList.resize(MAX_POINTS_IN_CELL); - else - pointList.resize(0); - leftChild=rightChild=NULL; - removeFromTree=false; - - } - virtual ~KdCell() { } - - bool equals(KdCell & other){ - if(splitType!=other.splitType) - return false; - if(splitValue!=other.splitValue) - return false; - if(min.equals(other.min)==false) - return false; - if(max.equals(other.max)==false) - return false; - if(splitType==KdCell::LEAF) - return leftChild->equals(*leftChild) && rightChild->equals(*rightChild); - if(pointList.size()!=other.pointList.size()) - return false; - for(unsigned int i=0;iequals(*other.pointList[i])==false) - return false; - } - if(pointList[i]!=other.pointList[i]) - return false; - } - return true; - } - /** - * - */ - virtual KdCell* createNewBlankCell(int splitType, double splitValue) { - cout<<"KDCELL CALLED !!!!! 
"<splitType == LEAF) { - delete root; - return; - } - if(root->leftChild!=NULL) - cleanupTree(root->leftChild); - if(root->rightChild!=NULL) - cleanupTree(root->rightChild); - delete root; - } - /*** - * - */ - static KdCell * subDivide(vector & list, int offset, - const int size, vector * arr, KdCell & factory) { - KdCell * toReturn; - if (size <= KdCell::MAX_POINTS_IN_CELL) { - - toReturn = factory.createNewBlankCell(KdCell::LEAF, numeric_limits< - double>::max()); - KdCell & cell = *toReturn; - for (int i = 0; i < size; i++) { - cell.pointList[i] = list[offset + i]; - } - for (int i = 0; i < size; i++) { - for(int j=0;jequals(*cell.pointList[j])) - assert(false); - } - } - } - cell.computeBoundingBoxFromPoints(list, size); - cell.notifyContentsRebuilt(true); - } else { - bool shouldClean = false; - if (arr == NULL) { - arr = new vector (size); - shouldClean = true; - } - Point3 min(std::numeric_limits::max()); - Point3 max(-std::numeric_limits::max()); - for (int i = offset; i < size; i++) { - min.setIfMin(list[i]->getMin()); - max.setIfMax(list[i]->getMax()); - } - Point3 diff(max); - diff.sub(min); - int splitTypeUsed = -1, splitType0, splitType1, splitType2; - double splitValueUsed = -1; - if (diff.getZ() > diff.getX() && diff.getZ() > diff.getY()) { - splitType0 = KdCell::SPLIT_Z; - bool comparCond = diff.getX() > diff.getY(); - splitType1 = comparCond ? KdCell::SPLIT_X : KdCell::SPLIT_Y; - splitType2 = comparCond ? KdCell::SPLIT_Y : KdCell::SPLIT_X; - } else if (diff.getY() > diff.getX()) { - splitType0 = KdCell::SPLIT_Y; - bool comparCond = diff.getX() > diff.getZ(); - splitType1 = comparCond ? KdCell::SPLIT_X : KdCell::SPLIT_Z; - splitType2 = comparCond ? KdCell::SPLIT_Z : KdCell::SPLIT_X; - } else { - splitType0 = KdCell::SPLIT_X; - bool comparCond = diff.getY() > diff.getZ(); - splitType1 = comparCond ? KdCell::SPLIT_Y : KdCell::SPLIT_Z; - splitType2 = comparCond ? KdCell::SPLIT_Z : KdCell::SPLIT_Y; - } -// cout<< "================================================================"<< endl; - //Perform splitting, iteratively on type0, type1, type2, whichever suceeds. - splitTypeUsed = splitType0; - splitValueUsed = computeSplitValue(list, offset, size, splitType0, - arr); - if (splitValueUsed == numeric_limits::max()) { - splitTypeUsed = splitType1; - splitValueUsed = computeSplitValue(list, offset, size, - splitType1, arr); - if (splitValueUsed == numeric_limits::max()) { - splitTypeUsed = splitType2; - splitValueUsed = computeSplitValue(list, offset, size, - splitType2, arr); - } - } - //Unable to find a good split along any axis! 
- if (splitValueUsed == numeric_limits::max()) { - assert(false && "Unable to find a valid split across any dimension!"); - } -// cout << "Before :" << offset << " , " << size << " , value ::" -// << splitValueUsed << " type:" << splitTypeUsed << endl; - int leftCountForSplit = splitList(list, offset, size, - splitValueUsed, splitTypeUsed); -// cout << "Splitting at " << offset << " , " << leftCountForSplit -// << " , " << size << " , value ::" << splitValueUsed -// << " type:" << splitTypeUsed << endl; - if (leftCountForSplit <= 1 || leftCountForSplit >= size - 1) { -// for (int i = 0; i < size; i++) -// cout << "NW In split fault " << *list[offset + i] << endl; -// cout << "Failed at " << offset << " , " << leftCountForSplit -// << " , " << size << " , value ::" << splitValueUsed -// << " type:" << splitTypeUsed << endl; - assert(false && "Invalid split"); - } - toReturn - = factory.createNewBlankCell(splitTypeUsed, splitValueUsed); - KdCell & cell = *toReturn; - cell.max.set(max); - cell.min.set(min); - cell.leftChild = subDivide(list, offset, leftCountForSplit, arr, - factory); - cell.rightChild = subDivide(list, offset + leftCountForSplit, size - - leftCountForSplit, arr, factory); - // cout << "created inner node" << cell; - //Clean up on exit. - if (shouldClean == true) - delete arr; - } - return toReturn; - - } - /** - * - */ - bool notifyContentsRebuilt(bool inChange) { - return inChange; - } - /** - * - */ - static double computeSplitValue(vector & list, int offset, - int size, int pSplitType, vector * arr) { - for (int i = 0; i < size; i++) { - (*arr)[i] = findSplitComponent(*(list[offset + i]), pSplitType); - } -// cout << "SplitVal ::[ " << pSplitType << "]"; -// for (int i = 0; i < size; i++) { -// cout << "["<<*list[offset+i]<<" , "<<(*arr)[i] << ",]"; -// } -// cout << endl; - - return findMedianGapSplit(arr, size); - } - /** - * - */ - static double findSplitComponent(NodeWrapper & n, int pSplitType) { - if (pSplitType == KdCell::SPLIT_X) - return n.getLocationX(); - if (pSplitType == KdCell::SPLIT_Y) - return n.getLocationY(); - if (pSplitType == KdCell::SPLIT_Z) - return n.getLocationZ(); - assert(false && "Invalid splitType requested in findSplitComponent"); - abort(); - return 0.0; - } - /** - * - */ - static double findMedianGapSplit(vector * arr, int size) { - -// cout << "Pre sort Median ::[ "; -// for (int i = 0; i < size; i++) { -// cout << (*arr)[i] << ","; -// } -// cout << "]" << endl; - sort(arr->begin(), arr->begin()+size); -// cout << "Sorted Median ::[ "; -// for (int i = 0; i < size; i++) { -// cout << (*arr)[i] << ","; -// } -// cout << "]" << endl; - int start = ((size - 1) >> 1) - ((size + 7) >> 3); - int end = (size >> 1) + ((size + 7) >> 3); - if (start == end) { - //should never happen - assert(false && "Start==End in findMedianSplit, should not happen!"); - } - double largestGap = 0; - double splitValue = 0; - double nextValue = (*arr)[start]; - for (int i = start; i < end; i++) { - double curValue = nextValue; //ie val[i] - nextValue = (*arr)[i + 1]; - if ((nextValue - curValue) > largestGap) { - largestGap = nextValue - curValue; - splitValue = 0.5f * (curValue + nextValue); - if (splitValue == nextValue) { - splitValue = curValue; - } //if not between then choose smaller value - } - } - if (largestGap <= 0) { - //indicate that the attempt to find a good split value failed - splitValue = numeric_limits::max(); - } - return splitValue; - - } - /** - * - */ - static int splitList(vector & list, int startIndex, int size, - double pSplitValue, const 
int pSplitType) { - // for(int i=startIndex;i the split value - //note: after splitting, (lo - startIndex) will be the size of the low group - while (lo <= hi) { - while (lo <= hi && pSplitValue >= findSplitComponent(*(list[lo]), - pSplitType)) { -// cout << "Lo[" << findSplitComponent(*(list[lo]), pSplitType) -// << "]"; - lo++; - } - while (lo <= hi && pSplitValue < findSplitComponent(*(list[hi]), - pSplitType)) { -// cout << "Hi[" << findSplitComponent(*(list[hi]), pSplitType) -// << "]"; - hi--; - } - if (lo < hi) { - int index1 = lo++; - int index2 = hi--; - NodeWrapper *temp = list[index1]; - list[index1] = list[index2]; - list[index2] = temp; - } - } - return lo - startIndex; - } - /** - * - */ - - bool contains(NodeWrapper &point) { - if (splitType == KdCell::LEAF) { - //look for it in list of points - for (int i = 0; i < KdCell::MAX_POINTS_IN_CELL; i++) { - NodeWrapper * myNode = pointList[i]; - if (myNode != NULL && (*myNode).equals(point) == true) { - return true; - } - } - return false; - } else { - //otherwise its an interior node, so find which child should contain the point - float val = findSplitComponent(point, splitType); - KdCell *child = val <= splitValue ? leftChild : rightChild; - if(child!=NULL) - return child->contains(point); - return false; - } - } - /** - * - */ - void getAll(vector & allLeaves) { - if (this->splitType == KdCell::LEAF) { - for (int i = 0; i < KdCell::MAX_POINTS_IN_CELL; i++) { - if (pointList[i] != NULL) - allLeaves.push_back(pointList[i]); - } - } else { - leftChild->getAll(allLeaves); - rightChild->getAll(allLeaves); - } - } - /** - * - */ - bool remove(NodeWrapper & nw) { - bool treeChanged = false; - treeChanged = removeInternal(nw,NULL,NULL); - cout<<"===================AFTER REMOVAL================" - <<*this<<"====================================="<splitType == KdCell::LEAF) { - int numPoints=0; - int indexToDelete = -1; - for (int i = 0; i < KdCell::MAX_POINTS_IN_CELL; i++) { - if (pointList[i] != NULL){ - if (pointList[i]->equals(nw) == true) { - indexToDelete = i; - } - numPoints++; - } - } - //If we found a match, delete the node. - if(indexToDelete!=-1){ - if(numPoints==1 && parent!=NULL && grandParent!=NULL){ - cout<<"About to Updated subnode :: " << *grandParent<leftChild->equals(*this)){ - otherChild = rightChild; - } - else{ - otherChild = leftChild; - } - - if (grandParent->leftChild->equals(*parent)) { - grandParent->leftChild=otherChild; - } else { - grandParent->rightChild=otherChild; - } - this->removeFromTree=true; - parent->removeFromTree = true; - cout<<"Updated subnode :: " << *grandParent<removeInternal(nw,this, parent); - cout<<"BEFORE EX " <<*this<splitType == KdCell::LEAF) { - bool canInsert = false; - for (int i = 0; i < KdCell::MAX_POINTS_IN_CELL; i++) { - if (current->pointList[i] == NULL) { - current->pointList[i] = &nw; - canInsert = true; - break; - } - } - //If we could not insert in there, we need to split this. 
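// ----------------------------------------------------------------------------
// [Editorial sketch: not part of the original files] Two pieces of KdCell.h
// above are easy to miss in the flattened diff. First, subDivide orders the
// candidate split axes by the extent of the cell's bounding box (largest
// extent first) and falls back to the next axis whenever computeSplitValue
// returns the double max sentinel. Second, findMedianGapSplit sorts the
// coordinates, restricts attention to a window around the median, and splits
// at the midpoint of the largest gap in that window. Both steps in isolation,
// with hypothetical names:
#include <algorithm>
#include <array>
#include <limits>
#include <vector>

enum Axis { SPLIT_X = 0, SPLIT_Y = 1, SPLIT_Z = 2 };

// dx, dy, dz are the bounding-box extents (max minus min) per axis.
std::array<Axis, 3> orderAxesByExtent(double dx, double dy, double dz) {
    if (dz > dx && dz > dy)                       // Z dominates
        return dx > dy ? std::array<Axis, 3>{SPLIT_Z, SPLIT_X, SPLIT_Y}
                       : std::array<Axis, 3>{SPLIT_Z, SPLIT_Y, SPLIT_X};
    if (dy > dx)                                  // Y dominates
        return dx > dz ? std::array<Axis, 3>{SPLIT_Y, SPLIT_X, SPLIT_Z}
                       : std::array<Axis, 3>{SPLIT_Y, SPLIT_Z, SPLIT_X};
    return dy > dz ? std::array<Axis, 3>{SPLIT_X, SPLIT_Y, SPLIT_Z}   // X dominates
                   : std::array<Axis, 3>{SPLIT_X, SPLIT_Z, SPLIT_Y};
}

// Assumes at least MAX_POINTS_IN_CELL + 1 values, as in the original caller.
double medianGapSplit(std::vector<double> values) {
    std::sort(values.begin(), values.end());
    const int size  = static_cast<int>(values.size());
    const int start = ((size - 1) >> 1) - ((size + 7) >> 3); // window around the median
    const int end   = (size >> 1) + ((size + 7) >> 3);
    double largestGap = 0, splitValue = 0;
    for (int i = start; i < end; ++i) {
        double gap = values[i + 1] - values[i];
        if (gap > largestGap) {
            largestGap = gap;
            splitValue = 0.5 * (values[i] + values[i + 1]);
            if (splitValue == values[i + 1])     // rounding landed on the upper value:
                splitValue = values[i];          // keep the split strictly between points
        }
    }
    // No positive gap in the window: signal failure so the caller tries another axis.
    return largestGap > 0 ? splitValue : std::numeric_limits<double>::max();
}
// [end of editorial sketch] ---------------------------------------------------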
- if (canInsert == false) { - if (parent == NULL) { - assert(false&&"Cannot split root node, in addNode"); - } else { - vectornewList(KdCell::MAX_POINTS_IN_CELL + 1); - for(int i=0;ipointList[i]->equals(*current->pointList[j])) - assert(false&& "Sharing!!"); - } - } - } - for (int i = 0; i < MAX_POINTS_IN_CELL; i++) - newList[i] = current->pointList[i]; - newList[MAX_POINTS_IN_CELL] = &nw; - KdCell *newCell = subDivide(newList, 0,KdCell::MAX_POINTS_IN_CELL + 1, NULL, *current); - if (parent->leftChild == current) { - parent->leftChild = newCell; - } else if (parent->rightChild == current) { - parent->rightChild = newCell; - } - canInsert = true; - delete current; - } - } - treeChanged = canInsert; - } - //Internal node. - else { - double nodeSplitAxisValue = findSplitComponent(nw, - current->splitType); - treeChanged = (nodeSplitAxisValue <= current->splitValue) ? add( - current, current->leftChild, nw) : add(current, - current->rightChild, nw); - if (treeChanged) { - bool change = current->addToBoundingBoxIfChanged(nw); - change = current->notifyPointAdded(nw, change); - - } - } - return treeChanged; - } - -private: - /** - * - */ - bool notifyPointAdded(NodeWrapper & nw, bool inChange) { - return inChange; - } - /** - * - */ - bool addToBoundingBoxIfChanged(NodeWrapper & nw) { - bool retVal = min.setIfMin(nw.getLocation()); - retVal |= max.setIfMax(nw.getLocation()); - return retVal; - } - - /** - * - */ - void computeBoundingBoxFromPoints(vector & list, int size) { - Point3 newMin(numeric_limits::max()); - Point3 newMax(-numeric_limits::max()); - for (int i = 0; i < size; i++) { - newMin.setIfMin(list[i]->getLocation()); - newMax.setIfMax(list[i]->getLocation()); - } - min.set(newMin); - max.set(newMax); - } - /** - * - */ - bool recomputeLeafBoundingBoxIfChanges() { - Point3 newMin(numeric_limits::max()); - Point3 newMax(-numeric_limits::max()); - for (int i = 0; i < KdCell::MAX_POINTS_IN_CELL; i++) { - if (pointList[i] != NULL) { - newMin.setIfMin(pointList[i]->getMin()); - newMax.setIfMax(pointList[i]->getMax()); - } - } - return updateBoundingBox(newMin, newMax); - - } - /** - * - */ - bool recomputeParentBoundingBoxIfChanges(){ - Point3 newMin(leftChild->min); - newMin.setIfMin(rightChild->min); - Point3 newMax(leftChild->max); - newMax.setIfMax(rightChild->max); - return updateBoundingBox(newMin, newMax); - } - /** - * - */ - bool updateBoundingBox(Point3 & newMin, Point3 & newMax) { - bool retVal = false; - retVal = min.setIfMin(newMin); - retVal |= max.setIfMax(newMax); - return retVal; - } - /** - * - */ - friend ostream& operator<<(ostream & s, KdCell & cell); -}; -const int KdCell::SPLIT_X = 0; -const int KdCell::SPLIT_Y = 1; -const int KdCell::SPLIT_Z = 2; -const int KdCell::LEAF = 3; -const int KdCell::MAX_POINTS_IN_CELL = 4; - -/** - * - */ -ostream& operator<<(ostream & s, KdCell & cell) { - if (cell.splitType == KdCell::LEAF) { - s << "Leaf ::["; - for (int i = 0; i < KdCell::MAX_POINTS_IN_CELL; i++) { - if (cell.pointList[i] != NULL) - s << *cell.pointList[i] << ","; - } - s << "]" << std::endl; - } else { - s << "InnerNode(" << cell.splitType << "," << cell.splitValue; - if(cell.leftChild!=NULL) - s<< ") \nLEFT::[" << (*cell.leftChild); - else - s<<" NO-LEFT "; - if(cell.rightChild!=NULL) - s<< "]\nRIGHT::["<< (*cell.rightChild); - else - s<<" NO-RIGHT"; - s<< "]"; -} - return s; -} -#endif /* KDCELL_H_ */ diff --git a/maxflow/galois/apps/clustering/KdTree.h b/maxflow/galois/apps/clustering/KdTree.h deleted file mode 100644 index b0504ac..0000000 --- 
a/maxflow/galois/apps/clustering/KdTree.h +++ /dev/null @@ -1,227 +0,0 @@ -/** Single source shortest paths -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Agglomerative Clustering. - * - * @author Rashid Kaleem - */ - -#ifndef KDTREE_H_ -#define KDTREE_H_ - -#include -#include -#include"Point3.h" -#include"NodeWrapper.h" -#include"KdCell.h" -#include"PotentialCluster.h" - -using namespace std; - -class KdTree: public KdCell { -private: - double minLightIntensity; - double maxConeCosine; - Point3 minHalfSize; - - KdTree():KdCell(), minHalfSize(std::numeric_limits::max()){ - minLightIntensity=std::numeric_limits::max(); - maxConeCosine = -1.0f; - } - KdTree(int st, double sv):KdCell(st,sv), minHalfSize(0){ - minLightIntensity=0; - maxConeCosine = -1.0f; - } - -public: - /** - * - */ - static KdTree * createTree(vector & inPoints){ - KdTree * factory = new KdTree(); - KdTree * root = (KdTree *) KdTree::subDivide(inPoints, 0, inPoints.size(), NULL, *factory); - delete factory; - return root; - } - /** - * - */ - virtual KdCell *createNewBlankCell(int inSplitType, double inSplitValue) { -// cout<<"CALLED !!!!! "< & allLeaves){ - tree.getAll(allLeaves); - } - /** - * - */ - bool notifyPointAdded(NodeWrapper & nw, bool inChange){ - if(inChange){ - double b3 = nw.getLight().getScalarTotalIntensity(); - minLightIntensity = (minLightIntensity >= b3) ? b3 : minLightIntensity; - maxConeCosine = (maxConeCosine >= nw.getConeCosine()) ? maxConeCosine : nw.getConeCosine(); - double b2 = nw.getHalfSizeX(); - double minHalfSizeX = (minHalfSize.getX() >= b2) ? b2 : minHalfSize.getX(); - double b1 = nw.getHalfSizeY(); - double minHalfSizeY = (minHalfSize.getY() >= b1) ? b1 : minHalfSize.getY(); - double b = nw.getHalfSizeZ(); - double minHalfSizeZ = (minHalfSize.getZ() >= b) ? 
b : minHalfSize.getZ(); - minHalfSize.set(minHalfSizeX,minHalfSizeY,minHalfSizeZ); - - } - else{ - double newIntensity = nw.getLight().getScalarTotalIntensity(); - if(minLightIntensity>newIntensity){ - minLightIntensity = newIntensity; - inChange=true; - } - if(maxConeCosine < nw.getConeCosine()){ - maxConeCosine= nw.getConeCosine(); - inChange=true; - } - inChange |= minHalfSize.setIfMin(nw.getHalfSizeX(),nw.getHalfSizeY(),nw.getHalfSizeZ()); - } - return inChange; - } - /** - * - */ - NodeWrapper *findBestMatch(NodeWrapper &inLight) { -// cout<<"********************************************"<equals(potentialCluster.original)==false){ - double size = NodeWrapper::potentialClusterSize(from, *(pointList[i])); - if (size < potentialCluster.clusterSize) { -// cout<<"Found close match!!! " << *pointList[i]<removeFromTree==false) - ((KdTree*) leftChild)->findNearestRecursive(potentialCluster); - if(rightChild!=NULL && rightChild->removeFromTree==false) - ((KdTree*) rightChild)->findNearestRecursive(potentialCluster); - } else { - if(rightChild!=NULL && rightChild->removeFromTree==false) - ((KdTree*) rightChild)->findNearestRecursive(potentialCluster); - if(leftChild!=NULL && leftChild->removeFromTree==false) - ((KdTree*) leftChild)->findNearestRecursive(potentialCluster); - } - } - - /** - * Determines if any element of this cell could be closer to the the cluster, outCluster, using - * the metrics defined in inBuilder. - * - * @param outCluster the cluster to test - * @return true if an element could be closer, false otherwise - */ - bool couldBeCloser(PotentialCluster &outCluster) { - //first check to see if we can prove that none of our contents could be closer than the current closest - const NodeWrapper &from = outCluster.original; - //compute minumum offset to bounding box - double a2 = min.getX() - from.getLocationX() >= from.getLocationX() - max.getX() ? min.getX() - from.getLocationX() : from.getLocationX() - max.getX(); - //more than twice as fast as Math.max(a,0) - double dx = (a2 >= 0) ? a2 : 0; - double a1 = (min.getY() - from.getLocationY() >= from.getLocationY()- max.getY()) ? min.getY() - from.getLocationY() : from.getLocationY() - max.getY(); - double dy = a1 >= 0 ? a1 : 0; - double a = (min.getZ() - from.getLocationZ() >= from.getLocationZ() - max.getZ()) ? min.getZ() - from.getLocationZ() : from.getLocationZ() - max.getZ(); - double dz = a >= 0 ? a : 0; - //expand distance by half size of from's bounding box (distance is min to center of box) - //and by half the minimum bounding box extents of any node in this cell -// cout<<"From :: " << minHalfSize<= from.getConeCosine()) ? 
from.getConeCosine() : maxConeCosine; - //minimum cluster intensity would be from's intensity plus smallest intensity inside this cell - double intensity = minLightIntensity + from.getLight().getScalarTotalIntensity(); - Point3 diff(dx,dy,dz); - - double testSize = NodeWrapper::clusterSizeMetric(diff, coneCos, intensity); - //return if our contents could be closer and so need to be checked - //extra factor of 0.9999 is to correct for any roundoff error in computing minimum size -// cout<<"Could be closer computed :: "<= 0.9999 * testSize); - } - - - -private: - -}; - -#endif /* KDTREE_H_ */ diff --git a/maxflow/galois/apps/clustering/LeafNode.h b/maxflow/galois/apps/clustering/LeafNode.h deleted file mode 100644 index b12de55..0000000 --- a/maxflow/galois/apps/clustering/LeafNode.h +++ /dev/null @@ -1,83 +0,0 @@ -/** Single source shortest paths -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Agglomerative Clustering. 
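// ----------------------------------------------------------------------------
// [Editorial sketch: not part of the original files] couldBeCloser in KdTree.h
// above prunes whole subtrees during findBestMatch by lower-bounding the
// cluster-size metric: per axis it takes the distance from the query location
// to the cell's bounding box, clamped to zero when the query already lies
// inside the box on that axis. The per-axis clamp in isolation (hypothetical
// name):
#include <algorithm>

// Distance from coordinate p to the interval [lo, hi]; 0 if p is inside it.
inline double axisDistanceToBox(double p, double lo, double hi) {
    double outside = std::max(lo - p, p - hi);   // positive only when p is outside
    return std::max(outside, 0.0);
}
// For example, the x component of the pruning test would be
//   double dx = axisDistanceToBox(from.getLocationX(), cellMin.x, cellMax.x);
// and (dx, dy, dz) then feed clusterSizeMetric to obtain the lower bound that
// is compared (with the 0.9999 round-off slack) against the best size so far.
// [end of editorial sketch] ---------------------------------------------------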
- * - * @author Rashid Kaleem - */ - -#ifndef LEAFNODE_H_ -#define LEAFNODE_H_ -#define MATH_PI 3.1415926 -#include -#include "AbstractNode.h" -#include "Point3.h" -using namespace std; -class LeafNode : public AbstractNode{ -protected: - //direction of maximum emission - Point3 direction; - /** - * Creates a new instance of MLTreeLeafNode - */ -public: - LeafNode(double x, double y, double z, double dirX, double dirY, double dirZ):AbstractNode(x,y,z), direction(dirX,dirY,dirZ) { -// this->myLoc.x = x; -// this->myLoc.y = y; -// this->myLoc.z = z; - setIntensity(1.0 / MATH_PI, 0); -// this->direction.x = dirX; -// this->direction.y = dirY; -// this->direction.z = dirZ; - } - - Point3 & getDirection(){ - return direction; - } - double getDirX(){ - return direction.getX(); - } - double getDirY(){ - return direction.getY(); - } - double getDirZ(){ - return direction.getZ(); - } - bool isLeaf() { - return true; - } - - int size() { - return 1; - } - friend ostream & operator<<(ostream & s , LeafNode & pt); - -}; - ostream & operator<<(ostream & s , LeafNode& pt){ - s<<"LeafNode :: "; - operator<<(s,(AbstractNode&)pt); - s<<"Dir::"< - */ - -#ifndef NODEWRAPPER_H_ -#define NODEWRAPPER_H_ -#include"LeafNode.h" -#include"ClusterNode.h" -#include"Box3d.h" -#include"Point3.h" -#include -class NodeWrapper :public Box3d{ -public: - static const int CONE_RECURSE_SIZE; - static const double GLOBAL_SCENE_DIAGONAL; - -private: - - AbstractNode & light; - Box3d direction; - double coneCosine; - Point3 location; - Point3 coneDirection; - const int descendents; - vector coneClusters; - const bool cleanLight; - NodeWrapper * _l, *_r; - -public: - /** - * - */ - NodeWrapper(LeafNode & inNode):light(inNode),location(0),coneDirection(0),descendents(1),cleanLight(false){ - setBox(inNode.getPoint()); - direction.setBox(inNode.getDirection()); - coneCosine=1.0f; - coneDirection.set(inNode.getDirection()); - location.set(getMin()); - location.add(getMax()); - location.scale(0.5f); - _l=_r=NULL; - } - /** - * - */ - NodeWrapper(NodeWrapper & pLeft, NodeWrapper & pRight, vector * coordArr, vector & tempClusterArr) - :light(*(new ClusterNode())), - location(0),coneDirection(0), descendents(pLeft.descendents + pRight.descendents), cleanLight(true){ - NodeWrapper * l = &pLeft, *r = &pRight; - if( (pLeft.location.getX() > pRight.location.getX()) || - ( (pLeft.location.getX()==pRight.location.getX()) && (pLeft.location.getY() > pRight.location.getY()) ) || - ( (pLeft.location.getX()==pRight.location.getX()) && (pLeft.location.getY() == pRight.location.getY()) && (pLeft.location.getZ() > pRight.location.getZ()) ) - ){ - l = &pRight; - r = &pLeft; - - } - addBox(*r); - addBox(*l); - location.set(max); - location.add(min); - location.scale(0.5); - ((ClusterNode&)light).setBox(min, max); - ((ClusterNode&)light).setChildren(&l->light, &r->light, ((double) rand())/numeric_limits::max()); - coneCosine = computeCone(*l,*r,((ClusterNode&)light)); - if(coneCosine>-0.9f){ - direction.addBox(l->direction); - direction.addBox(r->direction); - ((ClusterNode&)light).findConeDirsRecursive(coordArr,tempClusterArr); - int numClus=0; - for(; tempClusterArr[numClus]!=NULL;numClus++){ - } - if(numClus>0){ - this->coneClusters.resize(numClus); - for(int j=0;jdirection.equals(other.direction)==false) - retVal &= false; - if(this->coneCosine != other.coneCosine) - retVal &= false; - if(this->location.equals(other.location)==false) - retVal &= false; - if(this->coneDirection.equals(other.coneDirection)==false) - retVal &= false; - 
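// ----------------------------------------------------------------------------
// [Editorial sketch: not part of the original files] The two-child NodeWrapper
// constructor above canonicalises its pair: the child whose location comes
// first in (x, y, z) lexicographic order becomes the left child, so merging A
// with B and B with A build identical clusters. That comparison in isolation
// (a hypothetical free function over the file's Point3):
// Returns true when a's location precedes b's in (x, y, z) lexicographic order.
bool locationPrecedes(const Point3& a, const Point3& b) {
    if (a.getX() != b.getX()) return a.getX() < b.getX();
    if (a.getY() != b.getY()) return a.getY() < b.getY();
    return a.getZ() < b.getZ();
}
// The constructor swaps (left, right) whenever locationPrecedes(right.location,
// left.location), mirroring the three-clause test in the original.
// [end of editorial sketch] ---------------------------------------------------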
if(this->direction.equals(other.direction)==false) - retVal &= false; - //TODO : Add light comparison logic here! - return retVal; - } - /** - * - */ - - static double potentialClusterSize(const NodeWrapper &a, NodeWrapper &b) { - Point3 max(a.max); - max.setIfMax(b.max); - Point3 min(a.min); - min.setIfMin(b.min); - Point3 diff(max); - diff.sub(min); - double minCos = computeCone(a, b); - double maxIntensity = a.light.getScalarTotalIntensity() + b.light.getScalarTotalIntensity(); - return clusterSizeMetric(diff, minCos, maxIntensity); - } - - /** - * Compute a measure of the size of a light cluster - */ - static double clusterSizeMetric(Point3 & size, double cosSemiAngle, double intensity) { - double len2 = size.getLen(); - double angleFactor = (1 - cosSemiAngle) * GLOBAL_SCENE_DIAGONAL; - double res = intensity * (len2 + angleFactor * angleFactor); -// cout<<">>>>>>>>>>> "< - */ -#ifndef POINT3_H_ -#define POINT3_H_ -using namespace std; -#include -class Point3 { - double x, y, z; -public: - - Point3(double v) { - this->set(v); - } - Point3(double x, double y, double z) { - this->x = x; - this->y = y; - this->z = z; - } - Point3(const Point3 & pt) { - this->x = pt.x; - this->y = pt.y; - this->z = pt.z; - } - double getSum() const{ - return x + y + z; - } - double getLen() const{ - return x*x + y*y + z*z; - } - void scale(double factor) { - x *= factor; - y *= factor; - z *= factor; - } - void add(const Point3 & pt) { - x += pt.x; - y += pt.y; - z += pt.z; - } - void sub(const Point3 & pt) { - x -= pt.x; - y -= pt.y; - z -= pt.z; - } - void set(double n) { - x = y = z = n; - } - void set(double x, double y, double z) { - this->x = x; - this->y = y; - this->z = z; - } - void set(const Point3 & other) { - x = other.x; - y = other.y; - z = other.z; - } - bool setIfMax(double nx, double ny, double nz) { - bool ret = false; - if (nx > x) { - x = nx; - ret = true; - } - if (ny > y) { - y = ny; - ret = true; - } - if (nz > z) { - z = nz; - ret = true; - } - return ret; - } - bool setIfMin(double nx, double ny, double nz) { - bool ret = false; - if (nx < x) { - x = nx; - ret = true; - } - if (ny < y) { - y = ny; - ret = true; - } - if (nz < z) { - z = nz; - ret = true; - } - return ret; - } - bool setIfMax(const Point3 & other) { - return setIfMax(other.x, other.y, other.z); - } - bool setIfMin(const Point3 & other) { - return setIfMin(other.x, other.y, other.z); - } - double getX()const { - return x; - } - double getY() const{ - return y; - } - double getZ() const{ - return z; - } - - bool equals(const Point3 & other)const { - return (x==other.x) && (y==other.y) && (z==other.z); - } - friend ostream & operator<<(ostream & s, const Point3 & pt); - -}; -ostream & operator<<(ostream & s, const Point3 & pt) { - s << "[" << pt.x << "," << pt.y << "," << pt.z << "]"; - return s; -} - -#endif /* POINT3_H_ */ diff --git a/maxflow/galois/apps/clustering/PotentialCluster.h b/maxflow/galois/apps/clustering/PotentialCluster.h deleted file mode 100644 index 0c275a4..0000000 --- a/maxflow/galois/apps/clustering/PotentialCluster.h +++ /dev/null @@ -1,56 +0,0 @@ -/** Single source shortest paths -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. 
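// ----------------------------------------------------------------------------
// [Editorial sketch: not part of the original files] clusterSizeMetric in
// NodeWrapper.h above combines the spatial extent of a candidate pair with its
// cone semi-angle and summed intensity; potentialClusterSize applies it to the
// merged bounding box of the two candidates. Note that Point3::getLen()
// returns the squared length (x*x + y*y + z*z), so no square root is taken.
// The formula in isolation (hypothetical parameter names):
double clusterSizeMetricSketch(double sizeSquaredLength, // |max - min|^2 of the merged box
                               double cosSemiAngle,      // cosine of the merged cone angle
                               double intensity,         // summed scalar intensity
                               double sceneDiagonal) {   // GLOBAL_SCENE_DIAGONAL constant
    double angleFactor = (1.0 - cosSemiAngle) * sceneDiagonal;
    return intensity * (sizeSquaredLength + angleFactor * angleFactor);
}
// Smaller values mean tighter, dimmer, more directionally coherent pairs, so
// findBestMatch keeps the candidate with the minimum metric.
// [end of editorial sketch] ---------------------------------------------------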
- * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Agglomerative Clustering. - * - * @author Rashid Kaleem - */ -#ifndef POTENTIALCLUSTER_H_ -#define POTENTIALCLUSTER_H_ -#include -#include"NodeWrapper.h" - -using namespace std; -class PotentialCluster { -public: - const NodeWrapper & original; - NodeWrapper * closest; - double clusterSize; - - PotentialCluster(NodeWrapper & pOriginal): original(pOriginal) { - closest = NULL; - clusterSize = numeric_limits::max(); - } - friend ostream & operator<<(ostream & s, const PotentialCluster & p); - -}; -ostream & operator<<(ostream & s, const PotentialCluster & p){ - s<<"PC : ["< - */ -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" -#include "Galois/Bag.h" -#include "Galois/DomainSpecificExecutors.h" -#include "Galois/Statistic.h" -#include "Galois/UnionFind.h" -#include "Galois/Graph/LCGraph.h" -#include "Galois/Graph/OCGraph.h" -#include "Galois/Graph/TypeTraits.h" -#include "Galois/ParallelSTL/ParallelSTL.h" -#include "llvm/Support/CommandLine.h" -#include "Lonestar/BoilerPlate.h" - -#include -#include -#include -#include - -#ifdef GALOIS_USE_EXP -#include "LigraAlgo.h" -#include "GraphLabAlgo.h" -#include "GraphChiAlgo.h" -#endif - -const char* name = "Connected Components"; -const char* desc = "Computes the connected components of a graph"; -const char* url = 0; - -enum Algo { - async, - asyncOc, - blockedasync, - graphchi, - graphlab, - labelProp, - ligra, - ligraChi, - serial, - synchronous -}; - -enum WriteType { - none, - largest -}; - -namespace cll = llvm::cl; -static cll::opt inputFilename(cll::Positional, cll::desc(""), cll::Required); -static cll::opt outputFilename(cll::Positional, cll::desc("[output file]"), cll::init("largest.gr")); -static cll::opt transposeGraphName("graphTranspose", cll::desc("Transpose of input graph")); -static cll::opt symmetricGraph("symmetricGraph", cll::desc("Input graph is symmetric"), cll::init(false)); -cll::opt memoryLimit("memoryLimit", - cll::desc("Memory limit for out-of-core algorithms (in MB)"), cll::init(~0U)); -static cll::opt writeType("output", cll::desc("Output type:"), - cll::values( - clEnumValN(WriteType::none, "none", "None (default)"), - clEnumValN(WriteType::largest, "largest", "Write largest component"), - clEnumValEnd), cll::init(WriteType::none)); -static cll::opt algo("algo", cll::desc("Choose an algorithm:"), - cll::values( - clEnumValN(Algo::async, "async", "Asynchronous (default)"), - clEnumValN(Algo::blockedasync, "blockedasync", "Blocked asynchronous"), - clEnumValN(Algo::asyncOc, "asyncOc", "Asynchronous out-of-core memory"), - clEnumValN(Algo::labelProp, "labelProp", "Using label propagation algorithm"), - clEnumValN(Algo::serial, "serial", "Serial"), - 
clEnumValN(Algo::synchronous, "sync", "Synchronous"), -#ifdef GALOIS_USE_EXP - clEnumValN(Algo::graphchi, "graphchi", "Using GraphChi programming model"), - clEnumValN(Algo::graphlab, "graphlab", "Using GraphLab programming model"), - clEnumValN(Algo::ligraChi, "ligraChi", "Using Ligra and GraphChi programming model"), - clEnumValN(Algo::ligra, "ligra", "Using Ligra programming model"), -#endif - clEnumValEnd), cll::init(Algo::async)); - -struct Node: public Galois::UnionFindNode { - typedef Node* component_type; - unsigned int id; - - component_type component() { return this->findAndCompress(); } -}; - -template -void readInOutGraph(Graph& graph) { - using namespace Galois::Graph; - if (symmetricGraph) { - Galois::Graph::readGraph(graph, inputFilename); - } else if (transposeGraphName.size()) { - Galois::Graph::readGraph(graph, inputFilename, transposeGraphName); - } else { - GALOIS_DIE("Graph type not supported"); - } -} - -/** - * Serial connected components algorithm. Just use union-find. - */ -struct SerialAlgo { - typedef Galois::Graph::LC_CSR_Graph - ::with_no_lockable::type Graph; - typedef Graph::GraphNode GNode; - - void readGraph(Graph& graph) { Galois::Graph::readGraph(graph, inputFilename); } - - struct Merge { - Graph& graph; - Merge(Graph& g): graph(g) { } - - void operator()(const GNode& src) const { - Node& sdata = graph.getData(src, Galois::MethodFlag::NONE); - - for (Graph::edge_iterator ii = graph.edge_begin(src, Galois::MethodFlag::NONE), - ei = graph.edge_end(src, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Node& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - sdata.merge(&ddata); - } - } - }; - - void operator()(Graph& graph) { - std::for_each(graph.begin(), graph.end(), Merge(graph)); - } -}; - -/** - * Synchronous connected components algorithm. Initially all nodes are in - * their own component. Then, we merge endpoints of edges to form the spanning - * tree. Merging is done in two phases to simplify concurrent updates: (1) - * find components and (2) union components. Since the merge phase does not - * do any finds, we only process a fraction of edges at a time; otherwise, - * the union phase may unnecessarily merge two endpoints in the same - * component. - */ -struct SynchronousAlgo { - typedef Galois::Graph::LC_CSR_Graph - ::with_no_lockable::type - ::with_numa_alloc::type Graph; - typedef Graph::GraphNode GNode; - - void readGraph(Graph& graph) { Galois::Graph::readGraph(graph, inputFilename); } - - struct Edge { - GNode src; - Node* ddata; - int count; - Edge(GNode src, Node* ddata, int count): src(src), ddata(ddata), count(count) { } - }; - - Galois::InsertBag wls[2]; - Galois::InsertBag* next; - Galois::InsertBag* cur; - - struct Initialize { - Graph& graph; - Galois::InsertBag& next; - Initialize(Graph& g, Galois::InsertBag& next): graph(g), next(next) { } - - //! 
Add the first edge between components to the worklist - void operator()(const GNode& src) const { - for (Graph::edge_iterator ii = graph.edge_begin(src, Galois::MethodFlag::NONE), - ei = graph.edge_end(src, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - if (symmetricGraph && src >= dst) - continue; - Node& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - next.push(Edge(src, &ddata, 0)); - break; - } - } - }; - - struct Merge { - Graph& graph; - Galois::Statistic& emptyMerges; - Merge(Graph& g, Galois::Statistic& e): graph(g), emptyMerges(e) { } - - void operator()(const Edge& edge) const { - Node& sdata = graph.getData(edge.src, Galois::MethodFlag::NONE); - if (!sdata.merge(edge.ddata)) - emptyMerges += 1; - } - }; - - struct Find { - typedef int tt_does_not_need_aborts; - typedef int tt_does_not_need_push; - typedef int tt_does_not_need_stats; - - Graph& graph; - Galois::InsertBag& next; - Find(Graph& g, Galois::InsertBag& next): graph(g), next(next) { } - - //! Add the next edge between components to the worklist - void operator()(const Edge& edge, Galois::UserContext&) const { - (*this)(edge); - } - - void operator()(const Edge& edge) const { - GNode src = edge.src; - Node& sdata = graph.getData(src, Galois::MethodFlag::NONE); - Node* scomponent = sdata.findAndCompress(); - Graph::edge_iterator ii = graph.edge_begin(src, Galois::MethodFlag::NONE); - Graph::edge_iterator ei = graph.edge_end(src, Galois::MethodFlag::NONE); - int count = edge.count + 1; - std::advance(ii, count); - for (; ii != ei; ++ii, ++count) { - GNode dst = graph.getEdgeDst(ii); - if (symmetricGraph && src >= dst) - continue; - Node& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - Node* dcomponent = ddata.findAndCompress(); - if (scomponent != dcomponent) { - next.push(Edge(src, dcomponent, count)); - break; - } - } - } - }; - - void operator()(Graph& graph) { - Galois::Statistic rounds("Rounds"); - Galois::Statistic emptyMerges("EmptyMerges"); - - cur = &wls[0]; - next = &wls[1]; - Galois::do_all_local(graph, Initialize(graph, *cur)); - - while (!cur->empty()) { - Galois::do_all_local(*cur, Merge(graph, emptyMerges)); - Galois::for_each_local(*cur, Find(graph, *next)); - cur->clear(); - std::swap(cur, next); - rounds += 1; - } - } -}; - -struct LabelPropAlgo { - struct LNode { - typedef unsigned int component_type; - unsigned int id; - unsigned int comp; - - component_type component() { return comp; } - bool isRep() { return id == comp; } - }; - - typedef Galois::Graph::LC_CSR_Graph - ::with_no_lockable::type - ::with_numa_alloc::type InnerGraph; - typedef Galois::Graph::LC_InOut_Graph Graph; - typedef Graph::GraphNode GNode; - typedef LNode::component_type component_type; - - void readGraph(Graph& graph) { - readInOutGraph(graph); - } - - struct Initialize { - Graph& graph; - - Initialize(Graph& g): graph(g) { } - void operator()(GNode n) { - LNode& data = graph.getData(n, Galois::MethodFlag::NONE); - data.comp = data.id; - } - }; - - template - struct Process { - typedef int tt_does_not_need_aborts; - Graph& graph; - Process(Graph& g): graph(g) { } - - template - void update(LNode& sdata, Iterator ii, Iterator ei, GetNeighbor get, Galois::UserContext& ctx) { - for (; ii != ei; ++ii) { - GNode dst = get(ii); - LNode& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - - while (true) { - component_type old = ddata.comp; - component_type newV = sdata.comp; - if (old <= newV) - break; - if (__sync_bool_compare_and_swap(&ddata.comp, old, newV)) { - ctx.push(dst); - 
break; - } - } - } - } - - struct BackwardUpdate { - Graph& graph; - BackwardUpdate(Graph& g): graph(g) { } - GNode operator()(typename Graph::in_edge_iterator ii) { return graph.getInEdgeDst(ii); } - }; - - struct ForwardUpdate { - Graph& graph; - ForwardUpdate(Graph& g): graph(g) { } - GNode operator()(typename Graph::edge_iterator ii) { return graph.getEdgeDst(ii); } - }; - - //! Add the next edge between components to the worklist - void operator()(const GNode& src, Galois::UserContext& ctx) { - LNode& sdata = graph.getData(src, Galois::MethodFlag::NONE); - - if (Backward) { - update(sdata, graph.in_edge_begin(src, Galois::MethodFlag::NONE), graph.in_edge_end(src, Galois::MethodFlag::NONE), - BackwardUpdate(graph), ctx); - } - if (Forward) { - update(sdata, graph.edge_begin(src, Galois::MethodFlag::NONE), graph.edge_end(src, Galois::MethodFlag::NONE), - ForwardUpdate(graph), ctx); - } - } - }; - - void operator()(Graph& graph) { - typedef Galois::WorkList::dChunkedFIFO<256> WL; - - Galois::do_all_local(graph, Initialize(graph)); - if (symmetricGraph) { - Galois::for_each_local(graph, Process(graph), Galois::wl()); - } else { - Galois::for_each_local(graph, Process(graph), Galois::wl()); - } - } -}; - -struct AsyncOCAlgo { - typedef Galois::Graph::OCImmutableEdgeGraph Graph; - typedef Graph::GraphNode GNode; - - void readGraph(Graph& graph) { - readInOutGraph(graph); - } - - struct Merge { - typedef int tt_does_not_need_aborts; - typedef int tt_does_not_need_push; - - Galois::Statistic& emptyMerges; - Merge(Galois::Statistic& e): emptyMerges(e) { } - - //! Add the next edge between components to the worklist - template - void operator()(GTy& graph, const GNode& src) const { - Node& sdata = graph.getData(src, Galois::MethodFlag::NONE); - - for (typename GTy::edge_iterator ii = graph.edge_begin(src, Galois::MethodFlag::NONE), - ei = graph.edge_end(src, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Node& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - - if (symmetricGraph && src >= dst) - continue; - - if (!sdata.merge(&ddata)) - emptyMerges += 1; - } - } - }; - - void operator()(Graph& graph) { - Galois::Statistic emptyMerges("EmptyMerges"); - - Galois::GraphChi::vertexMap(graph, Merge(emptyMerges), memoryLimit); - } -}; - -/** - * Like synchronous algorithm, but if we restrict path compression (as done is - * @link{UnionFindNode}), we can perform unions and finds concurrently. - */ -struct AsyncAlgo { - typedef Galois::Graph::LC_CSR_Graph - ::with_numa_alloc::type - ::with_no_lockable::type - Graph; - typedef Graph::GraphNode GNode; - - void readGraph(Graph& graph) { Galois::Graph::readGraph(graph, inputFilename); } - - struct Merge { - typedef int tt_does_not_need_aborts; - typedef int tt_does_not_need_push; - - Graph& graph; - Galois::Statistic& emptyMerges; - Merge(Graph& g, Galois::Statistic& e): graph(g), emptyMerges(e) { } - - //! 
Add the next edge between components to the worklist - void operator()(const GNode& src, Galois::UserContext&) const { - (*this)(src); - } - - void operator()(const GNode& src) const { - Node& sdata = graph.getData(src, Galois::MethodFlag::NONE); - - for (Graph::edge_iterator ii = graph.edge_begin(src, Galois::MethodFlag::NONE), - ei = graph.edge_end(src, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Node& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - - if (symmetricGraph && src >= dst) - continue; - - if (!sdata.merge(&ddata)) - emptyMerges += 1; - } - } - }; - - void operator()(Graph& graph) { - Galois::Statistic emptyMerges("EmptyMerges"); - Galois::for_each_local(graph, Merge(graph, emptyMerges)); - } -}; - -/** - * Improve performance of async algorithm by following machine topology. - */ -struct BlockedAsyncAlgo { - typedef Galois::Graph::LC_CSR_Graph - ::with_numa_alloc::type - ::with_no_lockable::type - Graph; - typedef Graph::GraphNode GNode; - - struct WorkItem { - GNode src; - Graph::edge_iterator start; - }; - - void readGraph(Graph& graph) { Galois::Graph::readGraph(graph, inputFilename); } - - struct Merge { - typedef int tt_does_not_need_aborts; - - Graph& graph; - Galois::InsertBag& items; - - //! Add the next edge between components to the worklist - template - void process(const GNode& src, const Graph::edge_iterator& start, Pusher& pusher) { - Node& sdata = graph.getData(src, Galois::MethodFlag::NONE); - int count = 1; - for (Graph::edge_iterator ii = start, ei = graph.edge_end(src, Galois::MethodFlag::NONE); - ii != ei; - ++ii, ++count) { - GNode dst = graph.getEdgeDst(ii); - Node& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - - if (symmetricGraph && src >= dst) - continue; - - if (sdata.merge(&ddata)) { - if (Limit == 0 || count != Limit) - continue; - } - - if (MakeContinuation || (Limit != 0 && count == Limit)) { - WorkItem item = { src, ii + 1 }; - pusher.push(item); - break; - } - } - } - - void operator()(const GNode& src) { - Graph::edge_iterator start = graph.edge_begin(src, Galois::MethodFlag::NONE); - if (Galois::Runtime::LL::getPackageForSelf(Galois::Runtime::LL::getTID()) == 0) { - process(src, start, items); - } else { - process(src, start, items); - } - } - - void operator()(const WorkItem& item, Galois::UserContext& ctx) { - process(item.src, item.start, ctx); - } - }; - - void operator()(Graph& graph) { - Galois::InsertBag items; - Merge merge = { graph, items }; - Galois::do_all_local(graph, merge, Galois::loopname("Initialize"), Galois::do_all_steal(false)); - Galois::for_each_local(items, merge, - Galois::loopname("Merge"), Galois::wl >()); - } -}; - -template -struct is_bad { - typedef typename Graph::GraphNode GNode; - Graph& graph; - - is_bad(Graph& g): graph(g) { } - - bool operator()(const GNode& n) const { - typedef typename Graph::node_data_reference node_data_reference; - - node_data_reference me = graph.getData(n); - for (typename Graph::edge_iterator ii = graph.edge_begin(n), ei = graph.edge_end(n); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - node_data_reference data = graph.getData(dst); - if (data.component() != me.component()) { - std::cerr << "not in same component: " - << me.id << " (" << me.component() << ")" - << " and " - << data.id << " (" << data.component() << ")" - << "\n"; - return true; - } - } - return false; - } -}; - -template -bool verify(Graph& graph, - typename std::enable_if::value>::type* = 0) { - return true; -} - -template -bool verify(Graph& graph, 
- typename std::enable_if::value>::type* = 0) { - return Galois::ParallelSTL::find_if(graph.begin(), graph.end(), is_bad(graph)) == graph.end(); -} - -template -void writeComponent(Graph& graph, typename Graph::node_data_type::component_type component, - typename std::enable_if::value>::type* = 0) { - std::cerr << "Writing component not supported for this graph\n"; - abort(); -} - -template -void writeComponent(Graph& graph, typename Graph::node_data_type::component_type component, - typename std::enable_if::value>::type* = 0) { - typedef typename Graph::GraphNode GNode; - typedef typename Graph::node_data_reference node_data_reference; - - // id == 1 if node is in component - size_t numEdges = 0; - size_t numNodes = 0; - for (typename Graph::iterator ii = graph.begin(), ei = graph.end(); ii != ei; ++ii) { - node_data_reference data = graph.getData(*ii); - data.id = data.component() == component ? 1 : 0; - if (data.id) { - size_t degree = - std::distance(graph.edge_begin(*ii, Galois::MethodFlag::NONE), graph.edge_end(*ii, Galois::MethodFlag::NONE)); - numEdges += degree; - numNodes += 1; - } - } - - typedef Galois::Graph::FileGraphWriter Writer; - Writer p; - p.setNumNodes(numNodes); - p.setNumEdges(numEdges); - - p.phase1(); - // partial sums of ids: id == new_index + 1 - typename Graph::node_data_type* prev = 0; - for (typename Graph::iterator ii = graph.begin(), ei = graph.end(); ii != ei; ++ii) { - node_data_reference data = graph.getData(*ii); - if (prev) - data.id = prev->id + data.id; - if (data.component() == component) { - size_t degree = - std::distance(graph.edge_begin(*ii, Galois::MethodFlag::NONE), graph.edge_end(*ii, Galois::MethodFlag::NONE)); - size_t sid = data.id - 1; - assert(sid < numNodes); - p.incrementDegree(sid, degree); - } - - prev = &data; - } - - assert(!prev || prev->id == numNodes); - - p.phase2(); - for (typename Graph::iterator ii = graph.begin(), ei = graph.end(); ii != ei; ++ii) { - node_data_reference data = graph.getData(*ii); - if (data.component() != component) - continue; - - size_t sid = data.id - 1; - - for (typename Graph::edge_iterator jj = graph.edge_begin(*ii, Galois::MethodFlag::NONE), - ej = graph.edge_end(*ii, Galois::MethodFlag::NONE); jj != ej; ++jj) { - GNode dst = graph.getEdgeDst(jj); - node_data_reference ddata = graph.getData(dst, Galois::MethodFlag::NONE); - size_t did = ddata.id - 1; - - //assert(ddata.component == component); - assert(sid < numNodes && did < numNodes); - p.addNeighbor(sid, did); - } - } - - p.finish(); - - std::cout - << "Writing largest component to " << outputFilename - << " (nodes: " << numNodes << " edges: " << numEdges << ")\n"; - - p.structureToFile(outputFilename); -} - -template -struct CountLargest { - typedef typename Graph::node_data_type::component_type component_type; - typedef std::map Map; - typedef typename Graph::GraphNode GNode; - - struct Accums { - Galois::GMapElementAccumulator map; - Galois::GAccumulator reps; - }; - - Graph& graph; - Accums& accums; - - CountLargest(Graph& g, Accums& accums): graph(g), accums(accums) { } - - void operator()(const GNode& x) { - typename Graph::node_data_reference n = graph.getData(x, Galois::MethodFlag::NONE); - if (n.isRep()) { - accums.reps += 1; - return; - } - - // Don't add reps to table to avoid adding components of size 1 - accums.map.update(n.component(), 1); - } -}; - -template -struct ComponentSizePair { - typedef typename Graph::node_data_type::component_type component_type; - - component_type component; - int size; - - struct Max { - 
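The FileGraphWriter calls above (phase1/incrementDegree, then phase2/addNeighbor, then finish) follow the standard two-pass CSR construction: count per-node degrees, prefix-sum them into row offsets, then fill the adjacency array. A plain-vector sketch of that pattern, with invented names, for reference:

#include <cstdint>
#include <utility>
#include <vector>

struct CSR {
  std::vector<uint64_t> rowPtr;   // size numNodes + 1
  std::vector<uint32_t> colInd;   // size numEdges
};

CSR buildCSR(uint32_t numNodes,
             const std::vector<std::pair<uint32_t, uint32_t>>& edges) {
  CSR g;
  g.rowPtr.assign(numNodes + 1, 0);
  for (const auto& e : edges) ++g.rowPtr[e.first + 1];   // pass 1: degrees
  for (uint32_t i = 0; i < numNodes; ++i)                // prefix sum -> row offsets
    g.rowPtr[i + 1] += g.rowPtr[i];
  g.colInd.resize(edges.size());
  std::vector<uint64_t> cursor(g.rowPtr.begin(), g.rowPtr.end() - 1);
  for (const auto& e : edges)                            // pass 2: scatter neighbors
    g.colInd[cursor[e.first]++] = e.second;
  return g;
}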
ComponentSizePair operator()(const ComponentSizePair& a, const ComponentSizePair& b) const { - if (a.size > b.size) - return a; - return b; - } - }; - - ComponentSizePair(): component(0), size(0) { } - ComponentSizePair(component_type c, int s): component(c), size(s) { } -}; - -template -struct ReduceMax { - typedef typename Graph::node_data_type::component_type component_type; - typedef Galois::GSimpleReducible,typename ComponentSizePair::Max> Accum; - - Accum& accum; - - ReduceMax(Accum& accum): accum(accum) { } - - void operator()(const std::pair& x) { - accum.update(ComponentSizePair(x.first, x.second)); - } -}; - -template -typename Graph::node_data_type::component_type findLargest(Graph& graph) { - typedef CountLargest CL; - typedef ReduceMax RM; - - typename CL::Accums accums; - Galois::do_all_local(graph, CL(graph, accums)); - typename CL::Map& map = accums.map.reduce(); - size_t reps = accums.reps.reduce(); - - typename RM::Accum accumMax; - Galois::do_all(map.begin(), map.end(), RM(accumMax)); - ComponentSizePair& largest = accumMax.reduce(); - - // Compensate for dropping representative node of components - double ratio = graph.size() - reps + map.size(); - size_t largestSize = largest.size + 1; - if (ratio) - ratio = largestSize / ratio; - - std::cout << "Number of non-trivial components: " << map.size() << " (largest: " << largestSize << " [" << ratio << "])\n"; - std::cout << "Total components: " << reps << "\n"; - - return largest.component; -} - -template -void run() { - typedef typename Algo::Graph Graph; - - Algo algo; - Graph graph; - - algo.readGraph(graph); - std::cout << "Read " << graph.size() << " nodes\n"; - - unsigned int id = 0; - for (typename Graph::iterator ii = graph.begin(), ei = graph.end(); ii != ei; ++ii, ++id) { - graph.getData(*ii).id = id; - } - - Galois::preAlloc(numThreads + (2 * graph.size() * sizeof(typename Graph::node_data_type)) / Galois::Runtime::MM::pageSize); - Galois::reportPageAlloc("MeminfoPre"); - - Galois::StatTimer T; - T.start(); - algo(graph); - T.stop(); - - Galois::reportPageAlloc("MeminfoPost"); - - if (!skipVerify || writeType == WriteType::largest) { - auto component = findLargest(graph); - if (!verify(graph)) { - std::cerr << "verification failed\n"; - assert(0 && "verification failed"); - abort(); - } - if (writeType == WriteType::largest && component) { - writeComponent(graph, component); - } - } -} - -int main(int argc, char** argv) { - Galois::StatManager statManager; - LonestarStart(argc, argv, name, desc, url); - - Galois::StatTimer T("TotalTime"); - T.start(); - switch (algo) { - case Algo::asyncOc: run(); break; - case Algo::async: run(); break; - case Algo::blockedasync: run(); break; - case Algo::labelProp: run(); break; - case Algo::serial: run(); break; - case Algo::synchronous: run(); break; -#ifdef GALOIS_USE_EXP - case Algo::graphchi: run(); break; - case Algo::graphlab: run(); break; - case Algo::ligraChi: run >(); break; - case Algo::ligra: run >(); break; -#endif - default: std::cerr << "Unknown algorithm\n"; abort(); - } - T.stop(); - - return 0; -} diff --git a/maxflow/galois/apps/delaunayrefinement/CMakeLists.txt b/maxflow/galois/apps/delaunayrefinement/CMakeLists.txt deleted file mode 100644 index 708060e..0000000 --- a/maxflow/galois/apps/delaunayrefinement/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -app(delaunayrefinement) diff --git a/maxflow/galois/apps/delaunayrefinement/Cavity.h b/maxflow/galois/apps/delaunayrefinement/Cavity.h deleted file mode 100644 index 8f54e79..0000000 --- 
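CountLargest and ReduceMax above split the "find the biggest component" step into a parallel histogram plus a max-reduction. Stripped of the Galois accumulators, the computation is just the following (sequential, invented names, integral or otherwise hashable component ids); the parallel version additionally skips representatives so that size-1 components never enter the map, which is why the reported largest size adds 1 back.

#include <cstddef>
#include <unordered_map>
#include <utility>
#include <vector>

template <typename ComponentId>
std::pair<ComponentId, std::size_t>
largestComponent(const std::vector<ComponentId>& componentOf) {
  std::unordered_map<ComponentId, std::size_t> sizes;
  for (const ComponentId& c : componentOf) ++sizes[c];   // histogram of component ids
  std::pair<ComponentId, std::size_t> best{ComponentId(), 0};
  for (const auto& kv : sizes)                           // max-reduction over buckets
    if (kv.second > best.second) best = kv;
  return best;
}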
a/maxflow/galois/apps/delaunayrefinement/Cavity.h +++ /dev/null @@ -1,188 +0,0 @@ -/** Delaunay refinement -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * @author Milind Kulkarni - */ -#include -#include - -class Cavity { - typedef std::vector::other> ConnTy; - - Tuple center; - GNode centerNode; - std::vector::other> frontier; - // !the cavity itself - PreGraph pre; - // !what the new elements should look like - PostGraph post; - // the edge-relations that connect the boundary to the cavity - ConnTy connections; - Element* centerElement; - Graph* graph; - int dim; - - /** - * find the node that is opposite the obtuse angle of the element - */ - GNode getOpposite(GNode node) { - assert(std::distance(graph->edge_begin(node), graph->edge_end(node)) == 3); - Element& element = graph->getData(node, Galois::MethodFlag::ALL); - Tuple elementTuple = element.getObtuse(); - Edge ObtuseEdge = element.getOppositeObtuse(); - for (Graph::edge_iterator ii = graph->edge_begin(node, Galois::MethodFlag::ALL), - ee = graph->edge_end(node, Galois::MethodFlag::ALL); ii != ee; ++ii) { - GNode neighbor = graph->getEdgeDst(ii); - //Edge& edgeData = graph->getEdgeData(node, neighbor); - Edge edgeData = element.getRelatedEdge(graph->getData(neighbor, Galois::MethodFlag::ALL)); - if (elementTuple != edgeData.getPoint(0) && elementTuple != edgeData.getPoint(1)) { - return neighbor; - } - } - GALOIS_DIE("unreachable"); - return node; - } - - void expand(GNode node, GNode next) { - Element& nextElement = graph->getData(next, Galois::MethodFlag::ALL); - if ((!(dim == 2 && nextElement.dim() == 2 && next != centerNode)) - && nextElement.inCircle(center)) { - // isMember says next is part of the cavity, and we're not the second - // segment encroaching on this cavity - if ((nextElement.dim() == 2) && (dim != 2)) { - // is segment, and we are encroaching - initialize(next); - build(); - } else { - if (!pre.containsNode(next)) { - pre.addNode(next); - frontier.push_back(next); - } - } - } else { - // not a member - //Edge& edgeData = graph->getEdgeData(node, next); - Edge edgeData = nextElement.getRelatedEdge(graph->getData(node, Galois::MethodFlag::ALL)); - EdgeTuple edge(node, next, edgeData); - if (std::find(connections.begin(), connections.end(), edge) == connections.end()) { - connections.push_back(edge); - } - } - } - -public: - Cavity(Graph* g, Galois::PerIterAllocTy& cnx) - :frontier(cnx), - pre(cnx), - post(cnx), - connections(cnx), - graph(g) - {} - - void initialize(GNode node) { - pre.reset(); 
- post.reset(); - connections.clear(); - frontier.clear(); - centerNode = node; - centerElement = &graph->getData(centerNode, Galois::MethodFlag::ALL); - while (graph->containsNode(centerNode, Galois::MethodFlag::ALL) && centerElement->isObtuse()) { - centerNode = getOpposite(centerNode); - centerElement = &graph->getData(centerNode, Galois::MethodFlag::ALL); - } - center = centerElement->getCenter(); - dim = centerElement->dim(); - pre.addNode(centerNode); - frontier.push_back(centerNode); - } - - void build() { - while (!frontier.empty()) { - GNode curr = frontier.back(); - frontier.pop_back(); - for (Graph::edge_iterator ii = graph->edge_begin(curr, Galois::MethodFlag::ALL), - ee = graph->edge_end(curr, Galois::MethodFlag::ALL); - ii != ee; ++ii) { - GNode neighbor = graph->getEdgeDst(ii); - expand(curr, neighbor); - } - } - } - - /** - * Create the new cavity based on the data of the old one - */ - void computePost() { - if (centerElement->dim() == 2) { // we built around a segment - GNode n1 = graph->createNode(Element(center, centerElement->getPoint(0))); - GNode n2 = graph->createNode(Element(center, centerElement->getPoint(1))); - - post.addNode(n1); - post.addNode(n2); - } - - for (ConnTy::iterator ii = connections.begin(), ee = connections.end(); ii != ee; ++ii) { - EdgeTuple tuple = *ii; - Element newElement(center, tuple.data.getPoint(0), tuple.data.getPoint(1)); - GNode other = pre.containsNode(tuple.dst) ? tuple.src : tuple.dst; - Element& otherElement = graph->getData(other, Galois::MethodFlag::ALL); - - GNode newNode = graph->createNode(newElement); // XXX - const Edge& otherEdge = newElement.getRelatedEdge(otherElement); - post.addEdge(newNode, other, otherEdge); - - for (PostGraph::iterator ii = post.begin(), ee = post.end(); ii != ee; ++ii) { - GNode node = *ii; - Element& element = graph->getData(node, Galois::MethodFlag::ALL); - if (element.isRelated(newElement)) { - const Edge& edge = newElement.getRelatedEdge(element); - post.addEdge(newNode, node, edge); - } - } - post.addNode(newNode); - } - } - - void update(GNode node, Galois::UserContext& ctx) { - for (PreGraph::iterator ii = pre.begin(), ee = pre.end(); ii != ee; ++ii) - graph->removeNode(*ii, Galois::MethodFlag::NONE); - - //add new data - for (PostGraph::iterator ii = post.begin(), ee = post.end(); ii != ee; ++ii) { - GNode n = *ii; - graph->addNode(n, Galois::MethodFlag::NONE); - Element& element = graph->getData(n, Galois::MethodFlag::NONE); - if (element.isBad()) { - ctx.push(n); - } - } - - for (PostGraph::edge_iterator ii = post.edge_begin(), ee = post.edge_end(); ii != ee; ++ii) { - EdgeTuple edge = *ii; - graph->addEdge(edge.src, edge.dst, Galois::MethodFlag::NONE); - } - - if (graph->containsNode(node, Galois::MethodFlag::NONE)) { - ctx.push(node); - } - } -}; diff --git a/maxflow/galois/apps/delaunayrefinement/DelaunayRefinement.cpp b/maxflow/galois/apps/delaunayrefinement/DelaunayRefinement.cpp deleted file mode 100644 index 577ddbc..0000000 --- a/maxflow/galois/apps/delaunayrefinement/DelaunayRefinement.cpp +++ /dev/null @@ -1,203 +0,0 @@ -/** Delaunay refinement -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. 
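computePost() above re-triangulates the cavity as a fan around the new center point: one new element per boundary connection, each sharing the center. The surrounding code is mostly graph bookkeeping; the geometric core, reduced to a sketch with invented types, is simply:

#include <utility>
#include <vector>

struct Pt  { double x, y; };
struct Tri { Pt a, b, c; };

// One new triangle per cavity-boundary edge, all sharing the inserted center point.
std::vector<Tri> fanAround(const Pt& center,
                           const std::vector<std::pair<Pt, Pt>>& boundaryEdges) {
  std::vector<Tri> out;
  out.reserve(boundaryEdges.size());
  for (const auto& e : boundaryEdges)
    out.push_back(Tri{center, e.first, e.second});
  return out;
}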
- * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Refinement of an initial, unrefined Delaunay mesh to eliminate triangles - * with angles < 30 degrees, using a variation of Chew's algorithm. - * - * @author Milind Kulkarni - * @author Andrew Lenharth - */ -#include "Mesh.h" -#include "Cavity.h" -#include "Verifier.h" - -#include "Galois/Galois.h" -#include "Galois/ParallelSTL/ParallelSTL.h" -#include "Galois/Bag.h" -#include "Galois/Statistic.h" - -#include "llvm/Support/CommandLine.h" -#include "Lonestar/BoilerPlate.h" - -#include -#include -#include - -namespace cll = llvm::cl; - -static const char* name = "Delaunay Mesh Refinement"; -static const char* desc = "Refines a Delaunay triangulation mesh such that no angle in the mesh is less than 30 degrees"; -static const char* url = "delaunay_mesh_refinement"; - -static cll::opt filename(cll::Positional, cll::desc(""), cll::Required); - -Graph* graph; - -enum DetAlgo { - nondet, - detBase, - detPrefix, - detDisjoint -}; - -static cll::opt detAlgo(cll::desc("Deterministic algorithm:"), - cll::values( - clEnumVal(nondet, "Non-deterministic"), - clEnumVal(detBase, "Base execution"), - clEnumVal(detPrefix, "Prefix execution"), - clEnumVal(detDisjoint, "Disjoint execution"), - clEnumValEnd), cll::init(nondet)); - -template -struct Process { - typedef int tt_needs_per_iter_alloc; - - struct LocalState { - Cavity cav; - LocalState(Process& self, Galois::PerIterAllocTy& alloc): cav(graph, alloc) { } - }; - typedef LocalState GaloisDeterministicLocalState; - static_assert(Galois::has_deterministic_local_state::value, "Oops"); - - void operator()(GNode item, Galois::UserContext& ctx) { - if (!graph->containsNode(item, Galois::MethodFlag::ALL)) - return; - - Cavity* cavp = NULL; - - if (Version == detDisjoint) { - bool used; - LocalState* localState = (LocalState*) ctx.getLocalState(used); - if (used) { - localState->cav.update(item, ctx); - return; - } else { - cavp = &localState->cav; - } - } - - if (Version == detDisjoint) { - cavp->initialize(item); - cavp->build(); - cavp->computePost(); - } else { - Cavity cav(graph, ctx.getPerIterAlloc()); - cav.initialize(item); - cav.build(); - cav.computePost(); - if (Version == detPrefix) - return; - cav.update(item, ctx); - } - } -}; - -struct Preprocess { - Galois::InsertBag& wl; - Preprocess(Galois::InsertBag& w): wl(w) { } - void operator()(GNode item) const { - if (graph->getData(item, Galois::MethodFlag::NONE).isBad()) - wl.push(item); - } -}; - -struct DetLessThan { - bool operator()(const GNode& a, const GNode& b) const { - int idA = graph->getData(a, Galois::MethodFlag::NONE).getId(); - int idB = graph->getData(b, Galois::MethodFlag::NONE).getId(); - if (idA == 0 || idB == 0) abort(); - return idA 
< idB; - } -}; - -int main(int argc, char** argv) { - Galois::StatManager statManager; - LonestarStart(argc, argv, name, desc, url); - - graph = new Graph(); - { - Mesh m; - m.read(graph, filename.c_str(), detAlgo == nondet); - Verifier v; - if (!skipVerify && !v.verify(graph)) { - GALOIS_DIE("bad input mesh"); - } - } - std::cout << "configuration: " << std::distance(graph->begin(), graph->end()) - << " total triangles, " << std::count_if(graph->begin(), graph->end(), is_bad(graph)) << " bad triangles\n"; - - Galois::reportPageAlloc("MeminfoPre1"); - Galois::preAlloc(Galois::Runtime::MM::numPageAllocTotal() * 10); - //Galois::preAlloc(15 * numThreads + Galois::Runtime::MM::numPageAllocTotal() * 10); - Galois::reportPageAlloc("MeminfoPre2"); - - Galois::StatTimer T; - T.start(); - - Galois::InsertBag initialBad; - - if (detAlgo == nondet) - Galois::do_all_local(*graph, Preprocess(initialBad), Galois::loopname("findbad")); - else - std::for_each(graph->begin(), graph->end(), Preprocess(initialBad)); - - Galois::reportPageAlloc("MeminfoMid"); - - Galois::StatTimer Trefine("refine"); - Trefine.start(); - using namespace Galois::WorkList; - - typedef LocalQueue, ChunkedLIFO<256> > BQ; - typedef AltChunkedLIFO<32> Chunked; - - switch (detAlgo) { - case nondet: - Galois::for_each_local(initialBad, Process<>(), Galois::loopname("refine"), Galois::wl()); - case detBase: - Galois::for_each_det(initialBad.begin(), initialBad.end(), Process<>()); break; - case detPrefix: - Galois::for_each_det(initialBad.begin(), initialBad.end(), Process(), Process<>()); - break; - case detDisjoint: - Galois::for_each_det(initialBad.begin(), initialBad.end(), Process()); break; - default: std::cerr << "Unknown algorithm" << detAlgo << "\n"; abort(); - } - Trefine.stop(); - T.stop(); - - Galois::reportPageAlloc("MeminfoPost"); - - if (!skipVerify) { - int size = Galois::ParallelSTL::count_if(graph->begin(), graph->end(), is_bad(graph)); - if (size != 0) { - GALOIS_DIE("Bad triangles remaining"); - } - Verifier v; - if (!v.verify(graph)) { - GALOIS_DIE("Refinement failed"); - } - std::cout << std::distance(graph->begin(), graph->end()) << " total triangles\n"; - std::cout << "Refinement OK\n"; - } - - return 0; -} diff --git a/maxflow/galois/apps/delaunayrefinement/Edge.h b/maxflow/galois/apps/delaunayrefinement/Edge.h deleted file mode 100644 index b79f5f0..0000000 --- a/maxflow/galois/apps/delaunayrefinement/Edge.h +++ /dev/null @@ -1,69 +0,0 @@ -/** Delaunay refinement -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. 
- * - * @section Description - * - * @author Milind Kulkarni - */ -#ifndef EDGE_H -#define EDGE_H - -#include "Tuple.h" - -class Element; - -class Edge { - Tuple p[2]; - -public: - Edge() {} - Edge(const Tuple& a, const Tuple& b) { - if (a < b) { - p[0] = a; - p[1] = b; - } else { - p[0] = b; - p[1] = a; - } - } - Edge(const Edge &rhs) { - p[0] = rhs.p[0]; - p[1] = rhs.p[1]; - } - - bool operator==(const Edge& rhs) const { - return p[0] == rhs.p[0] && p[1] == rhs.p[1]; - } - bool operator!=(const Edge& rhs) const { - return !(*this == rhs); - } - bool operator<(const Edge& rhs) const { - return (p[0] < rhs.p[0]) || ((p[0] == rhs.p[0]) && (p[1] < rhs.p[1])); - } - - bool operator>(const Edge& rhs) const { - return (p[0] > rhs.p[0]) || ((p[0] == rhs.p[0]) && (p[1] > rhs.p[1])); - } - - Tuple getPoint(int i) const { - return p[i]; - } -}; -#endif diff --git a/maxflow/galois/apps/delaunayrefinement/Element.h b/maxflow/galois/apps/delaunayrefinement/Element.h deleted file mode 100644 index eab8cb1..0000000 --- a/maxflow/galois/apps/delaunayrefinement/Element.h +++ /dev/null @@ -1,244 +0,0 @@ -/** Delaunay refinement -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * @author Milind Kulkarni > - */ -#ifndef _ELEMENT_H -#define _ELEMENT_H - -#include "Galois/Runtime/ll/gio.h" - -#include -#include - -#include "Edge.h" - -#define MINANGLE 30.0 - -class Element { - Tuple coords[3]; // The three endpoints of the triangle - // if the triangle has an obtuse angle - // obtuse - 1 is which one - signed char obtuse; - bool bDim; // true == 3, false == 2 - int id; - -public: - //! Constructor for Triangles - Element(const Tuple& a, const Tuple& b, const Tuple& c, int _id = 0) - :obtuse(0), bDim(true), id(_id) - { - coords[0] = a; - coords[1] = b; - coords[2] = c; - if (b < a || c < a) { - if (b < c) { - coords[0] = b; - coords[1] = c; - coords[2] = a; - } else { - coords[0] = c; - coords[1] = a; - coords[2] = b; - } - } - // edges[0] = Edge(coords[0], coords[1]); - // edges[1] = Edge(coords[1], coords[2]); - // edges[2] = Edge(coords[2], coords[0]); - for (int i = 0; i < 3; i++) - if (angleOBCheck(i)) - obtuse = i + 1; - //computeCenter(); - } - - //! 
Constructor for segments - Element(const Tuple& a, const Tuple& b, int _id = 0): obtuse(0), bDim(false), id(_id) { - coords[0] = a; - coords[1] = b; - if (b < a) { - coords[0] = b; - coords[1] = a; - } - //computeCenter(); - } - - Tuple getCenter() const { - if (dim() == 2) { - return (coords[0] + coords[1]) * 0.5; - } else { - const Tuple& a = coords[0]; - const Tuple& b = coords[1]; - const Tuple& c = coords[2]; - Tuple x = b - a; - Tuple y = c - a; - double xlen = a.distance(b); - double ylen = a.distance(c); - double cosine = (x * y) / (xlen * ylen); - double sine_sq = 1.0 - cosine * cosine; - double plen = ylen / xlen; - double s = plen * cosine; - double t = plen * sine_sq; - double wp = (plen - cosine) / (2 * t); - double wb = 0.5 - (wp * s); - Tuple tmpval = a * (1 - wb - wp); - tmpval = tmpval + (b * wb); - return tmpval + (c * wp); - } - } - - double get_radius_squared() const { - return get_radius_squared(getCenter()); - } - - double get_radius_squared(const Tuple& center) const { - return center.distance_squared(coords[0]); - } - - bool operator<(const Element& rhs) const { - //apparently a triangle is less than a line - if (dim() < rhs.dim()) return false; - if (dim() > rhs.dim()) return true; - for (int i = 0; i < dim(); i++) { - if (coords[i] < rhs.coords[i]) return true; - else if (coords[i] > rhs.coords[i]) return false; - } - return false; - } - - /// @return if the current triangle has a common edge with e - bool isRelated(const Element& rhs) const { - int num_eq = 0; - for(int i = 0; i < dim(); ++i) - for(int j = 0; j < rhs.dim(); ++j) - if (coords[i] == rhs.coords[j]) - ++num_eq; - return num_eq == 2; - } - - bool inCircle(Tuple p) const { - Tuple center = getCenter(); - double ds = center.distance_squared(p); - return ds <= get_radius_squared(center); - } - - void angleCheck(int i, bool& ob, bool& sm, double M) const { - int j = (i + 1) % dim(); - int k = (i + 2) % dim(); - Tuple::angleCheck(coords[j], coords[i], coords[k], ob, sm, M); - } - - bool angleGTCheck(int i, double M) const { - int j = (i + 1) % dim(); - int k = (i + 2) % dim(); - return Tuple::angleGTCheck(coords[j], coords[i], coords[k], M); - } - - bool angleOBCheck(int i) const { - int j = (i + 1) % dim(); - int k = (i + 2) % dim(); - return Tuple::angleOBCheck(coords[j], coords[i], coords[k]); - } - - //Virtualize the Edges array - //Used only by Mesh now - Edge getEdge(int i) const { - if (i == 0) - return Edge(coords[0], coords[1]); - if (!bDim) { - if (i == 1) - return Edge(coords[1], coords[0]); - } else { - if (i == 1) - return Edge(coords[1], coords[2]); - else if (i == 2) - return Edge(coords[2], coords[0]); - } - GALOIS_DIE("unknown edge"); - return Edge(coords[0], coords[0]); - } - - Edge getOppositeObtuse() const { - //The edge opposite the obtuse angle is the edge formed by - //the other indexes - switch (obtuse) { - case 1: - return getEdge(1); - case 2: - return getEdge(2); - case 3: - return getEdge(0); - } - GALOIS_DIE("no obtuse edge"); - return getEdge(0); - } - - //! Should the node be processed? - bool isBad() const { - if (!bDim) - return false; - for (int i = 0; i < 3; i++) - if (angleGTCheck(i, MINANGLE)) - return true; - return false; - } - - const Tuple& getPoint(int i) const { return coords[i]; } - - const Tuple& getObtuse() const { return coords[obtuse-1]; } - - int dim() const { return bDim ? 
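getCenter() above computes, for a triangle, its circumcenter (the point that inCircle() and get_radius_squared() use); the dot-product formulation there is equivalent to the usual closed form, shown below with invented types. Degenerate (collinear) triangles make the denominator vanish in either formulation.

struct P2 { double x, y; };

P2 circumcenter(const P2& a, const P2& b, const P2& c) {
  double d  = 2.0 * (a.x * (b.y - c.y) + b.x * (c.y - a.y) + c.x * (a.y - b.y));
  double a2 = a.x * a.x + a.y * a.y;
  double b2 = b.x * b.x + b.y * b.y;
  double c2 = c.x * c.x + c.y * c.y;
  P2 u;
  u.x = (a2 * (b.y - c.y) + b2 * (c.y - a.y) + c2 * (a.y - b.y)) / d;
  u.y = (a2 * (c.x - b.x) + b2 * (a.x - c.x) + c2 * (b.x - a.x)) / d;
  return u;
}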
3 : 2; } - - int numEdges() const { return dim() + dim() - 3; } - - bool isObtuse() const { return obtuse != 0; } - - int getId() const { return id; } - - /** - * Scans all the edges of the two elements and if it finds one that is - * equal, then sets this as the Edge of the EdgeRelation - */ - Edge getRelatedEdge(const Element& e) const { - int at = 0; - Tuple d[2]; - for(int i = 0; i < dim(); ++i) - for(int j = 0; j < e.dim(); ++j) - if (coords[i] == e.coords[j]) - d[at++] = coords[i]; - assert(at == 2); - return Edge(d[0], d[1]); - } - - std::ostream& print(std::ostream& s) const { - s << '['; - for (int i = 0; i < dim(); ++i) - s << coords[i] << (i < (dim() - 1) ? ", " : ""); - s << ']'; - return s; - } -}; - -static std::ostream& operator<<(std::ostream& s, const Element& E) { - return E.print(s); -} - -#endif diff --git a/maxflow/galois/apps/delaunayrefinement/Mesh.h b/maxflow/galois/apps/delaunayrefinement/Mesh.h deleted file mode 100644 index 3b95b84..0000000 --- a/maxflow/galois/apps/delaunayrefinement/Mesh.h +++ /dev/null @@ -1,407 +0,0 @@ -/** Delaunay refinement -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. 
- * - * @section Description - * - * @author Milind Kulkarni - */ -#ifndef MESH_H -#define MESH_H - -#include "Subgraph.h" - -#include -#include -#include -#include -#include - - -struct is_bad { - Graph* g; - is_bad(Graph* _g): g(_g) {} - bool operator()(const GNode& n) const { - return g->getData(n, Galois::MethodFlag::NONE).isBad(); - } -}; - -struct create_nodes { - Graph* g; - create_nodes(Graph* _g): g(_g) {} - void operator()(Element& item) { - GNode n = g->createNode(item); - g->addNode(n); - } -}; - -struct centerXCmp { - bool operator()(const Element& lhs, const Element& rhs) const { - //return lhs.getCenter() < rhs.getCenter(); - return lhs.getPoint(0)[0] < rhs.getPoint(0)[0]; - } -}; - -struct centerYCmp { - bool operator()(const Element& lhs, const Element& rhs) const { - //return lhs.getCenter() < rhs.getCenter(); - return lhs.getPoint(0)[1] < rhs.getPoint(0)[1]; - } -}; - -struct centerYCmpInv { - bool operator()(const Element& lhs, const Element& rhs) const { - //return lhs.getCenter() < rhs.getCenter(); - return rhs.getPoint(0)[1] < lhs.getPoint(0)[1]; - } -}; - -/** - * Helper class used providing methods to read in information and create the graph - * - */ -class Mesh { - std::vector elements; - size_t id; - -private: - void checkResults(int act, int exp, std::string& str) { - if (act != exp) { - std::cerr << "Failed read in " << str << "\n"; - abort(); - } - } - - bool readNodesBin(std::string filename, std::vector& tuples) { - FILE* pFile = fopen(filename.append(".node.bin").c_str(), "r"); - if (!pFile) { - return false; - } - std::cout << "Using bin for node\n"; - uint32_t ntups[4]; - if (fread(&ntups[0], sizeof(uint32_t), 4, pFile) < 4) { - std::cerr << "Malformed binary file\n"; - abort(); - } - tuples.resize(ntups[0]); - for (size_t i = 0; i < ntups[0]; i++) { - struct record { - uint32_t index; - double x, y, z; - }; - record R; - if (fread(&R, sizeof(record), 1, pFile) < 1) { - std::cerr << "Malformed binary file\n"; - abort(); - } - tuples[R.index] = Tuple(R.x,R.y); - } - fclose(pFile); - return true; - } - - void readNodes(std::string filename, std::vector& tuples) { - if (readNodesBin(filename, tuples)) - return; - else - writeNodes(filename); - FILE* pFile = fopen(filename.append(".node").c_str(), "r"); - if (!pFile) { - std::cerr << "Failed to load file " << filename << "\n"; - abort(); - } - unsigned ntups; - int r = fscanf(pFile, "%u %*u %*u %*u", &ntups); - checkResults(r, 1, filename); - tuples.resize(ntups); - for (size_t i = 0; i < ntups; i++) { - unsigned index; - double x, y; - r = fscanf(pFile, "%u %lf %lf %*f", &index, &x, &y); - checkResults(r, 3, filename); - tuples[index] = Tuple(x,y); - } - fclose(pFile); - } - - void writeNodes(std::string filename) { - std::string filename2 = filename; - FILE* pFile = fopen(filename.append(".node").c_str(), "r"); - FILE* oFile = fopen(filename2.append(".node.bin").c_str(), "w"); - if (!pFile) { - std::cerr << "Failed to load file " << filename << " (continuing)\n"; - return; - } - if (!oFile) { - std::cerr << "Failed to open file " << filename2 << " (continuing)\n"; - return; - } - unsigned ntups[4]; - int r = fscanf(pFile, "%u %u %u %u", &ntups[0], &ntups[1], &ntups[2], &ntups[3]); - checkResults(r, 4, filename); - uint32_t ntups32[4] = {ntups[0], ntups[1], ntups[2], ntups[3]}; - fwrite(&ntups32[0], sizeof(uint32_t), 4, oFile); - - for (size_t i = 0; i < ntups[0]; i++) { - struct record { - unsigned index; - double x, y, z; - }; - struct recordOut { - uint32_t index; - double x, y, z; - }; - record R; - r = 
fscanf(pFile, "%u %lf %lf %lf", &R.index, &R.x, &R.y, &R.z); - checkResults(r, 4, filename); - recordOut R2 = {R.index, R.x, R.y, R.z}; - fwrite(&R2, sizeof(recordOut), 1, oFile); - } - fclose(pFile); - fclose(oFile); - } - - bool readElementsBin(std::string filename, std::vector& tuples) { - FILE* pFile = fopen(filename.append(".ele.bin").c_str(), "r"); - if (!pFile) { - return false; - } - std::cout << "Using bin for ele\n"; - uint32_t nels[3]; - if (fread(&nels[0], sizeof(uint32_t), 3, pFile) < 3) { - std::cerr << "Malformed binary file\n"; - abort(); - } - for (size_t i = 0; i < nels[0]; i++) { - uint32_t r[4]; - if (fread(&r[0], sizeof(uint32_t), 4, pFile) < 4) { - std::cerr << "Malformed binary file\n"; - abort(); - } - assert(r[1] < tuples.size()); - assert(r[2] < tuples.size()); - assert(r[3] < tuples.size()); - Element e(tuples[r[1]], tuples[r[2]], tuples[r[3]], ++id); - elements.push_back(e); - } - fclose(pFile); - return true; - } - - void readElements(std::string filename, std::vector& tuples) { - if (readElementsBin(filename, tuples)) - return; - else - writeElements(filename); - FILE* pFile = fopen(filename.append(".ele").c_str(), "r"); - if (!pFile) { - std::cerr << "Failed to load file " << filename << "\n"; - abort(); - } - unsigned nels; - int r = fscanf(pFile, "%u %*u %*u", &nels); - checkResults(r, 1, filename); - for (size_t i = 0; i < nels; i++) { - unsigned index; - unsigned n1, n2, n3; - r = fscanf(pFile, "%u %u %u %u", &index, &n1, &n2, &n3); - checkResults(r, 4, filename); - assert(n1 < tuples.size()); - assert(n2 < tuples.size()); - assert(n3 < tuples.size()); - Element e(tuples[n1], tuples[n2], tuples[n3], ++id); - elements.push_back(e); - } - fclose(pFile); - } - - void writeElements(std::string filename) { - std::string filename2 = filename; - FILE* pFile = fopen(filename.append(".ele").c_str(), "r"); - FILE* oFile = fopen(filename2.append(".ele.bin").c_str(), "w"); - if (!pFile) { - std::cerr << "Failed to load file " << filename << " (continuing)\n"; - return; - } - if (!oFile) { - std::cerr << "Failed to open file " << filename2 << " (continuing)\n"; - return; - } - unsigned nels[3]; - int r = fscanf(pFile, "%u %u %u", &nels[0], &nels[1], &nels[2]); - checkResults(r, 3, filename); - uint32_t nels32[3] = {nels[0], nels[1], nels[2]}; - fwrite(&nels32[0], sizeof(uint32_t), 3, oFile); - - for (size_t i = 0; i < nels[0]; i++) { - unsigned index; - unsigned n1, n2, n3; - r = fscanf(pFile, "%u %u %u %u", &index, &n1, &n2, &n3); - checkResults(r, 4, filename); - uint32_t vals[4] = {index, n1, n2, n3}; - fwrite(&vals[0], sizeof(uint32_t), 4, oFile); - } - fclose(pFile); - fclose(oFile); - } - - bool readPolyBin(std::string filename, std::vector& tuples) { - FILE* pFile = fopen(filename.append(".poly.bin").c_str(), "r"); - if (!pFile) { - return false; - } - std::cout << "Using bin for poly\n"; - uint32_t nsegs[4]; - if (fread(&nsegs[0], sizeof(uint32_t), 4, pFile) < 4) { - std::cerr << "Malformed binary file\n"; - abort(); - } - if (fread(&nsegs[0], sizeof(uint32_t), 2, pFile) < 2) { - std::cerr << "Malformed binary file\n"; - abort(); - } - for (size_t i = 0; i < nsegs[0]; i++) { - uint32_t r[4]; - if (fread(&r[0], sizeof(uint32_t), 4, pFile) < 4) { - std::cerr << "Malformed binary file\n"; - abort(); - } - assert(r[1] < tuples.size()); - assert(r[2] < tuples.size()); - Element e(tuples[r[1]], tuples[r[2]], ++id); - elements.push_back(e); - } - fclose(pFile); - return true; - } - - void readPoly(std::string filename, std::vector& tuples) { - if 
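The .node.bin/.ele.bin/.poly.bin caches above are written and read as whole structs, so the on-disk layout inherits whatever padding the compiler inserts after the 32-bit index (consistent as long as writer and reader are built the same way). A layout-explicit variant, illustrative only and still native-endian, writes each field with a fixed width instead:

#include <cstdint>
#include <cstdio>

struct NodeRecord { uint32_t index; double x, y, z; };

bool writeNodeRecord(std::FILE* f, const NodeRecord& r) {
  return std::fwrite(&r.index, sizeof(uint32_t), 1, f) == 1 &&
         std::fwrite(&r.x, sizeof(double), 1, f) == 1 &&
         std::fwrite(&r.y, sizeof(double), 1, f) == 1 &&
         std::fwrite(&r.z, sizeof(double), 1, f) == 1;
}

bool readNodeRecord(std::FILE* f, NodeRecord& r) {
  return std::fread(&r.index, sizeof(uint32_t), 1, f) == 1 &&
         std::fread(&r.x, sizeof(double), 1, f) == 1 &&
         std::fread(&r.y, sizeof(double), 1, f) == 1 &&
         std::fread(&r.z, sizeof(double), 1, f) == 1;
}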
(readPolyBin(filename, tuples)) - return; - else - writePoly(filename); - FILE* pFile = fopen(filename.append(".poly").c_str(), "r"); - if (!pFile) { - std::cerr << "Failed to load file " << filename << "\n"; - abort(); - } - unsigned nsegs; - int r = fscanf(pFile, "%*u %*u %*u %*u"); - checkResults(r, 0, filename); - r = fscanf(pFile, "%u %*u", &nsegs); - checkResults(r, 1, filename); - for (size_t i = 0; i < nsegs; i++) { - unsigned index, n1, n2; - r = fscanf(pFile, "%u %u %u %*u", &index, &n1, &n2); - checkResults(r, 3, filename); - assert(n1 < tuples.size()); - assert(n2 < tuples.size()); - Element e(tuples[n1], tuples[n2], ++id); - elements.push_back(e); - } - fclose(pFile); - } - - void writePoly(std::string filename) { - std::string filename2 = filename; - FILE* pFile = fopen(filename.append(".poly").c_str(), "r"); - FILE* oFile = fopen(filename2.append(".poly.bin").c_str(), "w"); - if (!pFile) { - std::cerr << "Failed to load file " << filename << " (continuing)\n"; - return; - } - if (!oFile) { - std::cerr << "Failed to open file " << filename2 << " (continuing)\n"; - return; - } - unsigned nsegs[4]; - int r = fscanf(pFile, "%u %u %u %u", &nsegs[0], &nsegs[1], &nsegs[2], &nsegs[3]); - checkResults(r, 4, filename); - uint32_t nsegs32[4] = {nsegs[0], nsegs[1], nsegs[2], nsegs[3]}; - fwrite(&nsegs32[0], sizeof(uint32_t), 4, oFile); - r = fscanf(pFile, "%u %u", &nsegs[0], &nsegs[1]); - checkResults(r, 2, filename); - nsegs32[0] = nsegs[0]; nsegs32[1] = nsegs[1]; - fwrite(&nsegs32[0], sizeof(uint32_t), 2, oFile); - for (size_t i = 0; i < nsegs[0]; i++) { - unsigned index, n1, n2, n3; - r = fscanf(pFile, "%u %u %u %u", &index, &n1, &n2, &n3); - checkResults(r, 4, filename); - uint32_t r[4] = {index, n1, n2, n3}; - fwrite(&r[0], sizeof(uint32_t), 4, oFile); - } - fclose(pFile); - fclose(oFile); - } - - void addElement(Graph* mesh, GNode node, std::map& edge_map) { - Element& element = mesh->getData(node); - for (int i = 0; i < element.numEdges(); i++) { - Edge edge = element.getEdge(i); - if (edge_map.find(edge) == edge_map.end()) { - edge_map[edge] = node; - } else { - mesh->addEdge(node, edge_map[edge], Galois::MethodFlag::NONE); - edge_map.erase(edge); - } - } - } - - template - void divide(const Iter& b, const Iter& e) { - if (std::distance(b,e) > 16) { - std::sort(b,e, centerXCmp()); - Iter m = Galois::split_range(b,e); - std::sort(b,m, centerYCmpInv()); - std::sort(m,e, centerYCmp()); - divide(b, Galois::split_range(b,m)); - divide(Galois::split_range(b,m), m); - divide(m,Galois::split_range(m,e)); - divide(Galois::split_range(m,e), e); - } - } - - void makeGraph(Graph* mesh, bool parallelAllocate) { - //std::sort(elements.begin(), elements.end(), centerXCmp()); - divide(elements.begin(), elements.end()); - - if (parallelAllocate) - Galois::do_all(elements.begin(), elements.end(), create_nodes(mesh), Galois::loopname("allocate")); - else - std::for_each(elements.begin(), elements.end(), create_nodes(mesh)); - - std::map edge_map; - for (Graph::iterator ii = mesh->begin(), ee = mesh->end(); - ii != ee; ++ii) - addElement(mesh, *ii, edge_map); - } - -public: - Mesh(): id(0) { } - - void read(Graph* mesh, std::string basename, bool parallelAllocate) { - std::vector tuples; - readNodes(basename, tuples); - readElements(basename, tuples); - readPoly(basename, tuples); - makeGraph(mesh, parallelAllocate); - } -}; - -#endif diff --git a/maxflow/galois/apps/delaunayrefinement/Subgraph.h b/maxflow/galois/apps/delaunayrefinement/Subgraph.h deleted file mode 100644 index e93ea48..0000000 --- 
a/maxflow/galois/apps/delaunayrefinement/Subgraph.h +++ /dev/null @@ -1,118 +0,0 @@ -/** Delaunay refinement -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * @author Milind Kulkarni - */ -#ifndef SUBGRAPH_H -#define SUBGRAPH_H - -#include "Element.h" - -#include "Galois/Galois.h" -#include "Galois/Graph/Graph.h" - -#include -#include - -typedef Galois::Graph::FirstGraph Graph; -typedef Graph::GraphNode GNode; - -struct EdgeTuple { - GNode src; - GNode dst; - Edge data; - EdgeTuple(GNode s, GNode d, const Edge& _d):src(s), dst(d), data(_d) {} - - bool operator==(const EdgeTuple& rhs) const { - return src == rhs.src && dst == rhs.dst && data == data; - } -}; - -/** - * A sub-graph of the mesh. Used to store information about the original - * cavity - */ -class PreGraph { - typedef std::vector::other> NodesTy; - NodesTy nodes; - -public: - typedef NodesTy::iterator iterator; - - explicit PreGraph(Galois::PerIterAllocTy& cnx): nodes(cnx) {} - - bool containsNode(GNode N) { - return std::find(nodes.begin(), nodes.end(), N) != nodes.end(); - } - - void addNode(GNode n) { return nodes.push_back(n); } - void reset() { nodes.clear(); } - iterator begin() { return nodes.begin(); } - iterator end() { return nodes.end(); } -}; - -/** - * A sub-graph of the mesh. Used to store information about the original - * and updated cavity - */ -class PostGraph { - struct TempEdge { - size_t src; - GNode dst; - Edge edge; - TempEdge(size_t s, GNode d, const Edge& e): src(s), dst(d), edge(e) { } - }; - - typedef std::vector::other> NodesTy; - typedef std::vector::other> EdgesTy; - - //! the nodes in the graph before updating - NodesTy nodes; - //! 
the edges that connect the subgraph to the rest of the graph - EdgesTy edges; - -public: - typedef NodesTy::iterator iterator; - typedef EdgesTy::iterator edge_iterator; - - explicit PostGraph(Galois::PerIterAllocTy& cnx): nodes(cnx), edges(cnx) { } - - void addNode(GNode n) { - nodes.push_back(n); - } - - void addEdge(GNode src, GNode dst, const Edge& e) { - edges.push_back(EdgeTuple(src, dst, e)); - } - - void reset() { - nodes.clear(); - edges.clear(); - } - - iterator begin() { return nodes.begin(); } - iterator end() { return nodes.end(); } - edge_iterator edge_begin() { return edges.begin(); } - edge_iterator edge_end() { return edges.end(); } -}; - -#endif diff --git a/maxflow/galois/apps/delaunayrefinement/Tuple.h b/maxflow/galois/apps/delaunayrefinement/Tuple.h deleted file mode 100644 index 0ba3771..0000000 --- a/maxflow/galois/apps/delaunayrefinement/Tuple.h +++ /dev/null @@ -1,177 +0,0 @@ -/** A tuple -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. 
- * - * @author Milind Kulkarni > - * @author Andrew Lenharth - */ -#ifndef TUPLE_H -#define TUPLE_H - -#include -#include - -class Tuple { - double _t[2]; -public: - Tuple(double a, double b) { - _t[0] = a; - _t[1] = b; - } - - Tuple() {}; - ~Tuple() {}; - - bool operator==(const Tuple& rhs) const { - for (int x = 0; x < 2; ++x) { - if (_t[x] != rhs._t[x]) return false; - } - return true; - } - - bool operator!=(const Tuple& rhs) const { - return !(*this == rhs); - } - - bool operator<(const Tuple& rhs) const { - for (int i = 0; i < 2; ++i) { - if (_t[i] < rhs._t[i]) return true; - else if (_t[i] > rhs._t[i]) return false; - } - return false; - } - - bool operator>(const Tuple& rhs) const { - for (int i = 0; i < 2; ++i) { - if (_t[i] > rhs._t[i]) return true; - else if (_t[i] < rhs._t[i]) return false; - } - return false; - } - - Tuple operator+(const Tuple& rhs) const { - return Tuple(_t[0]+rhs._t[0], _t[1]+rhs._t[1]); - } - - Tuple operator-(const Tuple& rhs) const { - return Tuple(_t[0]-rhs._t[0], _t[1]-rhs._t[1]); - } - - Tuple operator*(double d) const { //scalar product - return Tuple(_t[0]*d, _t[1]*d); - } - - double operator*(const Tuple& rhs) const { //dot product - return _t[0]*rhs._t[0] + _t[1]*rhs._t[1]; - } - - double operator[](int i) const { - return _t[i]; - }; - - int cmp(const Tuple& x) const { - if (*this == x) - return 0; - if (*this > x) - return 1; - return -1; - } - - double distance_squared(const Tuple& p) const { //squared distance between current tuple and x - double sum = 0.0; - for (int i = 0; i < 2; ++i) { - double d = _t[i] - p._t[i]; - sum += d * d; - } - return sum; - } - - double distance(const Tuple& p) const { //distance between current tuple and x - return sqrt(distance_squared(p)); - } - - double angle(const Tuple& a, const Tuple& b) const { //angle formed by a, current tuple, b - Tuple vb = a - *this; - Tuple vc = b - *this; - double dp = vb*vc; - double c = dp / sqrt(distance_squared(a) * distance_squared(b)); - return (180/M_PI) * acos(c); - } - - void angleCheck(const Tuple& a, const Tuple& b, bool& ob, bool& sm, double M) const { //angle formed by a, current tuple, b - Tuple vb = a - *this; - Tuple vc = b - *this; - double dp = vb*vc; - - if (dp < 0) { - ob = true; - return; - } - - double c = dp / sqrt(distance_squared(b) * distance_squared(a)); - if (c > cos(M*M_PI/180)) { - sm = true; - return; - } - return; - } - - bool angleGTCheck(const Tuple& a, const Tuple& b, double M) const { //angle formed by a, current tuple, b - Tuple vb = a - *this; - Tuple vc = b - *this; - double dp = vb*vc; - - if (dp < 0) - return false; - - double c = dp / sqrt(distance_squared(b) * distance_squared(a)); - return c > cos(M*M_PI/180); - } - - bool angleOBCheck(const Tuple& a, const Tuple& b) const { //angle formed by a, current tuple, b - Tuple vb = a - *this; - Tuple vc = b - *this; - double dp = vb*vc; - - return dp < 0; - } - - void print(std::ostream& os) const { - os << "(" << _t[0] << ", " << _t[1] << ")"; - } - - static int cmp(Tuple a, Tuple b) {return a.cmp(b);} - static double distance(Tuple a, Tuple b) {return a.distance(b);} - static double angle(const Tuple& a, const Tuple& b, const Tuple& c) {return b.angle(a, c);} - static void angleCheck(const Tuple& a, const Tuple& b, const Tuple& c, bool& ob, bool& sm, double M) { b.angleCheck(a, c, ob, sm, M); } - static bool angleGTCheck(const Tuple& a, const Tuple& b, const Tuple& c, double M) { return b.angleGTCheck(a, c, M); } - static bool angleOBCheck(const Tuple& a, const Tuple& b, const Tuple& c) { 
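The Tuple angle helpers above decide "angle at b, formed by a-b-c, smaller than M degrees" without ever calling acos: a negative dot product means the angle is obtuse, and otherwise comparing the cosine against cos(M deg) is equivalent to comparing the angles themselves. A standalone restatement of that test (invented names, assumes M <= 90):

#include <cmath>

struct V2 { double x, y; };

bool angleSmallerThan(const V2& a, const V2& b, const V2& c, double degreesM) {
  double ux = a.x - b.x, uy = a.y - b.y;      // vector b -> a
  double vx = c.x - b.x, vy = c.y - b.y;      // vector b -> c
  double dot = ux * vx + uy * vy;
  if (dot < 0) return false;                  // obtuse angle, so not below a small threshold
  double cosine = dot / std::sqrt((ux * ux + uy * uy) * (vx * vx + vy * vy));
  return cosine > std::cos(degreesM * M_PI / 180.0);
}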
return b.angleOBCheck(a, c); } -}; - -static inline std::ostream& operator<<(std::ostream& os, const Tuple& rhs) { - rhs.print(os); - return os; -} - -static inline Tuple operator*(double d, Tuple rhs) { - return rhs * d; -} - - -#endif diff --git a/maxflow/galois/apps/delaunayrefinement/Verifier.h b/maxflow/galois/apps/delaunayrefinement/Verifier.h deleted file mode 100644 index d8c6298..0000000 --- a/maxflow/galois/apps/delaunayrefinement/Verifier.h +++ /dev/null @@ -1,160 +0,0 @@ -/** Delaunay triangulation verifier -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * @author Xin Sui - */ - -#ifndef VERIFIER_H -#define VERIFIER_H - -#include "Galois/Galois.h" -#include "Galois/ParallelSTL/ParallelSTL.h" - -#include -#include -#include - -class Verifier { - struct inconsistent: public std::unary_function { - Graph* graph; - inconsistent(Graph* g): graph(g) { } - - bool operator()(const GNode& node) const { - Element& e = graph->getData(node); - - size_t dist = std::distance(graph->edge_begin(node), graph->edge_end(node)); - if (e.dim() == 2) { - if (dist != 1) { - std::cerr << "Error: Segment " << e << " has " << dist << " relation(s)\n"; - return true; - } - } else if (e.dim() == 3) { - if (dist != 3) { - std::cerr << "Error: Triangle " << e << " has " << dist << " relation(s)\n"; - return true; - } - } else { - std::cerr << "Error: Element with " << e.dim() << " edges\n"; - return true; - } - return false; - } - }; - - struct not_delaunay: public std::unary_function { - Graph* graph; - not_delaunay(Graph* g): graph(g) { } - - bool operator()(const GNode& node) { - Element& e1 = graph->getData(node); - - for (Graph::edge_iterator jj = graph->edge_begin(node), - ej = graph->edge_end(node); jj != ej; ++jj) { - const GNode& n = graph->getEdgeDst(jj); - Element& e2 = graph->getData(n); - if (e1.dim() == 3 && e2.dim() == 3) { - Tuple t2; - if (!getTupleT2OfRelatedEdge(e1, e2, t2)) { - std::cerr << "missing tuple\n"; - return true; - } - if (e1.inCircle(t2)) { - std::cerr << "Delaunay property violated: point " << t2 << " in element " << e1 << "\n"; - return true; - } - } - } - return false; - } - - bool getTupleT2OfRelatedEdge(const Element& e1, const Element& e2, Tuple& t) { - int e2_0 = -1; - int e2_1 = -1; - int phase = 0; - - for (int i = 0; i < e1.dim(); i++) { - for (int j = 0; j < e2.dim(); j++) { - if (e1.getPoint(i) != e2.getPoint(j)) - continue; - - if (phase == 0) { - e2_0 = j; - phase = 1; - break; - } - - e2_1 = j; - for (int k = 0; k < 3; k++) { - if (k != e2_0 && 
k != e2_1) { - t = e2.getPoint(k); - return true; - } - } - } - } - return false; - } - }; - - bool checkReachability(Graph* graph) { - std::stack remaining; - std::set found; - remaining.push(*(graph->begin())); - - while (!remaining.empty()) { - GNode node = remaining.top(); - remaining.pop(); - if (!found.count(node)) { - if (!graph->containsNode(node)) { - std::cerr << "Reachable node was removed from graph\n"; - } - found.insert(node); - int i = 0; - for (Graph::edge_iterator ii = graph->edge_begin(node), - ei = graph->edge_end(node); ii != ei; ++ii) { - GNode n = graph->getEdgeDst(ii); - assert(i < 3); - assert(graph->containsNode(n)); - assert(node != n); - ++i; - remaining.push(n); - } - } - } - - if (found.size() != graph->size()) { - std::cerr << "Error: Not all elements are reachable. "; - std::cerr << "Found: " << found.size() << " needed: " << graph->size() << ".\n"; - return false; - } - return true; - } - -public: - bool verify(Graph* g) { - return Galois::ParallelSTL::find_if(g->begin(), g->end(), inconsistent(g)) == g->end() - && Galois::ParallelSTL::find_if(g->begin(), g->end(), not_delaunay(g)) == g->end() - && checkReachability(g); - } -}; - -#endif diff --git a/maxflow/galois/apps/delaunaytriangulation/CMakeLists.txt b/maxflow/galois/apps/delaunaytriangulation/CMakeLists.txt deleted file mode 100644 index ad7c095..0000000 --- a/maxflow/galois/apps/delaunaytriangulation/CMakeLists.txt +++ /dev/null @@ -1,9 +0,0 @@ -#if(CMAKE_CXX_COMPILER_ID MATCHES "Intel") -# message(WARNING "NOT compiling delaunaytriangulation (compiler error in ICPC 12.1.0 20111011)") -#else() -#endif() -if(CMAKE_COMPILER_IS_GNUCC) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ffast-math") -endif() -app(delaunaytriangulation DelaunayTriangulation.cpp Element.cpp) -app(delaunaytriangulation-det DelaunayTriangulationDet.cpp Element.cpp) diff --git a/maxflow/galois/apps/delaunaytriangulation/Cavity.h b/maxflow/galois/apps/delaunaytriangulation/Cavity.h deleted file mode 100644 index e05183b..0000000 --- a/maxflow/galois/apps/delaunaytriangulation/Cavity.h +++ /dev/null @@ -1,169 +0,0 @@ -/** A cavity -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author Xin Sui - * @author Donald Nguyen - */ -#ifndef CAVITY_H -#define CAVITY_H - -#include "Graph.h" - -#include - -//! 
A cavity which will be retrangulated -template > -class Cavity: private boost::noncopyable { - typedef typename Alloc::template rebind::other GNodeVectorAlloc; - typedef std::vector GNodeVector; - typedef typename Alloc::template rebind>::other GNodeIntPairVectorAlloc; - typedef std::vector, GNodeIntPairVectorAlloc> GNodeIntPairVector; - - struct InCircumcenter { - const Graph& graph; - Tuple tuple; - InCircumcenter(const Graph& g, const Tuple& t): graph(g), tuple(t) { } - bool operator()(const GNode& n) const { - Element& e = graph.getData(n, Galois::MethodFlag::NONE); - return e.inCircle(tuple); - } - }; - - Searcher searcher; - GNodeVector newNodes; - GNodeIntPairVector outside; - GNode center; - Point* point; - Graph& graph; - const Alloc& alloc; - - //! Find triangles that border cavity but are not in the cavity - void findOutside() { - for (typename Searcher::GNodeVector::iterator ii = searcher.inside.begin(), - ei = searcher.inside.end(); ii != ei; ++ii) { - - for (Graph::edge_iterator jj = graph.edge_begin(*ii, Galois::MethodFlag::NONE), - ej = graph.edge_end(*ii, Galois::MethodFlag::NONE); jj != ej; ++jj) { - GNode n = graph.getEdgeDst(jj); - // i.e., if (!e.boundary() && e.inCircle(point->t())) - if (std::find(searcher.matches.begin(), searcher.matches.end(), n) - != searcher.matches.end()) - continue; - - int index = graph.getEdgeData(graph.findEdge(n, *ii, Galois::MethodFlag::NONE)); - outside.push_back(std::make_pair(n, index)); - - Element& e = graph.getData(n, Galois::MethodFlag::NONE); - Point* p2 = e.getPoint(index); - Point* p3 = e.getPoint((index + 1) % 3); - - p2->get(Galois::MethodFlag::CHECK_CONFLICT); - p3->get(Galois::MethodFlag::CHECK_CONFLICT); - } - } - } - - void addElements() { - GNodeVector newNodes(alloc); - - // Create new nodes - for (typename GNodeIntPairVector::iterator ii = outside.begin(), - ei = outside.end(); ii != ei; ++ii) { - const GNode& n = ii->first; - int& index = ii->second; - - Element& e = graph.getData(n, Galois::MethodFlag::NONE); - - Point* p2 = e.getPoint(index); - Point* p3 = e.getPoint((index + 1) % 3); - - Element newE(point, p2, p3); - GNode newNode = graph.createNode(newE); - graph.addNode(newNode, Galois::MethodFlag::NONE); - - point->addElement(newNode); - p2->addElement(newNode); - p3->addElement(newNode); - - graph.getEdgeData(graph.addEdge(newNode, n, Galois::MethodFlag::NONE)) = 1; - graph.getEdgeData(graph.addEdge(n, newNode, Galois::MethodFlag::NONE)) = index; - - newNodes.push_back(newNode); - } - - // Update new node connectivity - for (unsigned i = 0; i < newNodes.size(); ++i) { - const GNode& n1 = newNodes[i]; - const Element& e1 = graph.getData(n1, Galois::MethodFlag::NONE); - for (unsigned j = i + 1; j < newNodes.size(); ++j) { - if (i != j) { - const GNode& n2 = newNodes[j]; - const Element& e2 = graph.getData(n2, Galois::MethodFlag::NONE); - - for (int x = 2; x >= 1; --x) { - for (int y = 2; y >= 1; --y) { - if (e1.getPoint(x) == e2.getPoint(y)) { - int indexForNewNode = x & 2; - int indexForNode = y & 2; - graph.getEdgeData(graph.addEdge(n1, n2, Galois::MethodFlag::NONE)) = indexForNewNode; - graph.getEdgeData(graph.addEdge(n2, n1, Galois::MethodFlag::NONE)) = indexForNode; - } - } - } - } - } - } - } - - void removeElements() { - for (typename Searcher::GNodeVector::iterator ii = searcher.matches.begin(), - ei = searcher.matches.end(); ii != ei; ++ii) { - graph.removeNode(*ii, Galois::MethodFlag::NONE); - } - } - -public: - Cavity(Graph& g, const Alloc& a = Alloc()): - searcher(g, a), - newNodes(a), - outside(a), - 
graph(g), - alloc(a) - { } - - void init(const GNode& c, Point* p) { - center = c; - point = p; - } - - void build() { - assert(graph.getData(center).inCircle(point->t())); - searcher.findAll(center, InCircumcenter(graph, point->t())); - assert(!searcher.inside.empty()); - findOutside(); - } - - void update() { - removeElements(); - addElements(); - } -}; - -#endif diff --git a/maxflow/galois/apps/delaunaytriangulation/DelaunayTriangulation.cpp b/maxflow/galois/apps/delaunaytriangulation/DelaunayTriangulation.cpp deleted file mode 100644 index cf04451..0000000 --- a/maxflow/galois/apps/delaunaytriangulation/DelaunayTriangulation.cpp +++ /dev/null @@ -1,523 +0,0 @@ -/** Delaunay triangulation -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Delaunay triangulation of points in 2d. - * - * @author Xin Sui - * @author Donald Nguyen - */ -#include "Point.h" -#include "Cavity.h" -#include "Verifier.h" - -#include "Galois/Galois.h" -#include "Galois/Bag.h" -#include "Galois/Statistic.h" -#include "Galois/Graph/SpatialTree.h" -#include "Lonestar/BoilerPlate.h" -#include "llvm/Support/CommandLine.h" - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace cll = llvm::cl; - -static const char* name = "Delaunay Triangulation"; -static const char* desc = "Produces a Delaunay triangulation for a set of points"; -static const char* url = "delaunay_triangulation"; - -static cll::opt inputname(cll::Positional, cll::desc(""), cll::Required); -static cll::opt doWriteMesh("writemesh", - cll::desc("Write the mesh out to files with basename"), - cll::value_desc("basename")); - -static Graph graph; -static Galois::Graph::SpatialTree2d tree; - -//! 
Our main functor -struct Process { - typedef int tt_needs_per_iter_alloc; - typedef int tt_does_not_need_push; - typedef Galois::PerIterAllocTy Alloc; - - struct ContainsTuple { - const Graph& graph; - Tuple tuple; - ContainsTuple(const Graph& g, const Tuple& t): graph(g), tuple(t) { } - bool operator()(const GNode& n) const { - assert(!graph.getData(n, Galois::MethodFlag::NONE).boundary()); - return graph.getData(n, Galois::MethodFlag::NONE).inTriangle(tuple); - } - }; - - void computeCenter(const Element& e, Tuple& t) const { - for (int i = 0; i < 3; ++i) { - const Tuple& o = e.getPoint(i)->t(); - for (int j = 0; j < 2; ++j) { - t[j] += o[j]; - } - } - for (int j = 0; j < 2; ++j) { - t[j] *= 1/3.0; - } - } - - void findBestNormal(const Element& element, const Point* p, const Point*& bestP1, const Point*& bestP2) { - Tuple center(0); - computeCenter(element, center); - int scale = element.clockwise() ? 1 : -1; - - Tuple origin = p->t() - center; -// double length2 = origin.x() * origin.x() + origin.y() * origin.y(); - bestP1 = bestP2 = NULL; - double bestVal = 0.0; - for (int i = 0; i < 3; ++i) { - int next = i + 1; - if (next > 2) next -= 3; - - const Point* p1 = element.getPoint(i); - const Point* p2 = element.getPoint(next); - double dx = p2->t().x() - p1->t().x(); - double dy = p2->t().y() - p1->t().y(); - Tuple normal(scale * -dy, scale * dx); - double val = normal.dot(origin); // / length2; - if (bestP1 == NULL || val > bestVal) { - bestVal = val; - bestP1 = p1; - bestP2 = p2; - } - } - assert(bestP1 != NULL && bestP2 != NULL && bestVal > 0); - } - - GNode findCorrespondingNode(GNode start, const Point* p1, const Point* p2) { - for (Graph::edge_iterator ii = graph.edge_begin(start, Galois::MethodFlag::CHECK_CONFLICT), - ei = graph.edge_end(start, Galois::MethodFlag::CHECK_CONFLICT); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Element& e = graph.getData(dst, Galois::MethodFlag::NONE); - int count = 0; - for (int i = 0; i < e.dim(); ++i) { - if (e.getPoint(i) == p1 || e.getPoint(i) == p2) { - if (++count == 2) - return dst; - } - } - } - GALOIS_DIE("unreachable"); - return start; - } - - bool planarSearch(const Point* p, GNode start, GNode& node) { - // Try simple hill climbing instead - ContainsTuple contains(graph, p->t()); - while (!contains(start)) { - Element& element = graph.getData(start, Galois::MethodFlag::CHECK_CONFLICT); - if (element.boundary()) { - // Should only happen when quad tree returns a boundary point which is rare - // There's only one way to go from here - assert(std::distance(graph.edge_begin(start), graph.edge_end(start)) == 1); - start = graph.getEdgeDst(graph.edge_begin(start, Galois::MethodFlag::CHECK_CONFLICT)); - } else { - // Find which neighbor will get us to point fastest by computing normal - // vectors - const Point *p1, *p2; - findBestNormal(element, p, p1, p2); - start = findCorrespondingNode(start, p1, p2); - } - } - - node = start; - return true; - } - - bool findContainingElement(const Point* p, GNode& node) { - Point** rp = tree.find(p->t().x(), p->t().y()); - if (!rp) - return false; - - (*rp)->get(Galois::MethodFlag::CHECK_CONFLICT); - - GNode someNode = (*rp)->someElement(); - - // Not in mesh yet - if (!someNode) { - GALOIS_DIE("unreachable"); - return false; - } - - return planarSearch(p, someNode, node); - } - - //! 
Parallel operator - GALOIS_ATTRIBUTE_NOINLINE - void operator()(Point* p, Galois::UserContext& ctx) { - p->get(Galois::MethodFlag::CHECK_CONFLICT); - assert(!p->inMesh()); - - GNode node; - if (!findContainingElement(p, node)) { - // Someone updated an element while we were searching, producing - // a semi-consistent state - // ctx.push(p); - // Current version is safe with locking so this shouldn't happen - GALOIS_DIE("unreachable"); - return; - } - - assert(graph.getData(node).inTriangle(p->t())); - assert(graph.containsNode(node)); - - Cavity cav(graph, ctx.getPerIterAlloc()); - cav.init(node, p); - cav.build(); - cav.update(); - tree.insert(p->t().x(), p->t().y(), p); - } -}; - -typedef std::vector PointList; - -class ReadPoints { - void addBoundaryPoints() { - double minX, maxX, minY, maxY; - - minX = minY = std::numeric_limits::max(); - maxX = maxY = std::numeric_limits::min(); - - for (PointList::iterator ii = points.begin(), ei = points.end(); ii != ei; ++ii) { - double x = ii->t().x(); - double y = ii->t().y(); - if (x < minX) - minX = x; - else if (x > maxX) - maxX = x; - if (y < minY) - minY = y; - else if (y > maxY) - maxY = y; - } - - tree.init(minX, minY, maxX, maxY); - - size_t size = points.size(); - double width = maxX - minX; - double height = maxY - minY; - double maxLength = std::max(width, height); - double centerX = minX + width / 2.0; - double centerY = minY + height / 2.0; - double radius = maxLength * 3.0; // radius of circle that should cover all points - - for (int i = 0; i < 3; ++i) { - double dX = radius * cos(2*M_PI*(i/3.0)); - double dY = radius * sin(2*M_PI*(i/3.0)); - points.push_back(Point(centerX + dX, centerY + dY, size + i)); - } - } - - void nextLine(std::ifstream& scanner) { - scanner.ignore(std::numeric_limits::max(), '\n'); - } - - void fromTriangle(std::ifstream& scanner) { - double x, y; - long numPoints; - - scanner >> numPoints; - - int dim; - scanner >> dim; - assert(dim == 2); - int k; - scanner >> k; // number of attributes - assert(k == 0); - scanner >> k; // has boundary markers? - - for (long id = 0; id < numPoints; ++id) { - scanner >> k; // point id - scanner >> x >> y; - nextLine(scanner); - points.push_back(Point(x, y, id)); - } - } - - void fromPointList(std::ifstream& scanner) { - double x, y; - - // comment line - nextLine(scanner); - size_t id = 0; - while (!scanner.eof()) { - scanner >> x >> y; - if (x == 0 && y == 0) - break; - points.push_back(Point(x, y, id++)); - x = y = 0; - nextLine(scanner); - } - } - - PointList& points; - -public: - ReadPoints(PointList& p): points(p) { } - - void from(const std::string& name) { - std::ifstream scanner(name.c_str()); - if (!scanner.good()) { - GALOIS_DIE("Could not open file: ", name); - } - if (name.find(".node") == name.size() - 5) { - fromTriangle(scanner); - } else { - fromPointList(scanner); - } - scanner.close(); - - if (points.size()) - addBoundaryPoints(); - else { - GALOIS_DIE("No points found in file: ", name); - } - } -}; - -//! 
All Point* refer to elements in this bag -Galois::InsertBag basePoints; -Galois::InsertBag ptrPoints; - -static void addBoundaryNodes(Point* p1, Point* p2, Point* p3) { - Element large_triangle(p1, p2, p3); - GNode large_node = graph.createNode(large_triangle); - graph.addNode(large_node); - - p1->addElement(large_node); - p2->addElement(large_node); - p3->addElement(large_node); - - tree.insert(p1->t().x(), p1->t().y(), p1); - - Element border_ele1(p1, p2); - Element border_ele2(p2, p3); - Element border_ele3(p3, p1); - - GNode border_node1 = graph.createNode(border_ele1); - GNode border_node2 = graph.createNode(border_ele2); - GNode border_node3 = graph.createNode(border_ele3); - - graph.addNode(border_node1); - graph.addNode(border_node2); - graph.addNode(border_node3); - - graph.getEdgeData(graph.addEdge(large_node, border_node1)) = 0; - graph.getEdgeData(graph.addEdge(large_node, border_node2)) = 1; - graph.getEdgeData(graph.addEdge(large_node, border_node3)) = 2; - - graph.getEdgeData(graph.addEdge(border_node1, large_node)) = 0; - graph.getEdgeData(graph.addEdge(border_node2, large_node)) = 0; - graph.getEdgeData(graph.addEdge(border_node3, large_node)) = 0; -} - -struct insPt { - void operator()(Point& p) { - Point* pr = &basePoints.push(p); - ptrPoints.push(pr); - } -}; - -struct centerXCmp { - template - bool operator()(const T& lhs, const T& rhs) const { - return lhs.t().x() < rhs.t().x(); - } -}; - -struct centerYCmp { - template - bool operator()(const T& lhs, const T& rhs) const { - return lhs.t().y() < rhs.t().y(); - } -}; - -struct centerYCmpInv { - template - bool operator()(const T& lhs, const T& rhs) const { - return rhs.t().y() < lhs.t().y(); - } -}; - -template -void divide(const Iter& b, const Iter& e) { - if (std::distance(b,e) > 64) { - std::sort(b, e, centerXCmp()); - Iter m = Galois::split_range(b, e); - std::sort(b, m, centerYCmpInv()); - std::sort(m, e, centerYCmp()); - divide(b, Galois::split_range(b, m)); - divide(Galois::split_range(b, m), m); - divide(m,Galois::split_range(m, e)); - divide(Galois::split_range(m, e), e); - } else { - std::random_shuffle(b, e); - } -} - -void layoutPoints(PointList& points) { - divide(points.begin(), points.end() - 3); - Galois::do_all(points.begin(), points.end() - 3, insPt()); - Point* p1 = &basePoints.push(*(points.end() - 1)); - Point* p2 = &basePoints.push(*(points.end() - 2)); - Point* p3 = &basePoints.push(*(points.end() - 3)); - addBoundaryNodes(p1,p2,p3); -} - -static void readInput(const std::string& filename) { - PointList points; - ReadPoints(points).from(filename); - - std::cout << "configuration: " << points.size() << " points\n"; - - Galois::preAlloc(2 * numThreads // some per-thread state - + 2 * points.size() * sizeof(Element) // mesh is about 2x number of points (for random points) - * 32 // include graph node size - / (Galois::Runtime::MM::pageSize) // in pages - ); - Galois::reportPageAlloc("MeminfoPre"); - - layoutPoints(points); -} - -static void writePoints(const std::string& filename, const PointList& points) { - std::ofstream out(filename.c_str()); - // - out << points.size() << " 2 0 0\n"; - //out.setf(std::ios::fixed, std::ios::floatfield); - out.setf(std::ios::scientific, std::ios::floatfield); - out.precision(10); - long id = 0; - for (PointList::const_iterator it = points.begin(), end = points.end(); it != end; ++it) { - const Tuple& t = it->t(); - out << id++ << " " << t.x() << " " << t.y() << " 0\n"; - } - - out.close(); -} -static void writeMesh(const std::string& filename) { - long 
numTriangles = 0; - long numSegments = 0; - for (Graph::iterator ii = graph.begin(), ei = graph.end(); ii != ei; ++ii) { - Element& e = graph.getData(*ii); - if (e.boundary()) { - numSegments++; - } else { - numTriangles++; - } - } - - long tid = 0; - long sid = 0; - std::string elementName(filename); - std::string polyName(filename); - - elementName.append(".ele"); - polyName.append(".poly"); - - std::ofstream eout(elementName.c_str()); - std::ofstream pout(polyName.c_str()); - // - eout << numTriangles << " 3 0\n"; - // - // ... - // - pout << "0 2 0 0\n"; - pout << numSegments << " 1\n"; - for (Graph::iterator ii = graph.begin(), ee = graph.end(); ii != ee; ++ii) { - const Element& e = graph.getData(*ii); - if (e.boundary()) { - // - pout << sid++ << " " << e.getPoint(0)->id() << " " << e.getPoint(1)->id() << " 1\n"; - } else { - // [in ccw order] - eout << tid++ << " " << e.getPoint(0)->id() << " "; - if (e.clockwise()) { - eout << e.getPoint(2)->id() << " " << e.getPoint(1)->id() << "\n"; - } else { - eout << e.getPoint(1)->id() << " " << e.getPoint(2)->id() << "\n"; - } - } - } - - eout.close(); - // - pout << "0\n"; - pout.close(); -} - -static void generateMesh() { - typedef Galois::WorkList::AltChunkedLIFO<32> CA; - Galois::for_each_local(ptrPoints, Process(), Galois::wl()); -} - -int main(int argc, char** argv) { - Galois::StatManager statManager; - LonestarStart(argc, argv, name, desc, url); - - readInput(inputname); - - Galois::StatTimer T; - T.start(); - generateMesh(); - T.stop(); - std::cout << "mesh size: " << graph.size() << "\n"; - - Galois::reportPageAlloc("MeminfoPost"); - - if (!skipVerify) { - Verifier verifier; - if (!verifier.verify(&graph)) { - GALOIS_DIE("Triangulation failed"); - } - std::cout << "Triangulation OK\n"; - } - - if (doWriteMesh.size()) { - std::string base = doWriteMesh; - std::cout << "Writing " << base << "\n"; - writeMesh(base.c_str()); - - PointList points; - // Reordering messes up connection between id and place in pointlist - ReadPoints(points).from(inputname); - writePoints(base.append(".node"), points); - } - - return 0; -} diff --git a/maxflow/galois/apps/delaunaytriangulation/DelaunayTriangulationDet.cpp b/maxflow/galois/apps/delaunaytriangulation/DelaunayTriangulationDet.cpp deleted file mode 100644 index 8ef83f7..0000000 --- a/maxflow/galois/apps/delaunaytriangulation/DelaunayTriangulationDet.cpp +++ /dev/null @@ -1,718 +0,0 @@ -/** Delaunay triangulation -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. 
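The Cavity class and Process functor deleted above implement incremental (Bowyer-Watson style) insertion: gather every triangle whose circumcircle contains the new point, remove that cavity, and re-triangulate its boundary edges against the point. A minimal, non-Galois sketch of one such insertion step over a plain triangle list follows; Pt and Tri are hypothetical stand-ins for the deleted Point/Element classes, and there is no locking, boundary handling, or spatial search.

#include <set>
#include <utility>
#include <vector>

struct Pt  { double x, y; };
struct Tri { int a, b, c; };            // indices into the point array, CCW order

// > 0 iff p lies strictly inside the circumcircle of the CCW triangle (a, b, c);
// this is the same 3x3 determinant that Element::inCircle expands below.
double inCircle(const Pt& a, const Pt& b, const Pt& c, const Pt& p) {
  double ax = a.x - p.x, ay = a.y - p.y;
  double bx = b.x - p.x, by = b.y - p.y;
  double cx = c.x - p.x, cy = c.y - p.y;
  return (ax * ax + ay * ay) * (bx * cy - cx * by)
       + (bx * bx + by * by) * (cx * ay - ax * cy)
       + (cx * cx + cy * cy) * (ax * by - bx * ay);
}

// One Bowyer-Watson step: insert pts[pi] into an existing CCW triangulation.
void insertPoint(const std::vector<Pt>& pts, std::vector<Tri>& tris, int pi) {
  const Pt& p = pts[pi];

  // 1. The cavity: every triangle whose circumcircle contains the new point.
  std::vector<Tri> keep, cavity;
  for (const Tri& t : tris)
    (inCircle(pts[t.a], pts[t.b], pts[t.c], p) > 0 ? cavity : keep).push_back(t);

  // 2. Directed edges of the cavity; an edge whose reverse is absent lies on
  //    the cavity boundary (the "outside" triangles tracked by Cavity above).
  std::set<std::pair<int, int>> edges;
  for (const Tri& t : cavity) {
    edges.insert({t.a, t.b});
    edges.insert({t.b, t.c});
    edges.insert({t.c, t.a});
  }

  // 3. Re-triangulate: connect each boundary edge to the new point (stays CCW).
  for (const std::pair<int, int>& e : edges)
    if (!edges.count({e.second, e.first}))
      keep.push_back({e.first, e.second, pi});

  tris.swap(keep);
}

Seeded with a super-triangle that encloses the input (see addBoundaryPoints above) and applied once per point, this yields the same triangulation the deleted drivers build on the Galois graph; the deleted code performs step 1 locally by walking out from the containing triangle rather than scanning every triangle.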
- * - * @section Description - * - * Delaunay triangulation of points in 2d. - * - * @author Xin Sui - * @author Donald Nguyen - */ -#include "Point.h" -#include "Cavity.h" -#include "QuadTree.h" -#include "Verifier.h" - -#include "Galois/Galois.h" -#include "Galois/Bag.h" -#include "Galois/Statistic.h" - -#include "Lonestar/BoilerPlate.h" -#include "llvm/Support/CommandLine.h" - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace cll = llvm::cl; - -static const char* name = "Delaunay Triangulation"; -static const char* desc = "Produces a Delaunay triangulation for a set of points"; -static const char* url = "delaunay_triangulation"; - -static cll::opt doWriteMesh("writemesh", - cll::desc("Write the mesh out to files with basename"), - cll::value_desc("basename")); -static cll::opt doWritePoints("writepoints", - cll::desc("Write the (reordered) points to filename"), - cll::value_desc("filename")); -static cll::opt noReorderPoints("noreorder", - cll::desc("Don't reorder points to improve locality"), - cll::init(false)); -static cll::opt inputname(cll::Positional, cll::desc(""), cll::Required); - -enum DetAlgo { - nondet, - detBase, - detPrefix, - detDisjoint -}; - -static cll::opt detAlgo(cll::desc("Deterministic algorithm:"), - cll::values( - clEnumVal(nondet, "Non-deterministic"), - clEnumVal(detBase, "Base execution"), - clEnumVal(detPrefix, "Prefix execution"), - clEnumVal(detDisjoint, "Disjoint execution"), - clEnumValEnd), cll::init(nondet)); - -static Graph* graph; - -struct GetPointer: public std::unary_function { - Point* operator()(Point& p) const { return &p; } -}; - -//! Our main functor -template -struct Process { - typedef int tt_needs_per_iter_alloc; - typedef int tt_does_not_need_push; - typedef Galois::PerIterAllocTy Alloc; - - QuadTree* tree; - - struct ContainsTuple { - const Graph& graph; - Tuple tuple; - ContainsTuple(const Graph& g, const Tuple& t): graph(g), tuple(t) { } - bool operator()(const GNode& n) const { - assert(!graph.getData(n, Galois::MethodFlag::NONE).boundary()); - return graph.getData(n, Galois::MethodFlag::NONE).inTriangle(tuple); - } - }; - - Process(QuadTree* t): tree(t) { } - - void computeCenter(const Element& e, Tuple& t) const { - for (int i = 0; i < 3; ++i) { - const Tuple& o = e.getPoint(i)->t(); - for (int j = 0; j < 2; ++j) { - t[j] += o[j]; - } - } - for (int j = 0; j < 2; ++j) { - t[j] *= 1/3.0; - } - } - - void findBestNormal(const Element& element, const Point* p, const Point*& bestP1, const Point*& bestP2) { - Tuple center(0); - computeCenter(element, center); - int scale = element.clockwise() ? 
1 : -1; - - Tuple origin = p->t() - center; -// double length2 = origin.x() * origin.x() + origin.y() * origin.y(); - bestP1 = bestP2 = NULL; - double bestVal = 0.0; - for (int i = 0; i < 3; ++i) { - int next = i + 1; - if (next > 2) next -= 3; - - const Point* p1 = element.getPoint(i); - const Point* p2 = element.getPoint(next); - double dx = p2->t().x() - p1->t().x(); - double dy = p2->t().y() - p1->t().y(); - Tuple normal(scale * -dy, scale * dx); - double val = normal.dot(origin); // / length2; - if (bestP1 == NULL || val > bestVal) { - bestVal = val; - bestP1 = p1; - bestP2 = p2; - } - } - assert(bestP1 != NULL && bestP2 != NULL && bestVal > 0); - } - - GNode findCorrespondingNode(GNode start, const Point* p1, const Point* p2) { - for (Graph::edge_iterator ii = graph->edge_begin(start, Galois::MethodFlag::CHECK_CONFLICT), - ei = graph->edge_end(start, Galois::MethodFlag::CHECK_CONFLICT); ii != ei; ++ii) { - GNode dst = graph->getEdgeDst(ii); - Element& e = graph->getData(dst, Galois::MethodFlag::NONE); - int count = 0; - for (int i = 0; i < e.dim(); ++i) { - if (e.getPoint(i) == p1 || e.getPoint(i) == p2) { - if (++count == 2) - return dst; - } - } - } - GALOIS_DIE("unreachable"); - return start; - } - - bool planarSearch(const Point* p, GNode start, GNode& node) { - // Try simple hill climbing instead - ContainsTuple contains(*graph, p->t()); - while (!contains(start)) { - Element& element = graph->getData(start, Galois::MethodFlag::CHECK_CONFLICT); - if (element.boundary()) { - // Should only happen when quad tree returns a boundary point which is rare - // There's only one way to go from here - assert(std::distance(graph->edge_begin(start), graph->edge_end(start)) == 1); - start = graph->getEdgeDst(graph->edge_begin(start, Galois::MethodFlag::CHECK_CONFLICT)); - } else { - // Find which neighbor will get us to point fastest by computing normal - // vectors - const Point *p1, *p2; - findBestNormal(element, p, p1, p2); - start = findCorrespondingNode(start, p1, p2); - } - } - - node = start; - return true; - } - - bool findContainingElement(const Point* p, GNode& node) { - Point* result; - if (!tree->find(p, result)) { - return false; - } - - result->get(Galois::MethodFlag::CHECK_CONFLICT); - - GNode someNode = result->someElement(); - - // Not in mesh yet - if (!someNode) { - return false; - } - - return planarSearch(p, someNode, node); - } - - struct LocalState { - Cavity cav; - LocalState(Process& self, Galois::PerIterAllocTy& alloc): cav(*graph, alloc) { } - }; - typedef LocalState GaloisDeterministicLocalState; - static_assert(Galois::has_deterministic_local_state::value, "Oops"); - - //! 
Parallel operator - void operator()(Point* p, Galois::UserContext& ctx) { - Cavity* cavp = NULL; - - if (Version == detDisjoint) { - bool used; - LocalState* localState = (LocalState*) ctx.getLocalState(used); - if (used) { - localState->cav.update(); - return; - } else { - cavp = &localState->cav; - } - } - - p->get(Galois::MethodFlag::CHECK_CONFLICT); - assert(!p->inMesh()); - - GNode node; - if (!findContainingElement(p, node)) { - // Someone updated an element while we were searching, producing - // a semi-consistent state - //ctx.push(p); - // Current version is safe with locking so this shouldn't happen - GALOIS_DIE("unreachable"); - return; - } - - assert(graph->getData(node).inTriangle(p->t())); - assert(graph->containsNode(node)); - - if (Version == detDisjoint) { - cavp->init(node, p); - cavp->build(); - } else { - Cavity cav(*graph, ctx.getPerIterAlloc()); - cav.init(node, p); - cav.build(); - if (Version == detPrefix) - return; - cav.update(); - } - } - - //! Serial operator - void operator()(Point* p) { - p->get(Galois::MethodFlag::CHECK_CONFLICT); - assert(!p->inMesh()); - - GNode node; - if (!findContainingElement(p, node)) { - GALOIS_DIE("Could not find triangle containing point"); - return; - } - - assert(graph->getData(node).inTriangle(p->t())); - assert(graph->containsNode(node)); - - Cavity<> cav(*graph); - cav.init(node, p); - cav.build(); - cav.update(); - } -}; - -typedef std::deque PointList; - -class ReadPoints { - void addBoundaryPoints() { - double minX, maxX, minY, maxY; - - minX = minY = std::numeric_limits::max(); - maxX = maxY = std::numeric_limits::min(); - - for (PointList::iterator ii = points.begin(), ei = points.end(); ii != ei; ++ii) { - double x = ii->t().x(); - double y = ii->t().y(); - if (x < minX) - minX = x; - else if (x > maxX) - maxX = x; - if (y < minY) - minY = y; - else if (y > maxY) - maxY = y; - } - - size_t size = points.size(); - double width = maxX - minX; - double height = maxY - minY; - double maxLength = std::max(width, height); - double centerX = minX + width / 2.0; - double centerY = minY + height / 2.0; - double radius = maxLength * 3.0; // radius of circle that should cover all points - - for (int i = 0; i < 3; ++i) { - double dX = radius * cos(2*M_PI*(i/3.0)); - double dY = radius * sin(2*M_PI*(i/3.0)); - points.push_back(Point(centerX + dX, centerY + dY, size + i)); - } - } - - void nextLine(std::ifstream& scanner) { - scanner.ignore(std::numeric_limits::max(), '\n'); - } - - void fromTriangle(std::ifstream& scanner) { - double x, y; - long numPoints; - - scanner >> numPoints; - - int dim; - scanner >> dim; - assert(dim == 2); - int k; - scanner >> k; // number of attributes - assert(k == 0); - scanner >> k; // has boundary markers? 
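addBoundaryPoints above encloses the input in a large triangle: three points placed on a circle of radius 3 * max(width, height) around the bounding-box centre. One detail worth noting is that it seeds maxX/maxY with std::numeric_limits<double>::min(), which is the smallest positive double rather than the most negative one, so the computed maximum comes out wrong whenever all x (or all y) coordinates are negative. A standalone sketch of the same construction, seeding the box from the first point instead; Pt is the hypothetical point type from the earlier sketch and the input is assumed non-empty.

#include <algorithm>
#include <array>
#include <cmath>
#include <vector>

struct Pt { double x, y; };

// Three points on a circle of radius 3*max(width, height) around the bounding
// box centre; the equilateral triangle they span encloses every input point.
std::array<Pt, 3> boundaryPoints(const std::vector<Pt>& pts) {
  double minX = pts[0].x, maxX = pts[0].x, minY = pts[0].y, maxY = pts[0].y;
  for (const Pt& p : pts) {
    minX = std::min(minX, p.x); maxX = std::max(maxX, p.x);
    minY = std::min(minY, p.y); maxY = std::max(maxY, p.y);
  }
  const double kPi = 3.14159265358979323846;
  double cx = (minX + maxX) / 2.0, cy = (minY + maxY) / 2.0;
  double radius = 3.0 * std::max(maxX - minX, maxY - minY);
  std::array<Pt, 3> corners;
  for (int i = 0; i < 3; ++i)
    corners[i] = {cx + radius * std::cos(2.0 * kPi * i / 3.0),
                  cy + radius * std::sin(2.0 * kPi * i / 3.0)};
  return corners;
}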
- - for (long id = 0; id < numPoints; ++id) { - scanner >> k; // point id - scanner >> x >> y; - nextLine(scanner); - points.push_back(Point(x, y, id)); - } - } - - void fromPointList(std::ifstream& scanner) { - double x, y; - - // comment line - nextLine(scanner); - size_t id = 0; - while (!scanner.eof()) { - scanner >> x >> y; - if (x == 0 && y == 0) - break; - points.push_back(Point(x, y, id++)); - x = y = 0; - nextLine(scanner); - } - } - - PointList& points; - -public: - ReadPoints(PointList& p): points(p) { } - - void from(const std::string& name) { - std::ifstream scanner(name.c_str()); - if (!scanner.good()) { - GALOIS_DIE("Could not open file: ", name); - } - if (name.find(".node") == name.size() - 5) { - fromTriangle(scanner); - } else { - fromPointList(scanner); - } - scanner.close(); - - if (points.size()) - addBoundaryPoints(); - else { - GALOIS_DIE("No points found in file: ", name); - } - } -}; - -static void writePoints(const std::string& filename, const PointList& points) { - std::ofstream out(filename.c_str()); - // - out << points.size() << " 2 0 0\n"; - //out.setf(std::ios::fixed, std::ios::floatfield); - out.setf(std::ios::scientific, std::ios::floatfield); - out.precision(10); - long id = 0; - for (PointList::const_iterator it = points.begin(), end = points.end(); it != end; ++it) { - const Tuple& t = it->t(); - out << id++ << " " << t.x() << " " << t.y() << " 0\n"; - } - - out.close(); -} - -//! All Point* refer to elements in this bag -Galois::InsertBag basePoints; -size_t maxRounds; -Galois::InsertBag* rounds; -const int roundShift = 4; //! round sizes are portional to (1 << roundsShift) - -static void copyPointsFromRounds(PointList& points) { - for (int i = maxRounds - 1; i >= 0; --i) { - Galois::InsertBag& pptrs = rounds[i]; - for (Galois::InsertBag::iterator ii = pptrs.begin(), ei = pptrs.end(); ii != ei; ++ii) { - points.push_back(*(*ii)); - } - } -} - -static void addBoundaryNodes(Point* p1, Point* p2, Point* p3) { - Element large_triangle(p1, p2, p3); - GNode large_node = graph->createNode(large_triangle); - graph->addNode(large_node); - - p1->addElement(large_node); - p2->addElement(large_node); - p3->addElement(large_node); - - Element border_ele1(p1, p2); - Element border_ele2(p2, p3); - Element border_ele3(p3, p1); - - GNode border_node1 = graph->createNode(border_ele1); - GNode border_node2 = graph->createNode(border_ele2); - GNode border_node3 = graph->createNode(border_ele3); - - graph->addNode(border_node1); - graph->addNode(border_node2); - graph->addNode(border_node3); - - graph->getEdgeData(graph->addEdge(large_node, border_node1)) = 0; - graph->getEdgeData(graph->addEdge(large_node, border_node2)) = 1; - graph->getEdgeData(graph->addEdge(large_node, border_node3)) = 2; - - graph->getEdgeData(graph->addEdge(border_node1, large_node)) = 0; - graph->getEdgeData(graph->addEdge(border_node2, large_node)) = 0; - graph->getEdgeData(graph->addEdge(border_node3, large_node)) = 0; -} - -//! Streaming point distribution -struct GenerateRounds { - typedef Galois::Runtime::PerThreadStorage CounterTy; - - const PointList& points; - size_t log2; - - GenerateRounds(const PointList& p, size_t l): points(p), log2(l) { } - - void operator()(size_t index) { - const Point& p = points[index]; - - Point* ptr = &basePoints.push(p); - int r = 0; - for (size_t i = 0; i < log2; ++i) { - size_t mask = (1UL << (i + 1)) - 1; - if ((index & mask) == (1UL << i)) { - r = i; - break; - } - } - - rounds[r / roundShift].push(ptr); - } -}; - -//! 
Blocked point distribution (exponentially increasing block size) with points randomized -//! within a round -static void generateRoundsOld(PointList& points, bool randomize) { - size_t counter = 0; - size_t round = 0; - size_t next = 1 << roundShift; - std::vector buf; - - PointList::iterator ii = points.begin(), ei = points.end(); - while (ii != ei) { - Point* ptr = &basePoints.push(*ii); - buf.push_back(ptr); - ++ii; - if (ii == ei || counter > next) { - next *= next; - int r = maxRounds - 1 - round; - if (randomize) - std::random_shuffle(buf.begin(), buf.end()); - std::copy(buf.begin(), buf.end(), std::back_inserter(rounds[r])); - buf.clear(); - ++round; - } - ++counter; - } -} - -static void generateRounds(PointList& points, bool addBoundary) { - size_t size = points.size() - 3; - - size_t log2 = std::max((size_t) floor(log(size) / log(2)), (size_t) 1); - maxRounds = log2 / roundShift; - rounds = new Galois::InsertBag[maxRounds+1]; // +1 for boundary points - - PointList ordered; - //ordered.reserve(size); - - if (noReorderPoints) { - std::copy(points.begin(), points.begin() + size, std::back_inserter(ordered)); - generateRoundsOld(ordered, false); - } else { - // Reorganize spatially - QuadTree q( - boost::make_transform_iterator(points.begin(), GetPointer()), - boost::make_transform_iterator(points.begin() + size, GetPointer())); - - q.output(std::back_inserter(ordered)); - - if (true) { - if (detAlgo == nondet) { - Galois::do_all( - boost::counting_iterator(0), - boost::counting_iterator(size), - GenerateRounds(ordered, log2)); - } else { - std::for_each( - boost::counting_iterator(0), - boost::counting_iterator(size), - GenerateRounds(ordered, log2)); - } - } else { - generateRoundsOld(ordered, true); - } - } - - if (!addBoundary) - return; - - // Now, handle boundary points - size_t last = points.size(); - Point* p1 = &basePoints.push(points[last-1]); - Point* p2 = &basePoints.push(points[last-2]); - Point* p3 = &basePoints.push(points[last-3]); - - rounds[maxRounds].push(p1); - rounds[maxRounds].push(p2); - rounds[maxRounds].push(p3); - - addBoundaryNodes(p1, p2, p3); -} - -static void readInput(const std::string& filename, bool addBoundary) { - PointList points; - ReadPoints(points).from(filename); - - std::cout << "configuration: " << points.size() << " points\n"; - - graph = new Graph(); - -#if 1 - Galois::preAlloc( - 32 * points.size() * sizeof(Element) * 1.5 // mesh is about 2x number of points (for random points) - / (Galois::Runtime::MM::pageSize) // in pages - ); -#else - Galois::preAlloc(1 * numThreads // some per-thread state - + 2 * points.size() * sizeof(Element) // mesh is about 2x number of points (for random points) - * 32 // include graph node size - / (Galois::Runtime::MM::pageSize) // in pages - ); -#endif - Galois::reportPageAlloc("MeminfoPre"); - - Galois::StatTimer T("generateRounds"); - T.start(); - generateRounds(points, addBoundary); - T.stop(); -} - -static void writeMesh(const std::string& filename) { - long numTriangles = 0; - long numSegments = 0; - for (Graph::iterator ii = graph->begin(), ei = graph->end(); ii != ei; ++ii) { - Element& e = graph->getData(*ii); - if (e.boundary()) { - numSegments++; - } else { - numTriangles++; - } - } - - long tid = 0; - long sid = 0; - std::string elementName(filename); - std::string polyName(filename); - - elementName.append(".ele"); - polyName.append(".poly"); - - std::ofstream eout(elementName.c_str()); - std::ofstream pout(polyName.c_str()); - // - eout << numTriangles << " 3 0\n"; - // - // ... 
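  // (For reference, the .poly layout written just below: a "0 2 0 0" vertex
  //  header, zero vertices here because they live in the companion .node file,
  //  then "<#segments> 1" followed by one "<id> <p0> <p1> 1" line per boundary
  //  segment, and finally a lone "0" for the hole count. The .ele file written
  //  above pairs each triangle id with its three corner point ids, listed in
  //  counter-clockwise order.)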
- // - pout << "0 2 0 0\n"; - pout << numSegments << " 1\n"; - for (Graph::iterator ii = graph->begin(), ee = graph->end(); ii != ee; ++ii) { - const Element& e = graph->getData(*ii); - if (e.boundary()) { - // - pout << sid++ << " " << e.getPoint(0)->id() << " " << e.getPoint(1)->id() << " 1\n"; - } else { - // [in ccw order] - eout << tid++ << " " << e.getPoint(0)->id() << " "; - if (e.clockwise()) { - eout << e.getPoint(2)->id() << " " << e.getPoint(1)->id() << "\n"; - } else { - eout << e.getPoint(1)->id() << " " << e.getPoint(2)->id() << "\n"; - } - } - } - - eout.close(); - // - pout << "0\n"; - pout.close(); -} - -static void generateMesh() { - typedef Galois::WorkList::AltChunkedLIFO<32> Chunked; - - for (int i = maxRounds - 1; i >= 0; --i) { - Galois::StatTimer BT("buildtree"); - BT.start(); - Galois::InsertBag& tptrs = rounds[i+1]; - QuadTree tree(tptrs.begin(), tptrs.end()); - BT.stop(); - - Galois::StatTimer PT("ParallelTime"); - PT.start(); - Galois::InsertBag& pptrs = rounds[i]; - switch (detAlgo) { - case nondet: - Galois::for_each_local(pptrs, Process<>(&tree), Galois::wl()); break; - case detBase: - Galois::for_each_det(pptrs.begin(), pptrs.end(), Process<>(&tree)); break; - case detPrefix: - Galois::for_each_det(pptrs.begin(), pptrs.end(), Process(&tree), Process<>(&tree)); - break; - case detDisjoint: - Galois::for_each_det(pptrs.begin(), pptrs.end(), Process(&tree)); break; - default: GALOIS_DIE("Unknown algorithm: ", detAlgo); - } - PT.stop(); - } -} - -int main(int argc, char** argv) { - Galois::StatManager statManager; - LonestarStart(argc, argv, name, desc, url); - - bool writepoints = doWritePoints.size() > 0; - readInput(inputname, !writepoints); - if (writepoints) { - std::cout << "Writing " << doWritePoints << "\n"; - PointList points; - copyPointsFromRounds(points); - writePoints(doWritePoints, points); - delete graph; - delete [] rounds; - return 0; - } - - const char* name = 0; - switch (detAlgo) { - case nondet: name = "nondet"; break; - case detBase: name = "detBase"; break; - case detPrefix: name = "detPrefix"; break; - case detDisjoint: name = "detDisjoint"; break; - default: name = "unknown"; break; - } - Galois::Runtime::LL::gInfo("Algorithm ", name); - - Galois::StatTimer T; - T.start(); - generateMesh(); - T.stop(); - std::cout << "mesh size: " << graph->size() << "\n"; - - Galois::reportPageAlloc("MeminfoPost"); - - if (!skipVerify) { - Verifier verifier; - if (!verifier.verify(graph)) { - GALOIS_DIE("Triangulation failed"); - } - std::cout << "Triangulation OK\n"; - } - - if (doWriteMesh.size()) { - std::string base = doWriteMesh; - std::cout << "Writing " << base << "\n"; - writeMesh(base.c_str()); - - PointList points; - // Reordering messes up connection between id and place in pointlist - ReadPoints(points).from(inputname); - writePoints(base.append(".node"), points); - } - - delete graph; - delete [] rounds; - - return 0; -} diff --git a/maxflow/galois/apps/delaunaytriangulation/Element.cpp b/maxflow/galois/apps/delaunaytriangulation/Element.cpp deleted file mode 100644 index 14293b3..0000000 --- a/maxflow/galois/apps/delaunaytriangulation/Element.cpp +++ /dev/null @@ -1,207 +0,0 @@ -/** An element (i.e., a triangle or a boundary line) -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. 
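generateMesh above drains the points round by round: the already-inserted points in rounds[i+1] seed a quad tree that supplies starting triangles for the points in rounds[i]. Those rounds are filled by GenerateRounds earlier from the position of the lowest set bit of each point's index in the spatially sorted order, so the rounds processed first are small and each later round is roughly 2^roundShift times larger, a biased, locality-friendly insertion order. A self-contained sketch of just that assignment, with the same parameters as the deleted code:

#include <cstddef>

// Round for the point at position `index` in the spatially sorted order:
// r is the position of the lowest set bit of index (0 when index == 0), and
// groups of `roundShift` consecutive bit positions share a round.
int roundOf(std::size_t index, std::size_t log2n, int roundShift = 4) {
  int r = 0;
  for (std::size_t i = 0; i < log2n; ++i) {
    std::size_t mask = (std::size_t{1} << (i + 1)) - 1;
    if ((index & mask) == (std::size_t{1} << i)) {
      r = static_cast<int>(i);
      break;
    }
  }
  return r / roundShift;
}

rounds[maxRounds] is reserved for the three boundary points, so they are already in the mesh before the first processed round builds its quad tree.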
- * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author Xin Sui - * @author Donald Nguyen - */ -#include "Element.h" -#include "Point.h" - -std::ostream& operator<<(std::ostream& out, const Element& e) { - return e.print(out); -} - -bool Element::inTriangle(const Tuple& p) const { - if (boundary()) - return false; - - const Tuple& p1 = points[0]->t(); - const Tuple& p2 = points[1]->t(); - const Tuple& p3 = points[2]->t(); - - if ((p1 == p) || (p2 == p) || (p3 == p)) { - return false; - } - - int count = 0; - double px = p.x(); - double py = p.y(); - double p1x = p1.x(); - double p1y = p1.y(); - double p2x = p2.x(); - double p2y = p2.y(); - double p3x = p3.x(); - double p3y = p3.y(); - - if (p2x < p1x) { - if ((p2x < px) && (p1x >= px)) { - if (((py - p2y) * (p1x - p2x)) < ((px - p2x) * (p1y - p2y))) { - count = 1; - } - } - } else { - if ((p1x < px) && (p2x >= px)) { - if (((py - p1y) * (p2x - p1x)) < ((px - p1x) * (p2y - p1y))) { - count = 1; - } - } - } - - if (p3x < p2x) { - if ((p3x < px) && (p2x >= px)) { - if (((py - p3y) * (p2x - p3x)) < ((px - p3x) * (p2y - p3y))) { - if (count == 1) { - return false; - } - count++; - } - } - } else { - if ((p2x < px) && (p3x >= px)) { - if (((py - p2y) * (p3x - p2x)) < ((px - p2x) * (p3y - p2y))) { - if (count == 1) { - return false; - } - count++; - } - } - } - - if (p1x < p3x) { - if ((p1x < px) && (p3x >= px)) { - if (((py - p1y) * (p3x - p1x)) < ((px - p1x) * (p3y - p1y))) { - if (count == 1) { - return false; - } - count++; - } - } - } else { - if ((p3x < px) && (p1x >= px)) { - if (((py - p3y) * (p1x - p3x)) < ((px - p3x) * (p1y - p3y))) { - if (count == 1) { - return false; - } - count++; - } - } - } - - return count == 1; -} - -bool Element::clockwise() const { - assert(!boundary()); - - double t1_x = points[0]->t().x(); - double t1_y = points[0]->t().y(); - - double t2_x = points[1]->t().x(); - double t2_y = points[1]->t().y(); - - double t3_x = points[2]->t().x(); - double t3_y = points[2]->t().y(); - - double counter_clockwise = (t2_x - t1_x) * (t3_y - t1_y) - (t3_x - t1_x) * (t2_y - t1_y); - - return counter_clockwise < 0; -} - -bool Element::inCircle(const Tuple& p) const { - if (boundary()) - return false; - - // This version computes the determinant of a matrix including the - // coordinates of each points + distance of these points to the origin - // in order to check if a point is inside a triangle or not - double t1_x = points[0]->t().x(); - double t1_y = points[0]->t().y(); - - double t2_x = points[1]->t().x(); - double t2_y = points[1]->t().y(); - - double t3_x = points[2]->t().x(); - double t3_y = points[2]->t().y(); - - double p_x = p.x(); - double p_y = p.y(); - - // Check if the points (t1,t2,t3) are sorted clockwise or - // counter-clockwise: - // 
-> counter_clockwise > 0 => counter clockwise - // -> counter_clockwise = 0 => degenerated triangle - // -> counter_clockwise < 0 => clockwise - double counter_clockwise = (t2_x - t1_x) * (t3_y - t1_y) - (t3_x - t1_x) * (t2_y - t1_y); - - // If the triangle is degenerate, then the triangle should be updated - if (counter_clockwise == 0.0) { - return true; - } - - // Compute the following determinant: - // | t1_x-p_x t1_y-p_y (t1_x-p_x)^2+(t1_y-p_y)^2 | - // | t2_x-p_x t2_y-p_y (t2_x-p_x)^2+(t2_y-p_y)^2 | - // | t3_x-p_x t3_y-p_y (t3_x-p_x)^2+(t3_y-p_y)^2 | - // - // If the determinant is >0 then the point (p_x,p_y) is inside the - // circumcircle of the triangle (t1,t2,t3). - - // Value of columns 1 and 2 of the matrix - double t1_p_x, t1_p_y, t2_p_x, t2_p_y, t3_p_x, t3_p_y; - // Determinant of minors extracted from columns 1 and 2 - // (det_t3_t1_m corresponds to the opposite) - double det_t1_t2, det_t2_t3, det_t3_t1_m; - // Values of the column 3 of the matrix - double t1_col3, t2_col3, t3_col3; - - t1_p_x = t1_x - p_x; - t1_p_y = t1_y - p_y; - t2_p_x = t2_x - p_x; - t2_p_y = t2_y - p_y; - t3_p_x = t3_x - p_x; - t3_p_y = t3_y - p_y; - - det_t1_t2 = t1_p_x * t2_p_y - t2_p_x * t1_p_y; - det_t2_t3 = t2_p_x * t3_p_y - t3_p_x * t2_p_y; - det_t3_t1_m = t3_p_x * t1_p_y - t1_p_x * t3_p_y; - t1_col3 = t1_p_x * t1_p_x + t1_p_y * t1_p_y; - t2_col3 = t2_p_x * t2_p_x + t2_p_y * t2_p_y; - t3_col3 = t3_p_x * t3_p_x + t3_p_y * t3_p_y; - - double det = t1_col3 * det_t2_t3 + t2_col3 * det_t3_t1_m + t3_col3 * det_t1_t2; - - // If the points are enumerated in clockwise, then negate the result - if (counter_clockwise < 0) { - return det < 0; - } - return det > 0; -} - -std::ostream& Element::print(std::ostream& out) const { - out << '['; - for (int i = 0; i < dim(); ++i) { - out << points[i]->id() << " "; - points[i]->print(out); - out << (i < (dim() - 1) ? ", " : ""); - } - out << ']'; - return out; -} - diff --git a/maxflow/galois/apps/delaunaytriangulation/Element.h b/maxflow/galois/apps/delaunaytriangulation/Element.h deleted file mode 100644 index 12148d7..0000000 --- a/maxflow/galois/apps/delaunaytriangulation/Element.h +++ /dev/null @@ -1,75 +0,0 @@ -/** An element (i.e., a triangle or a boundary line) -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. 
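The geometry in the deleted Element.cpp boils down to two predicates: clockwise() takes the sign of a cross product, and inTriangle() is a crossing-number test (inCircle() is the circumcircle determinant already sketched above). An equivalent orientation-based formulation is often easier to audit; a minimal sketch follows, with Pt again a hypothetical stand-in for Point, and note that its treatment of points exactly on an edge may differ from the deleted crossing-number code.

struct Pt { double x, y; };

// Twice the signed area of triangle (a, b, c): > 0 counter-clockwise,
// < 0 clockwise, == 0 degenerate. Element::clockwise() above returns
// orient2d(p0, p1, p2) < 0.
double orient2d(const Pt& a, const Pt& b, const Pt& c) {
  return (b.x - a.x) * (c.y - a.y) - (c.x - a.x) * (b.y - a.y);
}

// p lies strictly inside triangle (a, b, c) iff it is on the same, nonzero
// side of all three directed edges.
bool pointInTriangle(const Pt& a, const Pt& b, const Pt& c, const Pt& p) {
  double d1 = orient2d(a, b, p);
  double d2 = orient2d(b, c, p);
  double d3 = orient2d(c, a, p);
  bool hasNeg = d1 < 0 || d2 < 0 || d3 < 0;
  bool hasPos = d1 > 0 || d2 > 0 || d3 > 0;
  return !(hasNeg && hasPos) && d1 != 0 && d2 != 0 && d3 != 0;
}

As with the deleted code, these are plain floating-point predicates; a production triangulator would typically switch to exact or adaptive-precision versions.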
- * - * @author Xin Sui - * @author Donald Nguyen - */ -#ifndef ELEMENT_H -#define ELEMENT_H - -#include "Tuple.h" - -#include -#include - -class Point; - -class Element { - Point* points[3]; - -public: - Element(const Element& e) { - points[0] = e.points[0]; - points[1] = e.points[1]; - points[2] = e.points[2]; - } - - Element(Point* a, Point* b, Point* c) { - points[0] = a; - points[1] = b; - points[2] = c; - } - - Element(Point* a, Point* b) { - points[0] = a; - points[1] = b; - points[2] = NULL; - } - - Point* getPoint(int i) { return points[i]; } - const Point* getPoint(int i) const { return points[i]; } - - bool boundary() const { return points[2] == NULL; } - int dim() const { return boundary() ? 2 : 3; } - - bool clockwise() const; - - //! determine if a tuple is inside the triangle - bool inTriangle(const Tuple& p) const; - - //! determine if the circumcircle of the triangle contains the tuple - bool inCircle(const Tuple& p) const; - - std::ostream& print(std::ostream& out) const; -}; - -std::ostream& operator<<(std::ostream& out, const Element& e); - -#endif diff --git a/maxflow/galois/apps/delaunaytriangulation/Graph.h b/maxflow/galois/apps/delaunaytriangulation/Graph.h deleted file mode 100644 index 9b2e2c4..0000000 --- a/maxflow/galois/apps/delaunaytriangulation/Graph.h +++ /dev/null @@ -1,162 +0,0 @@ -/** -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * @author Xin Sui - * @author Donald Nguyen - */ -#ifndef GRAPH_H -#define GRAPH_H - -#include "Element.h" - -#include "Galois/optional.h" -#include "Galois/Graph/Graph.h" - -#include -#include - -typedef Galois::Graph::FirstGraph Graph; -typedef Graph::GraphNode GNode; - -//! 
Factor out common graph traversals -template > -struct Searcher: private boost::noncopyable { - typedef Alloc allocator_type; - typedef typename Alloc::template rebind::other GNodeVectorAlloc; - typedef std::vector GNodeVector; - - struct Marker { - GNodeVector seen; - Marker(Graph&, const Alloc& a): seen(a) { } - void mark(GNode n) { - seen.push_back(n); - } - bool hasMark(GNode n) { - return std::find(seen.begin(), seen.end(), n) != seen.end(); - } - }; - - Graph& graph; - GNodeVector matches, inside; - const allocator_type& alloc; - - Searcher(Graph& g, const Alloc& a = allocator_type()): - graph(g), matches(a), inside(a), alloc(a) { } - - struct DetLess: public std::binary_function { - Graph& g; - DetLess(Graph& x): g(x) { } - bool operator()(GNode a, GNode b) const { - Element& e1 = g.getData(a, Galois::MethodFlag::NONE); - Element& e2 = g.getData(b, Galois::MethodFlag::NONE); - - for (int i = 0; i < 3; ++i) { - uintptr_t v1 = (i < 2 || !e1.boundary()) ? reinterpret_cast(e1.getPoint(i)) : 0; - uintptr_t v2 = (i < 2 || !e2.boundary()) ? reinterpret_cast(e2.getPoint(i)) : 0; - if (v1 < v2) - return true; - else if (v1 > v2) - return false; - } - return false; - } - }; - - void removeDupes(GNodeVector& v) { - std::sort(v.begin(), v.end(), DetLess(graph)); - typename GNodeVector::iterator end = std::unique(v.begin(), v.end()); - v.resize(end - v.begin()); - } - - template - void find_(const GNode& start, const Pred& pred, bool all) { - typedef Galois::optional SomeGNode; - typedef typename Alloc::template rebind>::other WorklistAlloc; - typedef std::deque, WorklistAlloc> Worklist; - - Worklist wl(alloc); - wl.push_back(std::make_pair(start, SomeGNode())); - - Marker marker(graph, alloc); - while (!wl.empty()) { - GNode cur = wl.front().first; - SomeGNode prev = wl.front().second; - - wl.pop_front(); - - if (!graph.containsNode(cur, Galois::MethodFlag::CHECK_CONFLICT)) - continue; - - if (marker.hasMark(cur)) - continue; - - // NB(ddn): Technically this makes DelaunayTriangulation.cpp::Process not cautious - if (!all) - marker.mark(cur); - - bool matched = false; - if (pred(cur)) { - matched = true; - matches.push_back(cur); - if (all) { - marker.mark(cur); - } - else - break; // Found it - } else { - if (all && prev) - inside.push_back(*prev); - } - - // Search neighbors (a) when matched and looking for all or (b) when no match and looking - // for first - if (matched == all) { - for (Graph::edge_iterator ii = graph.edge_begin(cur, Galois::MethodFlag::CHECK_CONFLICT), - ee = graph.edge_end(cur, Galois::MethodFlag::CHECK_CONFLICT); - ii != ee; ++ii) { - GNode dst = graph.getEdgeDst(ii); - wl.push_back(std::make_pair(dst, SomeGNode(cur))); - } - } - } - - if (all) { - removeDupes(matches); - removeDupes(inside); - } - } - - //! Find the first occurance of element matching pred - template - void findFirst(const GNode& start, const Pred& p) { - find_(start, p, false); - } - - //! 
Find all the elements matching pred (assuming monotonic predicate) - template - void findAll(const GNode& start, const Pred& p) { - find_(start, p, true); - return; - } -}; - -#endif diff --git a/maxflow/galois/apps/delaunaytriangulation/Point.h b/maxflow/galois/apps/delaunaytriangulation/Point.h deleted file mode 100644 index ed88d0f..0000000 --- a/maxflow/galois/apps/delaunaytriangulation/Point.h +++ /dev/null @@ -1,81 +0,0 @@ -/** A coordinate and possibly a link to a containing triangle -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author Xin Sui - * @author Donald Nguyen - */ -#ifndef POINT_H -#define POINT_H - -#include "Tuple.h" -#include "Graph.h" - -#include "Galois/CheckedObject.h" - -#include -#include - -class Point: public Galois::GChecked { - Tuple m_t; - GNode m_n; - long m_id; - -public: - Point(double x, double y, long id): m_t(x,y), m_n(NULL), m_id(id) {} - - const Tuple& t() const { return m_t; } - long id() const { return m_id; } - - Tuple& t() { return m_t; } - long& id() { return m_id; } - - void addElement(const GNode& n) { - m_n = n; - } - - void removeElement(const GNode& n) { - if (m_n == n) - m_n = NULL; - } - - bool inMesh() const { - return m_n != NULL; - } - - GNode someElement() const { - return m_n; - } - - void print(std::ostream& os) const { - os << "(id: " << m_id << " t: "; - m_t.print(os); - if (m_n != NULL) - os << " SOME)"; - else - os << " NULL)"; - } -}; - -static inline std::ostream& operator<<(std::ostream& os, const Point& rhs) { - rhs.print(os); - return os; -} - -#endif diff --git a/maxflow/galois/apps/delaunaytriangulation/QuadTree.h b/maxflow/galois/apps/delaunaytriangulation/QuadTree.h deleted file mode 100644 index a8b7304..0000000 --- a/maxflow/galois/apps/delaunaytriangulation/QuadTree.h +++ /dev/null @@ -1,625 +0,0 @@ -/** A quad-tree -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. 
Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author Donald Nguyen - */ -#ifndef QUADTREE_H -#define QUADTREE_H - -#include "Point.h" -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" -#include -#include - -#include - -inline int getIndex(const Tuple& a, const Tuple& b) { - int index = 0; - for (int i = 0; i < 2; ++i) { - if (a[i] < b[i]) { - index += 1 << i; - } - } - return index; -} - -inline void makeNewCenter(int index, const Tuple& center, double radius, Tuple& newCenter) { - newCenter = center; - for (int i = 0; i < 2; ++i) { - newCenter[i] += (index & (1 << i)) > 0 ? radius : -radius; - } -} - - -static const int maxLeafSize = 16; - -/** - * Finds points nearby a given point. - */ -class PQuadTree { - struct FindResult { - Point* p; - double best; - }; - - struct DerefPointer: public std::unary_function { - Point operator()(Point* p) const { return *p; } - }; - - struct Node { - typedef boost::array PointsTy; - Node* child[4]; - PointsTy* points; - int size; - - //! Make internal node - explicit Node() { - memset(child, 0, sizeof(*child) * 4); - points = NULL; - } - - //! Make leaf node - Node(Point* p, PointsTy* ps) { - memset(child, 0, sizeof(*child) * 4); - points = ps; - points->at(0) = p; - size = 1; - } - - bool isLeaf() const { - return points != NULL; - } - }; - - void deleteNode(Node* root) { - if (root->isLeaf()) { - pointsAlloc.destroy(root->points); - pointsAlloc.deallocate(root->points, 1); - } else { - for (int i = 0; i < 4; ++i) { - if (root->child[i]) - deleteNode(root->child[i]); - } - } - nodeAlloc.destroy(root); - nodeAlloc.deallocate(root, 1); - } - - Node* newNode() { - Node* n = nodeAlloc.allocate(1); - nodeAlloc.construct(n, Node()); - return n; - } - - Node* newNode(Point *p) { - Node* n = nodeAlloc.allocate(1); - Node::PointsTy* ps = pointsAlloc.allocate(1); - pointsAlloc.construct(ps, Node::PointsTy()); - nodeAlloc.construct(n, Node(p, ps)); - return n; - } - - - //! Provide appropriate initial values for reduction - template - struct MTuple: public Tuple { - MTuple(): Tuple(isMax ? std::numeric_limits::min() : std::numeric_limits::max()) { } - MTuple(const Tuple& t): Tuple(t) { } - }; - - template - struct MTupleReducer { - void operator()(MTuple& lhs, const MTuple& rhs) const { - for (int i = 0; i < 2; ++i) - lhs[i] = isMax ? 
std::max(lhs[i], rhs[i]) : std::min(lhs[i], rhs[i]); - } - }; - - struct MinBox: public Galois::GReducible, MTupleReducer > { - MinBox() { } - }; - - struct MaxBox: public Galois::GReducible, MTupleReducer > { - MaxBox() { } - }; - - struct ComputeBox { - MinBox& least; - MaxBox& most; - ComputeBox(MinBox& l, MaxBox& m): least(l), most(m) { } - void operator()(const Point* p) { - least.update(p->t()); - most.update(p->t()); - } - }; - - template - struct WorkItem { - IterTy begin; - IterTy end; - Tuple center; - double radius; - Node* root; - PQuadTree* self; - - WorkItem(PQuadTree* s, IterTy b, IterTy e, Node* n, Tuple c, double r): - begin(b), end(e), center(c), radius(r), root(n), self(s) { } - - void operator()() { - for (; begin != end; ++begin) { - self->add(root, *begin, center, radius); - } - } - }; - - template - struct PAdd { - void operator()(WorkItem& w) { - w(); - } - void operator()(WorkItem& w, Galois::UserContext >&) { - w(); - } - }; - - struct Split: public std::unary_function { - int index; - TupleDataTy pivot; - Split(int i, TupleDataTy p): index(i), pivot(p) { } - bool operator()(Point* p) { - return p->t()[index] < pivot; - } - }; - - Tuple m_center; - double m_radius; - Node* m_root; - - Galois::Runtime::MM::FSBGaloisAllocator nodeAlloc; - Galois::Runtime::MM::FSBGaloisAllocator pointsAlloc; - - template - void init(IterTy begin, IterTy end) { - MinBox least; - MaxBox most; - Galois::do_all(begin, end, ComputeBox(least, most)); - //std::for_each(begin, end, ComputeBox(least, most)); - - MTuple mmost = most.reduce(); - MTuple lleast = least.reduce(); - - m_radius = std::max(mmost.x() - lleast.x(), mmost.y() - lleast.y()) / 2.0; - - m_center = lleast; - m_center.x() += m_radius; - m_center.y() += m_radius; - } - - template - void divideWork(IterTy begin, IterTy end, Node* root, Tuple center, double radius, OutIterTy& out, int depth) { - if (depth == 0 || std::distance(begin, end) <= 16) { - *out++ = WorkItem(this, begin, end, root, center, radius); - return; - } - - IterTy its[5]; - its[0] = begin; - its[4] = end; - - its[2] = std::partition(its[0], its[4], Split(1, center[1])); - its[1] = std::partition(its[0], its[2], Split(0, center[0])); - its[3] = std::partition(its[2], its[4], Split(0, center[0])); - - radius *= 0.5; - --depth; - - for (int i = 0; i < 4; ++i) { - Tuple newC; - root->child[i] = newNode(); - makeNewCenter(i, center, radius, newC); - divideWork(its[i], its[i+1], root->child[i], newC, radius, out, depth); - } - } - - bool couldBeCloser(const Point* p, const Tuple& center, double radius, FindResult& result) { - if (result.p == NULL) - return true; - - const Tuple& t = p->t(); - double d = 0; - for (int i = 0; i < t.dim(); ++i) { - double min = center[i] - radius - t[i]; - double max = center[i] + radius - t[i]; - d += std::min(min*min, max*max); - } - return d < result.best; - } - - bool find(Node* root, const Point* p, const Tuple& center, double radius, FindResult& result) { - if (root->isLeaf()) { - bool retval = false; - const Tuple& t0 = p->t(); - for (int i = 0; i < root->size; ++i) { - const Point* o = root->points->at(i); - if (!o->inMesh()) - continue; - - double d = 0; - const Tuple& t1 = o->t(); - for (int j = 0; j < t0.dim(); ++j) { - double v = t0[j] - t1[j]; - d += v * v; - } - if (result.p == NULL || d < result.best) { - result.p = root->points->at(i); - result.best = d; - retval = true; - } - } - return retval; - } - - // Search, starting at closest quadrant to p - radius *= 0.5; - int start = getIndex(center, p->t()); - for (int i = 
0; i < 4; ++i) { - int index = (start + i) % 4; - Node* kid = root->child[index]; - if (kid != NULL) { - Tuple newCenter; - makeNewCenter(index, center, radius, newCenter); - if (couldBeCloser(p, newCenter, radius, result)) { - if (false) { - // exhaustive - find(kid, p, newCenter, radius, result); - } else { - // return only first - if (find(kid, p, newCenter, radius, result)) - return true; - } - } - } - } - return false; - } - - void makeInternal(Node* root, const Tuple& center, double radius) { - assert(root->isLeaf()); - - Node::PointsTy* points = root->points; - root->points = NULL; - - for (Node::PointsTy::iterator ii = points->begin(), ei = points->begin() + root->size; - ii != ei; ++ii) { - add(root, *ii, center, radius); - } - pointsAlloc.destroy(points); - pointsAlloc.deallocate(points, 1); - } - - void add(Node* root, Point* p, const Tuple& center, double radius) { - if (root->isLeaf()) { - if (root->size < maxLeafSize) { - root->points->at(root->size++) = p; - } else { - makeInternal(root, center, radius); - add(root, p, center, radius); - } - return; - } - - int index = getIndex(center, p->t()); - Node*& kid = root->child[index]; - if (kid == NULL) { - kid = newNode(p); - } else { - radius *= 0.5; - assert(radius != 0.0); - Tuple newCenter; - makeNewCenter(index, center, radius, newCenter); - add(kid, p, newCenter, radius); - } - } - - template - void output(Node* root, OutputTy out) { - if (root->isLeaf()) { - std::copy( - boost::make_transform_iterator(root->points->begin(), DerefPointer()), - boost::make_transform_iterator(root->points->begin() + root->size, DerefPointer()), - out); - } else { - for (int i = 0; i < 4; ++i) { - Node* kid = root->child[i]; - if (kid != NULL) - output(kid, out); - } - } - } - -public: - template - PQuadTree(IterTy begin, IterTy end) { - m_root = newNode(); - - init(begin, end); - - typedef std::vector PointsBufTy; - typedef WorkItem WIT; - typedef std::vector WorkTy; - typedef Galois::WorkList::dChunkedLIFO<1> WL; - PointsBufTy points; - std::copy(begin, end, std::back_inserter(points)); - - WorkTy work; - std::back_insert_iterator it(work); - divideWork(points.begin(), points.end(), m_root, m_center, m_radius, it, 4); - Galois::for_each(work.begin(), work.end(), PAdd(), Galois::wl()); - } - - ~PQuadTree() { - deleteNode(m_root); - } - - template - void output(OutputTy out) { - if (m_root != NULL) { - output(m_root, out); - } - } - - //! Find point nearby to p - bool find(const Point* p, Point*& result) { - FindResult r; - r.p = NULL; - if (m_root) { - find(m_root, p, m_center, m_radius, r); - if (r.p != NULL) { - result = r.p; - return true; - } - } - return false; - } -}; - -/** - * Finds points nearby a given point. 
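Both quad trees above lean on the same two helpers declared at the top of the deleted QuadTree.h: getIndex picks one of four children by comparing the point with the node centre along each axis, and makeNewCenter offsets the centre by plus or minus the already-halved radius. A standalone sketch of that bookkeeping, with Tuple2 a hypothetical stand-in for the deleted Tuple:

struct Tuple2 { double v[2]; };

// Child index: bit i is set iff the point lies on the positive side of the
// centre along axis i (matches getIndex(center, p) above).
int quadrant(const Tuple2& center, const Tuple2& p) {
  int index = 0;
  for (int i = 0; i < 2; ++i)
    if (center.v[i] < p.v[i])
      index |= 1 << i;
  return index;
}

// Centre of that child, offset by +/- the child radius along each axis
// (callers halve the radius before descending, as makeNewCenter expects).
Tuple2 childCenter(int index, const Tuple2& center, double childRadius) {
  Tuple2 c = center;
  for (int i = 0; i < 2; ++i)
    c.v[i] += (index & (1 << i)) ? childRadius : -childRadius;
  return c;
}

find() above visits the quadrant containing the query first and prunes the remaining children with couldBeCloser(), a cheap per-axis distance estimate against the best match so far; since the caller only needs a nearby starting point for the mesh walk, the search also stops at the first hit.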
- */ -class SQuadTree { - struct FindResult { - Point* p; - double best; - }; - - struct DerefPointer: public std::unary_function { - Point operator()(Point* p) const { return *p; } - }; - - struct Node { - Node* child[4]; - Point** points; - int size; - - bool isLeaf() const { - return points != NULL; - } - - void makeInternal(const Tuple& center, double radius) { - memset(child, 0, sizeof(*child) * 4); - Point** begin = points; - points = NULL; - - for (Point **p = begin, **end = begin + size; p != end; ++p) { - add(*p, center, radius); - } - delete [] begin; - } - - void add(Point* p, const Tuple& center, double radius) { - if (isLeaf()) { - if (size < maxLeafSize) { - points[size] = p; - ++size; - } else { - makeInternal(center, radius); - add(p, center, radius); - } - return; - } - - int index = getIndex(center, p->t()); - Node*& kid = child[index]; - if (kid == NULL) { - kid = new Node(); - kid->points = new Point*[maxLeafSize]; - kid->points[0] = p; - kid->size = 1; - } else { - radius *= 0.5; - assert(radius != 0.0); - Tuple newCenter; - makeNewCenter(index, center, radius, newCenter); - kid->add(p, newCenter, radius); - } - } - - bool couldBeCloser(const Point* p, const Tuple& center, double radius, FindResult& result) { - if (result.p == NULL) - return true; - - const Tuple& t = p->t(); - double d = 0; - for (int i = 0; i < t.dim(); ++i) { - double min = center[i] - radius - t[i]; - double max = center[i] + radius - t[i]; - d += std::min(min*min, max*max); - } - return d < result.best; - } - - void find(const Point* p, const Tuple& center, double radius, FindResult& result) { - if (isLeaf()) { - const Tuple& t0 = p->t(); - for (int i = 0; i < size; ++i) { - double d = 0; - const Point* o = points[i]; - if (!o->inMesh()) - continue; - const Tuple& t1 = o->t(); - for (int j = 0; j < t0.dim(); ++j) { - double v = t0[j] - t1[j]; - d += v * v; - } - if (result.p == NULL || d < result.best) { - result.p = points[i]; - result.best = d; - } - } - return; - } - - // Search, starting at closest quadrant to p - radius *= 0.5; - int start = getIndex(center, p->t()); - for (int i = 0; i < 4; ++i) { - int index = (start + i) % 4; - Node* kid = child[index]; - if (kid != NULL) { - Tuple newCenter; - makeNewCenter(index, center, radius, newCenter); - if (kid->couldBeCloser(p, newCenter, radius, result)) - kid->find(p, newCenter, radius, result); - } - } - } - - template - void output(OutputTy out) { - if (isLeaf()) { - std::copy( - boost::make_transform_iterator(points, DerefPointer()), - boost::make_transform_iterator(points + size, DerefPointer()), - out); - } else { - for (int i = 0; i < 4; ++i) { - Node* kid = child[i]; - if (kid != NULL) - kid->output(out); - } - } - } - }; - - void deleteNode(Node*& n) { - if (n == NULL) - return; - if (n->isLeaf()) { - delete [] n->points; - n->points = NULL; - } else { - for (int i = 0; i < 4; ++i) { - deleteNode(n->child[i]); - } - } - - delete n; - n = NULL; - } - - template - void computeBox(Begin begin, End end, Tuple& least, Tuple& most) { - least.x() = least.y() = std::numeric_limits::max(); - most.x() = most.y() = std::numeric_limits::min(); - - for (; begin != end; ++begin) { - const Tuple& p = (*begin)->t(); - for (int i = 0; i < 2; ++i) { - if (p[i] < least[i]) - least[i] = p[i]; - - if (p[i] > most[i]) - most[i] = p[i]; - } - } - } - - template - void init(Begin begin, End end) { - Tuple least, most; - computeBox(begin, end, least, most); - - radius = std::max(most.x() - least.x(), most.y() - least.y()) / 2.0; - center = least; - center.x() += 
radius; - center.y() += radius; - } - - void add(Point* p) { - if (root == NULL) { - root = new Node(); - root->points = NULL; - memset(root->child, 0, sizeof(*root->child) * 4); - } - root->add(p, center, radius); - } - - Tuple center; - double radius; - Node* root; - -public: - template - SQuadTree(Begin begin, End end): root(NULL) { - init(begin, end); - for (; begin != end; ++begin) - add(*begin); - } - - ~SQuadTree() { - deleteNode(root); - } - - //! Find point nearby to p - bool find(const Point* p, Point*& result) { - FindResult r; - r.p = NULL; - if (root) { - root->find(p, center, radius, r); - if (r.p != NULL) { - result = r.p; - return true; - } - } - return false; - } - - template - void output(OutputTy out) { - if (root != NULL) { - root->output(out); - } - } -}; - -typedef PQuadTree QuadTree; - -#endif diff --git a/maxflow/galois/apps/delaunaytriangulation/Tuple.h b/maxflow/galois/apps/delaunaytriangulation/Tuple.h deleted file mode 100644 index d50f13a..0000000 --- a/maxflow/galois/apps/delaunaytriangulation/Tuple.h +++ /dev/null @@ -1,165 +0,0 @@ -/** A tuple -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author Xin Sui - */ -#ifndef TUPLE_H -#define TUPLE_H - -#include -#include - -typedef double TupleDataTy; - -class Tuple { - TupleDataTy data[2]; - -public: - Tuple() { data[0] = 0; data[1] = 0; } - Tuple(TupleDataTy xy) { data[0] = xy; data[1] = xy; } - Tuple(TupleDataTy x, TupleDataTy y) { data[0] = x; data[1] = y; } - int dim() const { return 2; } - TupleDataTy x() const { return data[0]; } - TupleDataTy y() const { return data[1]; } - - TupleDataTy& x() { return data[0]; } - TupleDataTy& y() { return data[1]; } - - bool operator==(const Tuple& rhs) const { - for (int i = 0; i < 2; ++i) { - if (data[i] != rhs.data[i]) return false; - } - return true; - } - - bool operator!=(const Tuple& rhs) const { - return !(*this == rhs); - } - - TupleDataTy operator[](int index) const { - return data[index]; - } - - TupleDataTy& operator[](int index) { - return data[index]; - } - - Tuple operator+(const Tuple& rhs) const { - return Tuple(data[0]+rhs.data[0], data[1]+rhs.data[1]); - } - - Tuple operator-(const Tuple& rhs) const { - return Tuple(data[0]-rhs.data[0], data[1]-rhs.data[1]); - } - - //!scalar product - Tuple operator*(TupleDataTy d) const { - return Tuple(data[0]*d, data[1]*d); - } - - //! 
dot product - TupleDataTy dot(const Tuple& rhs) const { - return data[0]*rhs.data[0] + data[1]*rhs.data[1]; - } - - TupleDataTy cross(const Tuple& rhs) const { - return data[0]*rhs.data[1] - data[1]*rhs.data[0]; - } - - void print(std::ostream& os) const { - os << "(" << data[0] << ", " << data[1] << ")"; - } -}; - -static inline std::ostream& operator<<(std::ostream& os, const Tuple& rhs) { - rhs.print(os); - return os; -} - -class Tuple3 { - TupleDataTy data[3]; - -public: - Tuple3() { data[0] = 0; data[1] = 0; data[2] = 0;} - Tuple3(TupleDataTy xyz) { data[0] = xyz; data[1] = xyz; data[1] = xyz;} - Tuple3(TupleDataTy x, TupleDataTy y, TupleDataTy z) { data[0] = x; data[1] = y; data[2] = z;} - int dim() const { return 3; } - TupleDataTy x() const { return data[0]; } - TupleDataTy y() const { return data[1]; } - TupleDataTy z() const { return data[2]; } - - TupleDataTy& x() { return data[0]; } - TupleDataTy& y() { return data[1]; } - TupleDataTy& z() { return data[2]; } - - bool operator==(const Tuple3& rhs) const { - for (int i = 0; i < 3; ++i) { - if (data[i] != rhs.data[i]) return false; - } - return true; - } - - bool operator!=(const Tuple3& rhs) const { - return !(*this == rhs); - } - - TupleDataTy operator[](int index) const { - return data[index]; - } - - TupleDataTy& operator[](int index) { - return data[index]; - } - - Tuple3 operator+(const Tuple3& rhs) const { - return Tuple3(data[0]+rhs.data[0], data[1]+rhs.data[1], data[2]+rhs.data[2]); - } - - Tuple3 operator-(const Tuple3& rhs) const { - return Tuple3(data[0]-rhs.data[0], data[1]-rhs.data[1], data[2]+rhs.data[2]); - } - - //!scalar product - Tuple3 operator*(TupleDataTy d) const { - return Tuple3(data[0]*d, data[1]*d, data[2]*d); - } - - //! dot product - TupleDataTy dot(const Tuple3& rhs) const { - return data[0]*rhs.data[0] + data[1]*rhs.data[1] + data[2]*rhs.data[2]; - } - - Tuple3 cross(const Tuple3& rhs) const { - return Tuple3(data[1]*rhs.data[2] - data[2]*rhs.data[1], - data[2]*rhs.data[0] - data[0]*rhs.data[2], - data[0]*rhs.data[1] - data[1]*rhs.data[0]); - } - - void print(std::ostream& os) const { - os << "(" << data[0] << ", " << data[1] << ", " << data[2] << ")"; - } -}; - -static inline std::ostream& operator<<(std::ostream& os, const Tuple3& rhs) { - rhs.print(os); - return os; -} - -#endif diff --git a/maxflow/galois/apps/delaunaytriangulation/Verifier.h b/maxflow/galois/apps/delaunaytriangulation/Verifier.h deleted file mode 100644 index 7b484e7..0000000 --- a/maxflow/galois/apps/delaunaytriangulation/Verifier.h +++ /dev/null @@ -1,163 +0,0 @@ -/** Delaunay triangulation verifier -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. 
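For reference, a standalone sketch of the Tuple3 members above under the assumption that the scalar constructor is meant to fill all three components and that operator- is meant to subtract component-wise (the deleted code writes data[1] twice and adds the z components, which reads like a copy-paste slip); everything else is taken from the surrounding definitions.

#include <cstdio>

typedef double TupleDataTy;

struct Tuple3 {
  TupleDataTy data[3];
  explicit Tuple3(TupleDataTy xyz) { data[0] = data[1] = data[2] = xyz; }  // fill x, y and z
  Tuple3(TupleDataTy x, TupleDataTy y, TupleDataTy z) { data[0] = x; data[1] = y; data[2] = z; }

  Tuple3 operator-(const Tuple3& rhs) const {  // component-wise subtraction
    return Tuple3(data[0] - rhs.data[0], data[1] - rhs.data[1], data[2] - rhs.data[2]);
  }
  Tuple3 cross(const Tuple3& rhs) const {      // right-handed cross product, as above
    return Tuple3(data[1] * rhs.data[2] - data[2] * rhs.data[1],
                  data[2] * rhs.data[0] - data[0] * rhs.data[2],
                  data[0] * rhs.data[1] - data[1] * rhs.data[0]);
  }
};

int main() {
  Tuple3 x = Tuple3(1, 0, 0) - Tuple3(0, 0, 0);  // (1, 0, 0)
  Tuple3 c = x.cross(Tuple3(0, 1, 0));           // expect (0, 0, 1)
  std::printf("(%g, %g, %g)\n", c.data[0], c.data[1], c.data[2]);
}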
Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * @author Xin Sui - */ - -#ifndef VERIFIER_H -#define VERIFIER_H - -#include "Graph.h" -#include "Point.h" - -#include "Galois/Galois.h" -#include "Galois/ParallelSTL/ParallelSTL.h" - -#include -#include -#include - -class Verifier { - struct inconsistent: public std::unary_function { - Graph* graph; - inconsistent(Graph* g): graph(g) { } - - bool operator()(const GNode& node) const { - Element& e = graph->getData(node); - - size_t dist = std::distance(graph->edge_begin(node), graph->edge_end(node)); - if (e.dim() == 2) { - if (dist != 1) { - std::cerr << "Error: Segment " << e << " has " << dist << " relation(s)\n"; - return true; - } - } else if (e.dim() == 3) { - if (dist != 3) { - std::cerr << "Error: Triangle " << e << " has " << dist << " relation(s)\n"; - return true; - } - } else { - std::cerr << "Error: Element with " << e.dim() << " edges\n"; - return true; - } - return false; - } - }; - - struct not_delaunay: public std::unary_function { - Graph* graph; - not_delaunay(Graph* g): graph(g) { } - - bool operator()(const GNode& node) { - Element& e1 = graph->getData(node); - - for (Graph::edge_iterator jj = graph->edge_begin(node), - ej = graph->edge_end(node); jj != ej; ++jj) { - const GNode& n = graph->getEdgeDst(jj); - Element& e2 = graph->getData(n); - if (e1.dim() == 3 && e2.dim() == 3) { - Tuple t2; - if (!getTupleT2OfRelatedEdge(e1, e2, t2)) { - std::cerr << "missing tuple\n"; - return true; - } - if (e1.inCircle(t2)) { - std::cerr << "Delaunay property violated: point " << t2 << " in element " << e1 << "\n"; - return true; - } - } - } - return false; - } - - bool getTupleT2OfRelatedEdge(const Element& e1, const Element& e2, Tuple& t) { - int e2_0 = -1; - int e2_1 = -1; - int phase = 0; - - for (int i = 0; i < e1.dim(); i++) { - for (int j = 0; j < e2.dim(); j++) { - if (e1.getPoint(i) != e2.getPoint(j)) - continue; - - if (phase == 0) { - e2_0 = j; - phase = 1; - break; - } - - e2_1 = j; - for (int k = 0; k < 3; k++) { - if (k != e2_0 && k != e2_1) { - t = e2.getPoint(k)->t(); - return true; - } - } - } - } - return false; - } - }; - - bool checkReachability(Graph* graph) { - std::stack remaining; - std::set found; - remaining.push(*(graph->begin())); - - while (!remaining.empty()) { - GNode node = remaining.top(); - remaining.pop(); - if (!found.count(node)) { - if (!graph->containsNode(node)) { - std::cerr << "Reachable node was removed from graph\n"; - } - found.insert(node); - int i = 0; - for (Graph::edge_iterator ii = graph->edge_begin(node), - ei = graph->edge_end(node); ii != ei; ++ii) { - GNode n = graph->getEdgeDst(ii); - assert(i < 3); - assert(graph->containsNode(n)); - assert(node != n); - ++i; - remaining.push(n); - } - } - } - - if (found.size() != graph->size()) { - std::cerr << "Error: Not all elements are reachable. 
"; - std::cerr << "Found: " << found.size() << " needed: " << graph->size() << ".\n"; - return false; - } - return true; - } - -public: - bool verify(Graph* g) { - return Galois::ParallelSTL::find_if(g->begin(), g->end(), inconsistent(g)) == g->end() - && Galois::ParallelSTL::find_if(g->begin(), g->end(), not_delaunay(g)) == g->end() - && checkReachability(g); - } -}; - -#endif diff --git a/maxflow/galois/apps/des/CMakeLists.txt b/maxflow/galois/apps/des/CMakeLists.txt deleted file mode 100644 index d9322b0..0000000 --- a/maxflow/galois/apps/des/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -file(GLOB Sources -./*.cpp -./common/*.cpp -./logic/*.cpp -) - -app(DESunorderedSerial unordered/DESunorderedSerial.cpp ${Sources}) -app(DESunordered unordered/DESunordered.cpp ${Sources}) - -app(DESorderedSerial ordered/DESorderedSerial.cpp ${Sources}) -app(DESordered ordered/DESordered.cpp ${Sources}) -app(DESorderedHand ordered/DESorderedHand.cpp ${Sources}) -app(DESorderedHandNB ordered/DESorderedHandNB.cpp ${Sources}) -app(DESorderedHandSet ordered/DESorderedHandSet.cpp ${Sources}) - - -include_directories(.) -include_directories(./common) -include_directories(./logic) -include_directories(./ordered) -include_directories(./unordered) diff --git a/maxflow/galois/apps/des/common/BaseSimObject.h b/maxflow/galois/apps/des/common/BaseSimObject.h deleted file mode 100644 index e3b3d4d..0000000 --- a/maxflow/galois/apps/des/common/BaseSimObject.h +++ /dev/null @@ -1,180 +0,0 @@ -/** Base SimObject-*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. 
Amber Hassaan - */ - - -#ifndef DES_BASE_SIM_OBJECT_H -#define DES_BASE_SIM_OBJECT_H - -#include "Galois/Graph/Graph.h" - -#include -#include - - -namespace des { - -template -class BaseSimObject { -protected: - - size_t id; - size_t eventIDcntr; - -public: - typedef Event_tp Event_ty; - - BaseSimObject (size_t id): id (id), eventIDcntr (0) {} - - virtual ~BaseSimObject () {} - - size_t getID () const { return id; } - - virtual std::string str () const { - std::ostringstream ss; - ss << "SimObject-" << id; - return ss.str (); - } - - - virtual BaseSimObject* clone () const = 0; - - Event_ty makeEvent ( - BaseSimObject* recvObj, - const typename Event_ty::Action_ty& action, - const typename Event_ty::Type& type, - const des::SimTime& sendTime, - des::SimTime delay=des::MIN_DELAY) { - - - if (delay < des::MIN_DELAY) { - delay = des::MIN_DELAY; - } - - des::SimTime recvTime = sendTime + delay; - - assert (recvTime > sendTime); - assert (recvTime < 2*des::INFINITY_SIM_TIME); - - return Event_ty ((eventIDcntr++), this, recvObj, action, type, sendTime, recvTime); - - } - - Event_ty makeZeroEvent () { - return Event_ty ((eventIDcntr++), this, this, typename Event_ty::Action_ty (), Event_ty::NULL_EVENT, 0, 0); - } - -protected: - - struct SendWrapper { - virtual void send (BaseSimObject* dstObj, const Event_ty& event) = 0; - virtual ~SendWrapper () {} - }; - - - struct BaseOutDegIter: public std::iterator { - typedef std::iterator Super_ty; - - BaseOutDegIter () {}; - - virtual typename Super_ty::reference operator * () = 0; - - virtual typename Super_ty::pointer operator -> () = 0; - - virtual BaseOutDegIter& operator ++ () = 0; - - // since BaseOutDegIter is virtual, can't return copy of BaseOutDegIter here - virtual void operator ++ (int) = 0; - - virtual bool is_equal (const BaseOutDegIter& that) const = 0; - - friend bool operator == (const BaseOutDegIter& left, const BaseOutDegIter& right) { - return left.is_equal (right); - } - - friend bool operator != (const BaseOutDegIter& left, const BaseOutDegIter& right) { - return !left.is_equal (right); - } - }; - - - template - struct OutDegIterator: public BaseOutDegIter { - typedef BaseOutDegIter Base; - typedef typename G::edge_iterator GI; - - G& graph; - GI edgeIter; - - OutDegIterator (G& _graph, GI _edgeIter): BaseOutDegIter (), graph (_graph), edgeIter (_edgeIter) {} - - virtual typename Base::reference operator * () { - return graph.getData (graph.getEdgeDst (edgeIter), Galois::MethodFlag::NONE); - } - - virtual typename Base::pointer operator-> () { - return &(operator * ()); - } - - virtual OutDegIterator& operator ++ () { - ++edgeIter; - return *this; - } - - virtual void operator ++ (int) { - operator ++ (); - } - - virtual bool is_equal (const BaseOutDegIter& t) const { - - assert (dynamic_cast*> (&t) != NULL); - const OutDegIterator& that = static_cast&> (t); - - assert (&that != NULL); - assert (&(this->graph) == &(that.graph)); - - return (this->edgeIter == that.edgeIter); - } - - }; - - template - OutDegIterator make_begin (G& graph, typename G::GraphNode& node) const { - assert (graph.getData (node, Galois::MethodFlag::NONE) == this); - return OutDegIterator (graph, graph.edge_begin (node, Galois::MethodFlag::NONE)); - } - - template - OutDegIterator make_end (G& graph, typename G::GraphNode& node) const { - assert (graph.getData (node, Galois::MethodFlag::NONE) == this); - return OutDegIterator (graph, graph.edge_end (node, Galois::MethodFlag::NONE)); - } - - virtual void execEventIntern (const Event_ty& event, SendWrapper& 
sendWrap, BaseOutDegIter& b, BaseOutDegIter& e) = 0; - - virtual size_t getInputIndex (const Event_ty& event) const = 0; -}; - -} // end namespace - - -#endif // DES_BASE_SIM_OBJECT_H diff --git a/maxflow/galois/apps/des/common/Event.h b/maxflow/galois/apps/des/common/Event.h deleted file mode 100644 index f492a5b..0000000 --- a/maxflow/galois/apps/des/common/Event.h +++ /dev/null @@ -1,290 +0,0 @@ -/** Event: is the basic structure of an event in the simulation -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#ifndef DES_EVENT_H -#define DES_EVENT_H - -#include -#include - -#include "comDefs.h" -#include "BaseSimObject.h" - -/** - * The Class Event. - * - * @param the type representing the action to be performed on receipt of this event - */ -namespace des { - -template -class Event { - - // template - friend class des::BaseSimObject; - -public: - - enum Type { REGULAR_EVENT, NULL_EVENT }; - - typedef A Action_ty; - typedef BaseSimObject BaseSimObj_ty; - -private: - /** The id: not guaranteed to be unique. */ - size_t id; - - /** The send obj. */ - BaseSimObj_ty* sendObj; - - /** The recv obj. */ - BaseSimObj_ty* recvObj; - - /** The action to be performed on receipt of this event. */ - A action; - - /** type of event null or non-null */ - Type type; - - /** The send time. */ - SimTime sendTime; - - /** The recv time. */ - SimTime recvTime; - - - - -protected: - /** - * Instantiates a new base event. 
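A minimal standalone sketch of the receive-time computation in makeEvent above: delays below MIN_DELAY are clamped so an event is always received strictly after it is sent. The helper name recvTimeFor is illustrative; the two constants are taken from comDefs.h further down in this diff.

#include <algorithm>
#include <cassert>
#include <cstdio>

typedef long long SimTime;                   // mirrors des::SimTime
const SimTime MIN_DELAY = 1;                 // as in comDefs.h
const SimTime INFINITY_SIM_TIME = 1 << 30;   // sentinel used for NULL_EVENTs

SimTime recvTimeFor(SimTime sendTime, SimTime delay) {
  delay = std::max(delay, MIN_DELAY);        // zero or negative delays are bumped up
  SimTime recvTime = sendTime + delay;
  assert(recvTime > sendTime);               // simulation time must advance
  assert(recvTime < 2 * INFINITY_SIM_TIME);  // even "infinite" events keep headroom
  return recvTime;
}

int main() {
  std::printf("%lld %lld\n", recvTimeFor(10, 0), recvTimeFor(10, 5));  // 11 15
}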
- * - * @param id not guaranteed to be unique - * @param sendObj the sending simulation obj - * @param recvObj the receiving simulatio obj - * @param action the action - * @param type the type - * @param sendTime the send time - * @param recvTime the recv time - */ - Event (size_t id, BaseSimObj_ty* sendObj, BaseSimObj_ty* recvObj, const A& action, const Type& type, const SimTime& sendTime, const SimTime& recvTime): - id (id), sendObj (sendObj), recvObj (recvObj), action (action), type (type), sendTime (sendTime), recvTime (recvTime) {} - -public: - friend bool operator == (const Event& left, const Event& right) { - return (left.id == right.id) - && (left.sendObj == right.sendObj) - && (left.recvObj == right.recvObj) - && (left.action == right.action) - && (left.type == right.type) - && (left.sendTime == right.sendTime) - && (left.recvTime == right.recvTime); - } - - friend bool operator != (const Event& left, const Event& right) { - return !(left == right); - } - - friend std::ostream& operator << (std::ostream& out, const Event& event) { - return (out << event.str ()); - } - - - /** - * Detailed string. - * - * @return the string - */ - std::string detailStr() const { - std::ostringstream ss; - ss << "Event-" << id << ", " << (type == NULL_EVENT ? "NULL_EVENT" : "REGULAR_EVENT") - << ", sendTime = " << sendTime << ", recvTime = " << recvTime - << ", sendObj = " << sendObj->getID () << ", recvObj = " << recvObj->getID () - << std::endl; - // << "action = " << action.str () << std::endl << std::endl; - return ss.str (); - } - - /** - * a simpler string representation for debugging - */ - std::string shortStr () const { - std::ostringstream ss; - ss << getRecvTime (); - return ss.str (); - } - - inline std::string str () const { - return detailStr (); - } - - /** - * Gets the send obj. - * - * @return the send obj - */ - BaseSimObj_ty* getSendObj() const { - return sendObj; - } - - - /** - * Gets the recv obj. - * - * @return the recv obj - */ - BaseSimObj_ty* getRecvObj() const { - return recvObj; - } - - - /** - * Gets the send time. - * - * @return the send time - */ - const SimTime& getSendTime() const { - return sendTime; - } - - /** - * Gets the recv time. - * - * @return the recv time - */ - const SimTime& getRecvTime() const { - return recvTime; - } - /** - * Gets the action. - * - * @return the action - */ - const A& getAction() const { - return action; - } - - - const Type& getType () const { - return type; - } - - - /** - * Gets the id. - * - * @return the id - */ - size_t getID() const { - return id; - } - -}; - -/** - * EventRecvTimeLocalTieBrkCmp is used to compare two events and - * break ties when the receiving time of two events is the same - * - * Ties between events with same recvTime need to be borken consistently, - * i.e. compare(m,n) and compare (n,m) are consistent with each other during - * the life of events 'm' and 'n'. - * - * There are at least two reasons for breaking ties between events of same time stamps: - * - * - Chandy-Misra's algorithm requires FIFO communication channels on edges between the - * stations. i.e. On a given input edge, two events with the same time stamp should not be - * reordered. Therefore ties must be resolved for events received on the same input i.e. when - * the sender is the same for two events. - * - * - PriorityQueue's are usually implemented using heaps and trees, which rebalance when an item is - * removed/added. 
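A self-contained illustration of that ordering with std::priority_queue: recvTime first, then sender, then event id, with the comparison reversed so the max-heap behaves as a min-heap. Ev is a stand-in that keeps only the three fields the comparator looks at; it is not the real Event class.

#include <cstdio>
#include <queue>
#include <vector>

struct Ev {
  long long recvTime;
  size_t senderId;   // events from one sender must not be reordered
  size_t id;         // unique per sender, breaks the final tie
};

struct RevCmp {  // '>' semantics so std::priority_queue (a max-heap) pops the smallest first
  bool operator()(const Ev& a, const Ev& b) const {
    if (a.recvTime != b.recvTime) return a.recvTime > b.recvTime;
    if (a.senderId != b.senderId) return a.senderId > b.senderId;
    return a.id > b.id;
  }
};

int main() {
  std::priority_queue<Ev, std::vector<Ev>, RevCmp> pq;
  pq.push(Ev{5, 1, 7});
  pq.push(Ev{5, 0, 9});  // same recvTime, lower sender id: must come out first
  pq.push(Ev{3, 2, 0});
  while (!pq.empty()) {
    std::printf("t=%lld sender=%zu id=%zu\n",
                pq.top().recvTime, pq.top().senderId, pq.top().id);
    pq.pop();
  }
}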
This means if we add two items 'a' and 'b' with the same priority in the time - * order (a,b), then depending on what other itmes are added and removed, we may end up removing 'a' and - * 'b' in the order (b,a), i.e. PriorityQueue may reorder elements of same priority. Therefor, - * If we break ties between events on same input and not break ties between events - * on different inputs, this may result in reordering events on the same input. - * - */ - -template -struct EventRecvTimeLocalTieBrkCmp { - - /** - * - * Compares two events 'left' and 'right' based on getRecvTime(). - * if recvTime is same, then we compare the sender (using id), because two events from the same - * sender should not be reordered. - * If the sender is the same then we use the id on the event to - * break the tie, since, sender is guaranteed to assign a unique - * id to events - * - * - * @param left - * @param right - * @return -1 if left < right. 1 if left > right. Should not return 0 unless left and right are - * aliases - */ - - static int compare (const Event_tp& left, const Event_tp& right) { - int res; - if ( left.getRecvTime () < right.getRecvTime ()) { - res = -1; - - } else if (left.getRecvTime () > right.getRecvTime ()) { - res = 1; - - } else { - - res = left.getSendObj ()->getID () - right.getSendObj ()->getID (); - - if (res == 0) { // same sender - res = left.getID () - right.getID (); - } - - } - - return res; - - } - - bool operator () (const Event_tp& left, const Event_tp& right) const { - return compare (left, right) < 0; - } - - /** - * returns true if left > right - * Since std::priority_queue is a max heap, we use > semantics instead of < - * in order to get a min heap and thus process events in increasing order of recvTime. - */ - struct RevCmp { - bool operator () (const Event_tp& left, const Event_tp& right) const { - return EventRecvTimeLocalTieBrkCmp::compare (left, right) > 0; - } - }; -}; - - -} // namespace des - - - -#endif // DES_EVENT_H diff --git a/maxflow/galois/apps/des/common/Input.h b/maxflow/galois/apps/des/common/Input.h deleted file mode 100644 index 5f6b41c..0000000 --- a/maxflow/galois/apps/des/common/Input.h +++ /dev/null @@ -1,109 +0,0 @@ -/** Input represents an input port in the circuit -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. 
Amber Hassaan - */ - -#ifndef DES_INPUT_H_ -#define DES_INPUT_H_ - -#include -#include - -#include "SimGate.h" -#include "BasicPort.h" - - -namespace des { - -template -class Input: public SimGate { - -protected: - typedef SimGate Base; - typedef typename Base::Event_ty Event_ty; - -public: - /** - * Instantiates a new Input. - */ - Input(size_t id, des::BasicPort& impl) - : Base (id, impl) {} - - - virtual Input* clone () const { - return new Input (*this); - } - - virtual des::BasicPort& getImpl () const { - assert (dynamic_cast (&Base::getImpl ()) != NULL); - des::BasicPort* ptr = static_cast (&Base::getImpl ()); - assert (ptr != NULL); - return *ptr; - } - - /** - * A string representation - */ - virtual std::string str () const { - std::ostringstream ss; - ss << "Input: " << Base::str (); - return ss.str (); - } - -protected: - /** - * Sends a copy of event at the input to all the outputs in the circuit - * @see OneInputGate::execEvent(). - * - * @param event the event - * @param sendWrap - * @param b begining of range - * @param e end of range - */ - virtual void execEventIntern (const Event_ty& event, - typename Base::SendWrapper& sendWrap, - typename Base::BaseOutDegIter& b, typename Base::BaseOutDegIter& e) { - - if (event.getType () == Event_ty::NULL_EVENT) { - Base::execEventIntern (event, sendWrap, b, e); - - } else { - - const des::LogicUpdate& lu = event.getAction (); - if (getImpl().getInputName () == lu.getNetName()) { - getImpl().setInputVal (lu.getNetVal()); - getImpl().setOutputVal (lu.getNetVal()); - - des::LogicUpdate drvFanout (getImpl ().getOutputName (), getImpl ().getOutputVal ()); - - Base::sendEventsToFanout (event, drvFanout, Event_ty::REGULAR_EVENT, sendWrap, b, e); - } else { - getImpl ().netNameMismatch (lu); - } - } - - } - - -}; - -} // end namespace des -#endif /* DES_INPUT_H_ */ diff --git a/maxflow/galois/apps/des/common/Output.h b/maxflow/galois/apps/des/common/Output.h deleted file mode 100644 index 5dc7f7c..0000000 --- a/maxflow/galois/apps/des/common/Output.h +++ /dev/null @@ -1,91 +0,0 @@ -/** Output is an output port in the circuit -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - - -#ifndef DES_OUTPUT_H_ -#define DES_OUTPUT_H_ - - -#include -#include -#include - - -#include "Input.h" - -namespace des { -/** - * The Class Output. - */ -template -class Output: public Input { - - typedef Input Base; - typedef typename Base::Event_ty Event_ty; - -public: - /** - * Instantiates a new Output. 
- */ - Output(size_t id, des::BasicPort& impl) - : Input (id, impl) {} - - virtual Output* clone () const { - return new Output (*this); - } - - /** - * A string representation - */ - virtual std::string str () const { - std::ostringstream ss; - ss << "Output: " << Base::Base::str (); - return ss.str (); - } - -protected: - - /** - * Output just receives events and updates its state, does not send out any events - */ - virtual void execEventIntern (const Event_ty& event, - typename Base::SendWrapper& sendWrap, - typename Base::BaseOutDegIter& b, typename Base::BaseOutDegIter& e) { - - if (event.getType () != Event_ty::NULL_EVENT) { - - const des::LogicUpdate& lu = event.getAction (); - if (lu.getNetName () == Base::getImpl ().getInputName ()) { - Base::getImpl ().applyUpdate (lu); - } else { - Base::getImpl ().netNameMismatch (lu); - } - } - - } - -}; - -} // end namespace des - -#endif /* DES_OUTPUT_H_ */ diff --git a/maxflow/galois/apps/des/common/SimGate.h b/maxflow/galois/apps/des/common/SimGate.h deleted file mode 100644 index 61c442a..0000000 --- a/maxflow/galois/apps/des/common/SimGate.h +++ /dev/null @@ -1,139 +0,0 @@ -/** defines the interface for a SimGate and implements some common functionality -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * Created on: Jun 22, 2011 - * - * @author M. Amber Hassaan - */ - -#ifndef DES_SIMGATE_H -#define DES_SIMGATE_H - -#include -#include - -#include -#include - -#include "logicDefs.h" -#include "LogicUpdate.h" -#include "LogicGate.h" - -#include "Event.h" -#include "SimObject.h" - - -namespace des { -/** - * The Class SimGate represents an abstract logic gate. 
- */ -template -class SimGate: public S { - -protected: - typedef S Base; - des::LogicGate& impl; - -public: - typedef Event Event_ty; - - SimGate (size_t id, des::LogicGate& impl) - : Base (id, impl.getNumOutputs (), impl.getNumInputs ()), impl(impl) - {} - - SimGate (const SimGate& that): Base (that), impl (that.impl) {} - - virtual SimGate* clone () const { - return new SimGate (*this); - } - - virtual des::LogicGate& getImpl () const { - return impl; - } - - virtual size_t getInputIndex (const Event_ty& event) const { - assert (dynamic_cast (event.getRecvObj ()) == this); - - const std::string& netName = event.getAction ().getNetName (); - return impl.getInputIndex (netName); - } - - /** - * A string representation - */ - virtual std::string str () const { - std::ostringstream ss; - ss << Base::str () << ": " << impl.str (); - return ss.str (); - } - - -protected: - - /** - * Send events to fanout, which are the out going neighbors in the circuit graph. - */ - void sendEventsToFanout (const Event_ty& inputEvent, - const des::LogicUpdate& msg, const Event_ty::Type& type, - typename Base::SendWrapper& sendWrap, - typename Base::BaseOutDegIter& b, typename Base::BaseOutDegIter& e) { - - assert (dynamic_cast (this) != NULL); - SimGate* srcGate = static_cast (this); - - const des::SimTime& sendTime = inputEvent.getRecvTime(); - - for (; b != e; ++b) { - - assert (dynamic_cast (*b) != NULL); - SimGate* dstGate = static_cast (*b); - - Event_ty ne = srcGate->makeEvent (dstGate, msg, type, sendTime, impl.getDelay ()); - - sendWrap.send (dstGate, ne); - } - } - - - - virtual void execEventIntern (const Event_ty& event, - typename Base::SendWrapper& sendWrap, - typename Base::BaseOutDegIter& b, typename Base::BaseOutDegIter& e) { - - if (event.getType () != Event_ty::NULL_EVENT) { - // update the inputs of fanout gates - const des::LogicUpdate& lu = event.getAction (); - - impl.applyUpdate (lu); - - } // else output is unchanged in case of NULL_EVENT - - - des::LogicUpdate drvFanout (impl.getOutputName (), impl.getOutputVal ()); - - sendEventsToFanout (event, drvFanout, event.getType (), sendWrap, b, e); - - } - - -}; - -} // end namespace des -#endif // DES_SIMGATE_H diff --git a/maxflow/galois/apps/des/common/SimInit.h b/maxflow/galois/apps/des/common/SimInit.h deleted file mode 100644 index 76e80c9..0000000 --- a/maxflow/galois/apps/des/common/SimInit.h +++ /dev/null @@ -1,436 +0,0 @@ -/** SimInit initializes the circuit graph and creates initial set of events -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. 
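The fanout pattern in sendEventsToFanout, reduced to a standalone sketch: every out-neighbour receives one event carrying the driver's output net and value, with the driving event's receive time as its send time and the gate delay pushing the receive time forward (the MIN_DELAY clamp applied inside makeEvent is omitted here). Update, Ev and sendToFanout are illustrative stand-ins, not the real des types.

#include <cstdio>
#include <string>
#include <vector>

typedef long long SimTime;

struct Update { std::string net; char val; };                     // driven net and its new value
struct Ev     { size_t dst; Update msg; SimTime sendTime, recvTime; };

// Schedule one event per out-neighbour of the driving gate.
std::vector<Ev> sendToFanout(const std::vector<size_t>& fanout, const Update& drv,
                             SimTime inputRecvTime, SimTime gateDelay) {
  std::vector<Ev> out;
  for (size_t dst : fanout)
    out.push_back(Ev{dst, drv, inputRecvTime, inputRecvTime + gateDelay});
  return out;
}

int main() {
  std::vector<Ev> evs = sendToFanout({2, 5, 7}, Update{"n1", '1'}, /*recvTime=*/4, /*delay=*/2);
  for (const Ev& e : evs)
    std::printf("-> gate %zu: %s=%c at t=%lld\n", e.dst, e.msg.net.c_str(), e.msg.val, e.recvTime);
}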
- * - * Created on: Jun 23, 2011 - * - * @author M. Amber Hassaan - */ - - -#ifndef DES_SIMINIT_H -#define DES_SIMINIT_H - -#include -#include -#include -#include -#include - -#include - - -#include "comDefs.h" -#include "BaseSimObject.h" -#include "Event.h" -#include "SimInit.h" -#include "SimGate.h" -#include "Input.h" -#include "Output.h" - -#include "logicDefs.h" -#include "LogicUpdate.h" -#include "LogicFunctions.h" -#include "LogicGate.h" -#include "OneInputGate.h" -#include "TwoInputGate.h" -#include "BasicPort.h" -#include "NetlistParser.h" - -namespace des { - -template -class SimInit { - -public: - typedef NetlistParser::StimulusMapType StimulusMapType; - typedef SimGate_tp SimGate_ty; - typedef Input_tp Input_ty; - typedef Output_tp Output_ty; - typedef Event Event_ty; - typedef BaseSimObject BaseSimObj_ty; - - -protected: - - /** The netlist parser. */ - NetlistParser parser; - - /** The input simulation objs. */ - std::vector inputObjs; - - /** The output simulation objs. */ - std::vector outputObjs; - - /** The gates i.e. other than input and output ports. */ - std::vector gateObjs; - - /** The initial events. */ - std::vector initEvents; - - /** The num edges. */ - size_t numEdges; - - /** The num nodes. */ - size_t numNodes; - - /** Counter for BaseSimObj_ty's */ - size_t simObjCntr; - - - -protected: - /** - * Creates the input simulation objs. - */ - void createInputObjs() { - const std::vector& inputPorts = parser.getInputPorts (); - for (std::vector::const_iterator i = inputPorts.begin (), e = inputPorts.end (); i != e; ++i) { - inputObjs.push_back(new Input_ty((simObjCntr++), **i)); - } - } - - /** - * Creates the output simulation objs. - */ - void createOutputObjs() { - const std::vector& outputPorts = parser.getOutputPorts (); - for (std::vector::const_iterator i = outputPorts.begin (), e = outputPorts.end (); i != e; ++i) { - outputObjs.push_back(new Output_ty((simObjCntr++), **i)); - } - } - - - - /** - * Creates the gate objs. - */ - void createGateObjs() { - for (std::vector::const_iterator i = parser.getGates ().begin (), ei = parser.getGates ().end (); - i != ei; ++i) { - gateObjs.push_back (new SimGate_ty ((simObjCntr++), **i)); - } - } - - - /** - * Creates the initial events. 
- */ - void createInitEvents (bool createNullEvents=true) { - - const StimulusMapType& inputStimulusMap = parser.getInputStimulusMap(); - - for (std::vector::const_iterator i = inputObjs.begin (), ei = inputObjs.end (); i != ei; ++i ) { - - Input_ty* currInput = dynamic_cast (*i); - - assert ((currInput != NULL)); - - const BasicPort& impl = currInput->getImpl (); - - StimulusMapType::const_iterator it = inputStimulusMap.find (impl.getOutputName ()); - assert ((it != inputStimulusMap.end ())); - - const std::vector >& tvList = it->second; - - - for (std::vector< std::pair >::const_iterator j = tvList.begin () - , ej = tvList.end (); j != ej; ++j) { - - const std::pair& p = *j; - LogicUpdate lu(impl.getInputName(), p.second); - - Event_ty e = currInput->makeEvent(currInput, lu, Event_ty::REGULAR_EVENT, p.first); - - initEvents.push_back(e); - } - - if (createNullEvents) { - // final NULL_EVENT scheduled at INFINITY_SIM_TIME to signal that no more - // non-null events will be received on an input - - LogicUpdate lu (impl.getInputName (), LOGIC_ZERO); - Event_ty fe = currInput->makeEvent (currInput, lu, Event_ty::NULL_EVENT, INFINITY_SIM_TIME); - initEvents.push_back (fe); - } - - } - - } - /** - * helper function, which creates graph nodes corresponding to any simulation object - * and add the node to the graph. No connections made yet - */ - template - static void createGraphNodes (const std::vector& simObjs, G& graph, size_t& numNodes) { - typedef typename G::GraphNode GNode; - - for (typename std::vector::const_iterator i = simObjs.begin (), ei = simObjs.end (); i != ei; ++i) { - BaseSimObj_ty* so = *i; - GNode n = graph.createNode(so); - graph.addNode(n, Galois::MethodFlag::NONE); - ++numNodes; - } - } - /** - * Creates the connections i.e. edges in the graph - * An edge is created whenever a gate's output is connected to - * another gate's input. - */ - template - void createConnections (G& graph) { - typedef typename G::GraphNode GNode; - - // read in all nodes first, since iterator may not support concurrent modification - std::vector allNodes; - - for (typename G::iterator i = graph.begin (), ei = graph.end (); i != ei; ++i) { - allNodes.push_back (*i); - } - - for (typename std::vector::iterator i = allNodes.begin (), ei = allNodes.end (); i != ei; ++i) { - GNode& src = *i; - - SimGate_tp* sg = dynamic_cast (graph.getData (src, Galois::MethodFlag::NONE)); - assert (sg != NULL); - const LogicGate& srcGate = sg->getImpl (); - - const std::string& srcOutName = srcGate.getOutputName (); - - for (typename std::vector::iterator j = allNodes.begin (), ej = allNodes.end (); j != ej; ++j) { - GNode& dst = *j; - SimGate_tp* dg = dynamic_cast (graph.getData (dst, Galois::MethodFlag::NONE)); - assert (dg != NULL); - const LogicGate& dstGate = dg->getImpl (); - - if (dstGate.hasInputName (srcOutName)) { - assert (&srcGate != &dstGate); // disallowing self loops - if (graph.findEdge(src, dst) == graph.edge_end(src)) { - ++numEdges; - graph.addEdge (src, dst, Galois::MethodFlag::NONE); - } - - } - - } // end inner for - } // end outer for - - } - - - - /** - * destructor helper - */ - void destroy () { - destroyVec (inputObjs); - destroyVec (outputObjs); - destroyVec (gateObjs); - initEvents.clear (); - } - -public: - - /** - * Instantiates a new simulation initializer. - * - * @param netlistFile the netlist file - */ - SimInit(const std::string& netlistFile) - : parser(netlistFile), simObjCntr(0) - {} - - virtual ~SimInit () { - destroy (); - } - - /** - * Initialize. 
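The edge rule in createConnections, restated over plain net names: an edge src -> dst is added exactly when dst reads the net that src drives, with self loops skipped and duplicate edges rejected. GateDesc and connect are illustrative; the real code walks graph nodes and LogicGate objects.

#include <cstdio>
#include <set>
#include <string>
#include <utility>
#include <vector>

struct GateDesc {
  std::string output;               // net driven by this gate
  std::vector<std::string> inputs;  // nets read by this gate
};

std::set<std::pair<size_t, size_t> > connect(const std::vector<GateDesc>& gates) {
  std::set<std::pair<size_t, size_t> > edges;  // the set rejects duplicate edges
  for (size_t s = 0; s < gates.size(); ++s)
    for (size_t d = 0; d < gates.size(); ++d) {
      if (s == d) continue;                    // self loops are disallowed
      for (const std::string& in : gates[d].inputs)
        if (in == gates[s].output) edges.insert(std::make_pair(s, d));
    }
  return edges;
}

int main() {
  std::vector<GateDesc> g = { {"a", {"x"}}, {"b", {"a"}}, {"c", {"a", "b"}} };
  for (const auto& e : connect(g)) std::printf("%zu -> %zu\n", e.first, e.second);
  // prints: 0 -> 1, 0 -> 2, 1 -> 2
}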
- * - * Processing steps - * create the input and output objects and add to netlistArrays - * create the gate objects - * connect the netlists by populating the fanout lists - * create a list of initial events - */ - template - void initialize (G& graph) { - typedef typename G::GraphNode GNode; - - destroy (); - - numNodes = 0; - numEdges = 0; - - // create input and output objects - createInputObjs (); - - createOutputObjs (); - - createGateObjs (); - - createInitEvents (); - - // create nodes for inputObjs - createGraphNodes (inputObjs, graph, numNodes); - - // create nodes for outputObjs - createGraphNodes (outputObjs, graph, numNodes); - - // create nodes for all gates - createGraphNodes (gateObjs, graph, numNodes); - - // create the connections based on net names - createConnections(graph); - - } - - - /** - * Verify the output by comparing the final values of the outputs of the circuit - * from simulation against the values precomputed in the netlist file - */ - void verify () const { - - // const std::vector& outputObjs = getOutputObjs(); - const std::map& outValues = getOutValues(); - - int exitStatus = 0; - - for (typename std::vector::const_iterator i = outputObjs.begin () - , ei = outputObjs.end (); i != ei; ++i) { - BaseSimObj_ty* so = *i; - - Output_ty* outObj = dynamic_cast< Output_ty* > (so); - assert (outObj != NULL); - - BasicPort& outp = outObj->getImpl (); - - const LogicVal& simulated = outp.getOutputVal(); - const LogicVal& expected = (outValues.find (outp.getInputName ()))->second; - - if (simulated != expected) { - exitStatus = 1; - std::cerr << "Wrong output value for " << outp.getInputName () - << ", expected : " << expected - << ", simulated : " << simulated - << std::endl; - } - } - - if (exitStatus != 0) { - std::cerr << "-----------------------------------------------------------" << std::endl; - - for (typename std::vector::const_iterator i = outputObjs.begin () - , ei = outputObjs.end (); i != ei; ++i) { - BaseSimObj_ty* so = *i; - - Output_ty* outObj = dynamic_cast< Output_ty* > (so); - assert (outObj != NULL); - - BasicPort& outp = outObj->getImpl (); - const LogicVal& expected = (outValues.find (outp.getInputName ()))->second; - - std::cerr << "expected: " << expected << ", " << outObj->str () << std::endl; - } - - abort (); - } else { - std::cout << ">>> OK: Simulation verified as correct" << std::endl; - } - } - - - /** - * Gets the inits the events. - * - * @return the inits the events - */ - const std::vector& getInitEvents() const { - return initEvents; - } - - /** - * Gets the input names. - * - * @return the input names - */ - const std::vector& getInputNames() const { - return parser.getInputNames(); - } - - /** - * Gets the input objs. - * - * @return the input objs - */ - const std::vector& getInputObjs() const { - return inputObjs; - } - - /** - * Gets the output names. - * - * @return the output names - */ - const std::vector& getOutputNames() const { - return parser.getOutputNames(); - } - - /** - * Gets the output objs. - * - * @return the output objs - */ - const std::vector getOutputObjs() const { - return outputObjs; - } - - /** - * Gets the out values. - * - * @return the out values - */ - const std::map& getOutValues() const { - return parser.getOutValues(); - } - - /** - * Gets the number edges. 
- * - * @return the number edges - */ - size_t getNumEdges() const { - return numEdges; - } - - /** - * Gets the number of nodes - * - * @return the number of nodes - */ - size_t getNumNodes() const { - return numNodes; - } - -}; - -} // end namespace des - -#endif /* DES_SIMINIT_H */ diff --git a/maxflow/galois/apps/des/common/abstractMain.h b/maxflow/galois/apps/des/common/abstractMain.h deleted file mode 100644 index 8ddc144..0000000 --- a/maxflow/galois/apps/des/common/abstractMain.h +++ /dev/null @@ -1,142 +0,0 @@ -/** AbstractMain holds common functionality for main classes -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * Created on: Jun 24, 2011 - * - * @author M. Amber Hassaan - */ - -#ifndef DES_ABSTRACT_MAIN_H_ -#define DES_ABSTRACT_MAIN_H_ - -#include -#include -#include -#include -#include - -#include - -#include "Galois/Statistic.h" -#include "Galois/Graph/Graph.h" -#include "Galois/Graph/LCGraph.h" -#include "Galois/Galois.h" -#include "Galois/Runtime/Sampling.h" - -#include "llvm/Support/CommandLine.h" - -#include "Lonestar/BoilerPlate.h" - -#include "comDefs.h" -#include "BaseSimObject.h" -#include "Event.h" -#include "SimInit.h" -#include "SimGate.h" -#include "Input.h" -#include "Output.h" - -namespace des { - -namespace cll = llvm::cl; - -static const char* name = "Discrete Event Simulation"; -static const char* desc = "Perform logic circuit simulation using Discrete Event Simulation"; -static const char* url = "discrete_event_simulation"; - -static cll::opt netlistFile(cll::Positional, cll::desc(""), cll::Required); - -/** - * The Class AbstractMain holds common functionality for {@link des_unord::DESunorderedSerial} and {@link des_unord::DESunordered}. - */ -// TODO: graph type can also be exposed to sub-classes as a template parameter -template -class AbstractMain { - -public: - - typedef Galois::Graph::FirstGraph Graph; - typedef typename Graph::GraphNode GNode; - - -protected: - static const unsigned CHUNK_SIZE = 8; - - static const unsigned DEFAULT_EPI = 32; - - /** - * Gets the version. - * - * @return the version - */ - virtual std::string getVersion() const = 0; - - /** - * Run loop. 
- * - * @throws Exception the exception - */ - virtual void runLoop(const SimInit_tp& simInit, Graph& graph) = 0; - - - virtual void initRemaining (const SimInit_tp& simInit, Graph& graph) = 0; - -public: - /** - * Run the simulation - * @param argc - * @param argv - */ - void run(int argc, char* argv[]) { - - Galois::StatManager sm; - LonestarStart(argc, argv, name, desc, url); - - SimInit_tp simInit(netlistFile); - Graph graph; - simInit.initialize (graph); - - // Graph graph; - // graph.copyFromGraph (in_graph); - - printf("circuit graph: %d nodes, %zd edges\n", graph.size(), simInit.getNumEdges()); - printf("Number of initial events = %zd\n", simInit.getInitEvents().size()); - - initRemaining (simInit, graph); - - Galois::StatTimer t; - - t.start (); - Galois::Runtime::beginSampling (); - - runLoop(simInit, graph); - - Galois::Runtime::endSampling (); - t.stop (); - - if (!skipVerify) { - simInit.verify (); - } - - } - -}; - -} // namespace des -#endif // DES_ABSTRACT_MAIN_H_ diff --git a/maxflow/galois/apps/des/common/comDefs.cpp b/maxflow/galois/apps/des/common/comDefs.cpp deleted file mode 100644 index f04cc19..0000000 --- a/maxflow/galois/apps/des/common/comDefs.cpp +++ /dev/null @@ -1,29 +0,0 @@ -/** Implementation corresponding to @file comDefs.h -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#include "comDefs.h" - -std::string des::toLowerCase (std::string str) { - std::transform (str.begin (), str.end (), str.begin (), ::tolower); - return str; -} diff --git a/maxflow/galois/apps/des/common/comDefs.h b/maxflow/galois/apps/des/common/comDefs.h deleted file mode 100644 index d782f62..0000000 --- a/maxflow/galois/apps/des/common/comDefs.h +++ /dev/null @@ -1,70 +0,0 @@ -/** Some common definitions and helper functions -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. 
Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#ifndef DES_COM_DEFS_H -#define DES_COM_DEFS_H - -#include -#include -#include -#include - - -namespace des { - -/** - * type for time in simulation world - */ -typedef long long SimTime; - -// const SimTime INFINITY_SIM_TIME = std::numeric_limits::max (); -// The above definition is bad because INFINITY_SIM_TIME + small_value will cause an overflow -// and the result is not INFINITY_SIM_TIME any more - -/** The Constant INFINITY_SIM_TIME is used by NULL_EVENT messages to signal the end of simulation. */ -const SimTime INFINITY_SIM_TIME = (1 << 30); - -const SimTime MIN_DELAY = 1l; - - -/** - * Helper function to convert a string to lower case - */ -std::string toLowerCase (std::string str); - -/** - * freeing pointers in a vector - * before the vector itself is destroyed - */ -template -void destroyVec (std::vector& vec) { - for (typename std::vector::iterator i = vec.begin (), ei = vec.end (); i != ei; ++i) { - delete *i; - *i = NULL; - } - vec.clear (); -} - -} // namespace des - -#endif diff --git a/maxflow/galois/apps/des/logic/BasicPort.cpp b/maxflow/galois/apps/des/logic/BasicPort.cpp deleted file mode 100644 index e696e93..0000000 --- a/maxflow/galois/apps/des/logic/BasicPort.cpp +++ /dev/null @@ -1,27 +0,0 @@ -/** OneInputGate implements the basic structure of a one input logic gate -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - - -#include "BasicPort.h" - -const des::BUF& des::BasicPort::BUFFER = des::BUF(); diff --git a/maxflow/galois/apps/des/logic/BasicPort.h b/maxflow/galois/apps/des/logic/BasicPort.h deleted file mode 100644 index 8c2d4fd..0000000 --- a/maxflow/galois/apps/des/logic/BasicPort.h +++ /dev/null @@ -1,51 +0,0 @@ -/** OneInputGate implements the basic structure of a one input logic gate -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. 
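A short demonstration of the overflow concern spelled out in comDefs.h: NULL_EVENTs are scheduled at INFINITY_SIM_TIME yet still pass through makeEvent, which adds a delay, so the sentinel needs headroom; with numeric_limits<SimTime>::max() that addition would be signed overflow. The checks in main are illustrative only.

#include <cstdio>
#include <limits>

typedef long long SimTime;
const SimTime INFINITY_SIM_TIME = 1 << 30;  // value chosen in comDefs.h

int main() {
  SimTime delay = 10;
  SimTime bumped = INFINITY_SIM_TIME + delay;  // well-defined, still "past infinity"
  std::printf("%d %d\n",
              bumped > INFINITY_SIM_TIME,                                    // 1: order is preserved
              INFINITY_SIM_TIME < std::numeric_limits<SimTime>::max() / 2);  // 1: plenty of headroom
  // Using numeric_limits<SimTime>::max() as the sentinel would make the
  // addition above overflow, and the result would no longer compare as the
  // largest time stamp.
}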
- * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#ifndef DES_BASIC_PORT_H -#define DES_BASIC_PORT_H - -#include - -#include "LogicFunctions.h" -#include "OneInputGate.h" - -namespace des { - -class BasicPort: public OneInputGate { -private: - static const BUF& BUFFER; - -public: - BasicPort (const std::string& outputName, const std::string& inputName) - : OneInputGate (BUFFER, outputName, inputName) {} - - BasicPort* makeClone () const { return new BasicPort (*this); } - - -}; - - -} // namespace des - - -#endif // DES_BASIC_PORT_H diff --git a/maxflow/galois/apps/des/logic/LogicFunctions.h b/maxflow/galois/apps/des/logic/LogicFunctions.h deleted file mode 100644 index 142511a..0000000 --- a/maxflow/galois/apps/des/logic/LogicFunctions.h +++ /dev/null @@ -1,220 +0,0 @@ -/** Defines the basic functors for one and two input logic gates -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#ifndef DES_LOGIC_FUNCTIONS_H_ -#define DES_LOGIC_FUNCTIONS_H_ - -#include -#include - -#include "logicDefs.h" - - -namespace des { - -/** - * LogicFunc is a functor, serving as a common base type - * for one and two input functors. - */ -struct LogicFunc { - virtual const std::string str () const = 0; -}; - -/** - * Interface of a functor for modeling the funciton of a one input one, output logic gate. - * Each implementation of this interface is a different kind of one input gate, e.g. an - * inverter, buffer etc - */ - -struct OneInputFunc: public LogicFunc { - virtual LogicVal operator () (const LogicVal& in) const = 0; -}; - -/** - * Interface of a functor for modeling functionality a logic gate with two inputs and one output. 
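Before the concrete functors that follow, a stand-alone sketch of how a two-input functor of this kind behaves over the three-valued encoding ('0', '1', 'X') that logicDefs.h introduces later in this diff. MiniAnd is an illustrative stand-in written for this note, not the real AND2; the point is that a controlling '0' decides the output even when the other input is unknown.

#include <cassert>
#include <string>

typedef char LogicVal;
const LogicVal LOGIC_ZERO    = '0';
const LogicVal LOGIC_ONE     = '1';
const LogicVal LOGIC_UNKNOWN = 'X';

struct TwoInputFunc {
  virtual LogicVal operator()(const LogicVal& x, const LogicVal& y) const = 0;
  virtual const std::string str() const = 0;
  virtual ~TwoInputFunc() {}
};

// Three-valued AND: a '0' on either input forces the output regardless of the other.
struct MiniAnd : public TwoInputFunc {
  virtual LogicVal operator()(const LogicVal& x, const LogicVal& y) const {
    if (x == LOGIC_ZERO || y == LOGIC_ZERO) return LOGIC_ZERO;
    if (x == LOGIC_ONE) return y;
    if (y == LOGIC_ONE) return x;
    return LOGIC_UNKNOWN;
  }
  virtual const std::string str() const { return "MINI_AND"; }
};

int main() {
  MiniAnd a;
  assert(a(LOGIC_ZERO, LOGIC_UNKNOWN) == LOGIC_ZERO);    // '0' is controlling
  assert(a(LOGIC_ONE,  LOGIC_UNKNOWN) == LOGIC_UNKNOWN); // unknown propagates
  return 0;
}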
- * Each implementation of this interface describes a two input gate - */ -struct TwoInputFunc: public LogicFunc { - virtual LogicVal operator () (const LogicVal& x, const LogicVal& y) const = 0; -}; - -/** - * Buffer - */ -struct BUF : public OneInputFunc, public std::unary_function { - LogicVal _buf_ (const LogicVal& in) const { - return in; - } - - virtual LogicVal operator () (const LogicVal& in) const { - return _buf_ (in); - } - - virtual const std::string str () const { return "BUF"; } -}; - -/** - * Inverter - */ -struct INV : public OneInputFunc, public std::unary_function { - LogicVal _not_ (const LogicVal& in) const { - if (in == LOGIC_ZERO) { - return LOGIC_ONE; - } else if (in == LOGIC_ONE) { - return LOGIC_ZERO; - } else { - return LOGIC_UNKNOWN; - } - } - - virtual LogicVal operator () (const LogicVal& in) const { - return _not_ (in); - } - - virtual const std::string str () const { return "INV"; } -}; - - -/** - * And with two inputs - */ - -struct AND2: public TwoInputFunc, public std::binary_function { - LogicVal _and_ (const LogicVal& x, const LogicVal& y) const { - if (x == LOGIC_ZERO || y == LOGIC_ZERO) { - return LOGIC_ZERO; - - } else if (x == LOGIC_ONE ) { - return y; - - } else if (y == LOGIC_ONE) { - return x; - - } else { - return LOGIC_UNKNOWN; - } - - } - - virtual LogicVal operator () (const LogicVal& x, const LogicVal& y) const { - return _and_ (x, y); - } - - virtual const std::string str () const { return "AND2"; } -}; - -/** - * Nand with two inputs - */ -struct NAND2: public AND2 { - LogicVal _nand_ (const LogicVal& x, const LogicVal& y) const { - return INV()._not_ (AND2::_and_ (x, y)); - } - - virtual LogicVal operator () (const LogicVal& x, const LogicVal& y) const { - return _nand_ (x, y); - } - - virtual const std::string str () const { return "NAND2"; } -}; - -/** - * OR with two inputs - */ -struct OR2: public TwoInputFunc, public std::binary_function { - LogicVal _or_ (const LogicVal& x, const LogicVal& y) const { - if (x == LOGIC_ONE || y == LOGIC_ONE) { - return LOGIC_ONE; - } else if (x == LOGIC_ZERO) { - return y; - } else if (y == LOGIC_ZERO) { - return x; - } else { - return LOGIC_UNKNOWN; - } - } - - virtual LogicVal operator () (const LogicVal& x, const LogicVal& y) const { - return _or_ (x, y); - } - - virtual const std::string str () const { return "OR2"; } -}; - -/** - * NOR with two inputs - */ -struct NOR2: public OR2 { - LogicVal _nor_ (const LogicVal& x, const LogicVal& y) const { - return INV()._not_ (OR2::_or_ (x, y)); - } - - virtual LogicVal operator () (const LogicVal& x, const LogicVal& y) const { - return _nor_ (x, y); - } - - virtual const std::string str () const { return "NOR2"; } -}; - -/** - * XOR with two inputs - */ -struct XOR2: public TwoInputFunc, public std::binary_function { - LogicVal _xor_ (const LogicVal& x, const LogicVal& y) const { - if (x == LOGIC_UNKNOWN || y == LOGIC_UNKNOWN) { - return LOGIC_UNKNOWN; - } else if (INV()._not_(x) == y) { - return LOGIC_ONE; - } else if (x == y) { - return LOGIC_ZERO; - } else { - return LOGIC_UNKNOWN; - } - } - - virtual LogicVal operator () (const LogicVal& x, const LogicVal& y) const { - return _xor_ (x, y); - } - - virtual const std::string str () const { return "XOR2"; } -}; - -/** - * XNOR with two inputs - */ -struct XNOR2: public XOR2 { - LogicVal _xnor_ (const LogicVal& x, const LogicVal& y) const { - return INV()._not_ (XOR2::_xor_ (x, y) ); - } - - virtual LogicVal operator () (const LogicVal& x, const LogicVal& y) const { - return _xnor_ (x, y); - } - - virtual 
const std::string str () const { return "XNOR2"; } -}; - - -} // namespace des - -#endif diff --git a/maxflow/galois/apps/des/logic/LogicGate.cpp b/maxflow/galois/apps/des/logic/LogicGate.cpp deleted file mode 100644 index 791ae63..0000000 --- a/maxflow/galois/apps/des/logic/LogicGate.cpp +++ /dev/null @@ -1,24 +0,0 @@ -/** LogicGate implements the basic structure of a logic gate -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#include "LogicGate.h" diff --git a/maxflow/galois/apps/des/logic/LogicGate.h b/maxflow/galois/apps/des/logic/LogicGate.h deleted file mode 100644 index 2c9e542..0000000 --- a/maxflow/galois/apps/des/logic/LogicGate.h +++ /dev/null @@ -1,225 +0,0 @@ -/** LogicGate implements the basic structure of a logic gate -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. 
Amber Hassaan - */ - -#ifndef DES_BASE_LOGIC_GATE_H_ -#define DES_BASE_LOGIC_GATE_H_ - -#include -#include - -#include "comDefs.h" -#include "logicDefs.h" -#include "LogicUpdate.h" - - -namespace des { - -class LogicGate { - -public: - - LogicGate () {} - - LogicGate (const LogicGate& that) {} - - virtual ~LogicGate () {} - - virtual LogicGate* makeClone () const = 0; - - /** - * @return number of inputs - */ - virtual size_t getNumInputs () const = 0; - - /** - * @return number of outputs - */ - virtual size_t getNumOutputs () const = 0; - - /** - * @return current output value - */ - virtual LogicVal getOutputVal () const = 0; - - /** - * @return namem of the output - */ - virtual const std::string& getOutputName () const = 0; - - /** - * @param update - * - * applies the update to internal state e.g. change to some input. Must update the output - * if the inputs have changed - */ - virtual void applyUpdate (const LogicUpdate& update) = 0; - - /** - * Evaluate output based on the current state of the input - * - * @return the - */ - virtual LogicVal evalOutput() const = 0; - - /** - * @param net: name of a wire - * @return true if has an input with the name equal to 'net' - */ - virtual bool hasInputName(const std::string& net) const = 0; - - /** - * @param inputName net name - * @return index of the input matching the net name provided - */ - virtual size_t getInputIndex (const std::string& inputName) const = 0; - - /** - * @param net: name of a wire - * @return true if has an output with the name equal to 'net' - */ - virtual bool hasOutputName(const std::string& net) const = 0; - - /** - * @return string representation - */ - virtual std::string str () const = 0; - - /** - * @return delay of the gate - */ - virtual const SimTime& getDelay() const = 0; - - /** - * @param delay value to set to - */ - virtual void setDelay (const SimTime& delay) = 0; - - - /** - * Handles an erroneous situation, where the net name in - * LogicUpdate provided does not match any of the inputs. - * - * @param le - */ - virtual void netNameMismatch (const LogicUpdate& le) const = 0; -}; - - -template -class BaseLogicGate: public LogicGate { - -protected: - - /** The output name. */ - std::string outputName; - - /** The output val. */ - LogicVal outputVal; - - /** The delay. */ - SimTime delay; - -public: - - BaseLogicGate (const std::string& outputName, const LogicVal& outVal, const SimTime& delay) - : outputName (outputName), outputVal (outVal) { - setDelay (delay); - } - - /** - * Gets the delay. - * - * @return the delay - */ - virtual const SimTime& getDelay() const { - return delay; - } - - /** - * Sets the delay. - * - * @param delay the new delay - */ - virtual void setDelay(const SimTime& delay) { - this->delay = delay; - if (this->delay <= 0) { - this->delay = MIN_DELAY; - } - } - - /** - * @return number of inputs - */ - virtual size_t getNumInputs () const { return Nin; } - - /** - * @return number of outputs - */ - virtual size_t getNumOutputs () const { return Nout; } - - /** - * @return current output value - */ - virtual LogicVal getOutputVal () const { return outputVal; } - - /** - * @return namem of the output - */ - virtual const std::string& getOutputName () const { return outputName; } - - /** - * @param net: name of a wire - * @return true if has an output with the name equal to 'net' - */ - virtual bool hasOutputName(const std::string& net) const { return (outputName == net); } - - - /** - * Sets the output name. 
- * - * @param outputName the new output name - */ - void setOutputName(const std::string& outputName) { - this->outputName = outputName; - } - - /** - * Sets the output val. - * - * @param outputVal the new output val - */ - void setOutputVal(const LogicVal& outputVal) { - this->outputVal = outputVal; - } - - - virtual void netNameMismatch (const LogicUpdate& le) const { - std::cerr << "Received logic update : " << le.str () << " with mismatching net name, this = " << str () << std::endl; - exit (-1); - } -}; - - -} // namespace des - -#endif diff --git a/maxflow/galois/apps/des/logic/LogicUpdate.h b/maxflow/galois/apps/des/logic/LogicUpdate.h deleted file mode 100644 index 1929e4d..0000000 --- a/maxflow/galois/apps/des/logic/LogicUpdate.h +++ /dev/null @@ -1,101 +0,0 @@ -/** LogicUpdate corresponds to a change in the input or output of a gate -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * Created on: Jun 23, 2011 - * - * @author M. Amber Hassaan - */ - -#ifndef DES_LOGICUPDATE_H_ -#define DES_LOGICUPDATE_H_ - -#include -#include - -#include "logicDefs.h" - - -namespace des { - -/** - * The Class LogicUpdate is the msg carried by events. represents a change in the value of a net. - */ -class LogicUpdate { - - /** The net name. */ - const std::string* netName; - - /** The net val. */ - LogicVal netVal; - -public: - - /** - * Instantiates a new logi update - * - * @param netName the net name - * @param netVal the net val - */ - LogicUpdate(const std::string& netName, const LogicVal& netVal) - : netName (&netName), netVal (netVal) {} - - LogicUpdate (): netName (NULL), netVal(LOGIC_UNKNOWN) {} - - friend bool operator == (const LogicUpdate& left, const LogicUpdate& right) { - return ((*left.netName) == (*right.netName)) && (left.netVal == right.netVal); - } - - friend bool operator != (const LogicUpdate& left, const LogicUpdate& right) { - return !(left == right); - } - - /** - * string representation - */ - const std::string str() const { - std::ostringstream ss; - ss << "netName = " << *netName << " netVal = " << netVal; - return ss.str (); - } - - /** - * Gets the net name. - * - * @return the net name - */ - const std::string& getNetName() const { - return *netName; - } - - - /** - * Gets the net val. 
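One property of LogicUpdate worth noting: it stores a pointer to the net-name string rather than a copy (and the default constructor leaves that pointer null), so the string handed to the constructor must outlive the update. A small illustrative stand-in, MiniUpdate, showing the same storage choice; the names here are invented for the sketch.

#include <cassert>
#include <string>

typedef char LogicVal;

// Stand-in mirroring LogicUpdate's storage: a pointer to the net name, not a copy.
struct MiniUpdate {
  const std::string* netName;
  LogicVal netVal;
  MiniUpdate(const std::string& n, LogicVal v) : netName(&n), netVal(v) {}
};

int main() {
  std::string wire = "out1";            // must outlive every update that refers to it
  MiniUpdate u(wire, '1');
  assert(*u.netName == "out1");
  // MiniUpdate bad(std::string("tmp"), '1');  // would dangle: the temporary dies
  //                                           // at the end of that statement
  return 0;
}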
- * - * @return the net val - */ - LogicVal getNetVal() const { - return netVal; - } - -}; - -} // namespace des - -#endif /* DES_LOGICUPDATE_H_ */ diff --git a/maxflow/galois/apps/des/logic/NetlistParser.cpp b/maxflow/galois/apps/des/logic/NetlistParser.cpp deleted file mode 100644 index 3f661b6..0000000 --- a/maxflow/galois/apps/des/logic/NetlistParser.cpp +++ /dev/null @@ -1,4 +0,0 @@ -#include "NetlistParser.h" - -const char* des::NetlistParser::DELIM = " \n\t,;()="; -const char* des::NetlistParser::COMMENTS = "//"; diff --git a/maxflow/galois/apps/des/logic/NetlistParser.h b/maxflow/galois/apps/des/logic/NetlistParser.h deleted file mode 100644 index 7ec282c..0000000 --- a/maxflow/galois/apps/des/logic/NetlistParser.h +++ /dev/null @@ -1,575 +0,0 @@ -/** NetlistParser reads a circuit netlist containing logic gates and wires etc -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * Created on: Jun 23, 2011 - * - * @author M. Amber Hassaan - */ - -#ifndef DES_NETLISTPARSER_H_ -#define DES_NETLISTPARSER_H_ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "comDefs.h" -#include "logicDefs.h" -#include "LogicFunctions.h" -#include "LogicGate.h" -#include "OneInputGate.h" -#include "TwoInputGate.h" -#include "BasicPort.h" - - -namespace des { - -/** - * NetlistTokenizer is a simple string tokenizer, which - * usings C strtok () function from
<cstring>
- */ - -class NetlistTokenizer: public boost::noncopyable { - /** - * correct way to use is to first check if hasMoreTokens and then call nextToken - * - * need to read one token ahead so because hasMoreTokens is called before nextToken - * - * basic algorithm - * - * initially currTokPtr = NULL, nextTokPtr = nextTokenInt - * - * In nextToken - * return string at currTokPtr - * currTokPtr = nextTokPtr - * nextTokPtr = read next token - * - * algorithm for reading next token - * read next token (with NULL) - * while next token is null or beginning of comment { - * read next line (break out the loop if file has ended) - * read first token - * } - * create a string and return - * - * things to check for - * - end of file, reading error (in this case getNextLine() should return NULL) - * - * initialization: - * - initially nextTokPtr should be NULL and this fine because - * reading nextTok with null should return null; - * - */ - -private: - /** file handle for input stream */ - std::ifstream ifs; - - /** what characters mark end of a token */ - const char* delim; - - /** string representing beginning of a line comment */ - const char* comments; - - - /** current line read from the file */ - char* linePtr; - - /** ptr to next token */ - char* nextTokPtr; - - /** ptr to current token, returned on a call to nextToken () */ - //char* currTokPtr; - -private: - - /** - * @returns true if nextTokPtr starts with comment pattern - */ - bool isCommentBegin () const { - std::string tok(nextTokPtr); - if (tok.find (comments) == 0) { - return true; - } else { - return false; - } - } - - /** - * read next line from the file - * and return it as a C string - */ - char* getNextLine () { - // read next line - std::string currLine; - - if (ifs.eof () || ifs.bad ()) { - linePtr = NULL; - } - else { - - std::getline (ifs, currLine); - - delete[] linePtr; - linePtr = new char[currLine.size () + 1]; - strcpy (linePtr, currLine.c_str ()); - } - - return linePtr; - } - - /** - * read next token as a C string - */ - char* readNextToken () { - nextTokPtr = strtok (NULL, delim); - - while (nextTokPtr == NULL || isCommentBegin ()) { - linePtr = getNextLine (); - if (linePtr == NULL) { - nextTokPtr = NULL; - break; - } - nextTokPtr = strtok (linePtr, delim); - } - return nextTokPtr; - } - -public: - /** - * Constructor - * - * @param fileName: the file to read from - * @param delim: a string containing characters that mark end of a token - * @param comments: a string that contains beginning of a comment - */ - NetlistTokenizer (const char* fileName, const char* delim, const char* comments) - : ifs(fileName), delim (delim), comments(comments), linePtr (NULL) { - - if (!ifs.good ()) { - std::cerr << "Failed to open this file for reading: " << fileName << std::endl; - abort (); - } - nextTokPtr = readNextToken(); - - } - - /** - * returns the next token from the file - */ - const std::string nextToken () { - assert (nextTokPtr != NULL); - - std::string retval(nextTokPtr); - nextTokPtr = readNextToken (); - - return retval; - } - - bool hasMoreTokens () const { - return !ifs.eof() || nextTokPtr != NULL; - } -}; - -/** - * The Class NetlistParser parses an input netlist file. - */ -class NetlistParser { -public: - - /** following is the list of token separators; characters meant to be ignored */ - static const char* DELIM; - - /** beginning of a comment string */ - static const char* COMMENTS; - -public: - typedef std::map > > StimulusMapType; - -private: - - - /** The netlist file. 
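The NetlistTokenizer above keeps one token of look-ahead so that hasMoreTokens() can answer before nextToken() is called. A compressed sketch of the same pattern over an in-memory stream, without the strtok()-based line handling and comment skipping of the real class; the class and member names below are illustrative.

#include <iostream>
#include <sstream>
#include <string>

// Look-ahead tokenizer: 'pending' always holds the token that the next call
// to nextToken() will return, so hasMoreTokens() is just a flag check.
class LookaheadTokenizer {
  std::istringstream in;
  std::string pending;
  bool havePending;

  void advance() { havePending = static_cast<bool>(in >> pending); }

public:
  explicit LookaheadTokenizer(const std::string& text) : in(text) { advance(); }

  bool hasMoreTokens() const { return havePending; }

  std::string nextToken() {
    std::string ret = pending;  // hand out the buffered token...
    advance();                  // ...and immediately read the one after it
    return ret;
  }
};

int main() {
  LookaheadTokenizer tok("inputs a b end");
  while (tok.hasMoreTokens())
    std::cout << tok.nextToken() << '\n';
  return 0;
}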
*/ - const std::string& netlistFile; - - /** The input names. */ - std::vector inputNames; - - /** input ports */ - std::vector inputPorts; - - /** The output names. */ - std::vector outputNames; - - /** output ports */ - std::vector outputPorts; - - /** The out values. */ - std::map outValues; - - /** The input stimulus map has a list of (time, value) pairs for each input. */ - StimulusMapType inputStimulusMap; - - /** The gates. */ - std::vector gates; - - /** The finish time. */ - SimTime finishTime; - - - -private: - /** - * A mapping from string name (in the netlist) to functor that implements - * the corresponding functionality. Helps in initialization - */ - static const std::map& oneInputGates () { - static std::map oneInMap; - oneInMap.insert(std::make_pair (toLowerCase ("INV"), new INV())); - return oneInMap; - } - - /** - * A mapping from string name (in the netlist) to functor that implements - * the corresponding functionality. Helps in initialization - */ - static const std::map& twoInputGates () { - static std::map twoInMap; - twoInMap.insert(std::make_pair (toLowerCase ("AND2") , new AND2())); - twoInMap.insert(std::make_pair (toLowerCase ("NAND2") , new NAND2())); - twoInMap.insert(std::make_pair (toLowerCase ("OR2") , new OR2())); - twoInMap.insert(std::make_pair (toLowerCase ("NOR2") , new NOR2())); - twoInMap.insert(std::make_pair (toLowerCase ("XOR2") , new XOR2())); - twoInMap.insert(std::make_pair (toLowerCase ("XNOR2") , new XNOR2())); - return twoInMap; - } - - - /** - * Parses the port list i.e. inputs and outputs - * - * @param tokenizer the tokenizer - * @param portNames the net names for input/output ports - */ - static void parsePortList(NetlistTokenizer& tokenizer, std::vector& portNames) { - std::string token = toLowerCase (tokenizer.nextToken ()); - while (token != ("end")) { - portNames.push_back(token); - token = toLowerCase (tokenizer.nextToken ()); - } - } - - static const char* getInPrefix () { return "in_"; } - - static const char* getOutPrefix () { return "out_"; } - - void createInputPorts () { - for (std::vector::const_iterator i = inputNames.begin (), ei = inputNames.end (); i != ei; ++i) { - const std::string& out = *i; - std::string in = getInPrefix() + out; - inputPorts.push_back (new BasicPort (out, in)); - } - } - - void createOutputPorts () { - for (std::vector::const_iterator i = outputNames.begin (), ei = outputNames.end (); i != ei; ++i) { - const std::string& in = *i; - std::string out = getOutPrefix() + in; - outputPorts.push_back (new BasicPort (out, in)); - } - } - - /** - * Parses the out values, which are the expected values of the circuit outputs at the end of - * simulation - * - * @param tokenizer the tokenizer - * @param outValues the expected out values at the end of the simulation - */ - static void parseOutValues(NetlistTokenizer& tokenizer, std::map& outValues) { - std::string token = toLowerCase (tokenizer.nextToken ()); - while (token != ("end")) { - std::string outName = token; - token = toLowerCase (tokenizer.nextToken ()); - LogicVal value = token[0]; - token = toLowerCase (tokenizer.nextToken ()); - - outValues.insert (std::make_pair(outName, value)); - } - } - - /** - * Parses the initialization list for all the inputs. 
- * - * @param tokenizer the tokenizer - * @param inputStimulusMap the input stimulus map - */ - static void parseInitList(NetlistTokenizer& tokenizer, StimulusMapType& inputStimulusMap) { - // capture the name of the input signal - std::string input = toLowerCase (tokenizer.nextToken ()); - - std::string token = toLowerCase (tokenizer.nextToken ()); - - std::vector > timeValList; - while (token != ("end")) { - - SimTime t(atol (token.c_str ())); // SimTime.parseLong(token); - - token = toLowerCase (tokenizer.nextToken ()); - LogicVal v = token[0]; - - timeValList.push_back (std::make_pair(t, v)); - - token = toLowerCase (tokenizer.nextToken ()); - } - - inputStimulusMap.insert (std::make_pair(input, timeValList)); - } - - /** - * Parses the actual list of gates - * - * @param tokenizer the tokenizer - * @param gates the gates - */ - static void parseNetlist(NetlistTokenizer& tokenizer, std::vector& gates) { - - std::string token = toLowerCase (tokenizer.nextToken ()); - - while (token != ("end")) { - - if (oneInputGates().count (token) > 0) { - - const OneInputFunc* func = (oneInputGates ().find (token))->second; - - std::string outputName = toLowerCase (tokenizer.nextToken ()); // output name - - std::string inputName = toLowerCase (tokenizer.nextToken ()); // input - - - OneInputGate* g = new OneInputGate (*func, outputName, inputName); - gates.push_back (g); - - // possibly delay, if no delay then next gate or end - token = toLowerCase (tokenizer.nextToken ()); - if (token[0] == '#') { - token = token.substr(1); - SimTime d(atol (token.c_str ())); // SimTime.parseLong(token); - g->setDelay(d); - - } else { - continue; - } - } else if (twoInputGates().count (token) > 0) { - - const TwoInputFunc* func = (twoInputGates ().find (token))->second; - - std::string outputName = toLowerCase (tokenizer.nextToken ()); // output name - std::string input1Name = toLowerCase (tokenizer.nextToken ()); // input 1 - - std::string input2Name = toLowerCase (tokenizer.nextToken ()); // input 2 - - TwoInputGate* g = new TwoInputGate (*func, outputName, input1Name, input2Name); - gates.push_back (g); - - // possibly delay, if no delay then next gate or end - token = toLowerCase (tokenizer.nextToken ()); - if (token[0] == '#') { - token = token.substr(1); - SimTime d(atol (token.c_str ())); // SimTime.parseLong(token); - g->setDelay(d); - - } else { - continue; - } - } else { - std::cerr << "Unknown type of gate " << token << std::endl; - abort (); - } - - //necessary to move forward in the while loop - token = toLowerCase (tokenizer.nextToken ()); - } // end of while - } - - /** - * Parses the netlist contained in fileName. 
- * - * Parsing steps - * parse input signal names - * parse output signal names - * parse finish time - * parse stimulus lists for each input signal - * parse the netlist - * - * @param fileName the file name - */ - - void parse(const std::string& fileName) { - std::cout << "input: reading circuit from file: " << fileName << std::endl; - - - NetlistTokenizer tokenizer (fileName.c_str (), DELIM, COMMENTS); - - std::string token; - - while (tokenizer.hasMoreTokens()) { - - token = toLowerCase (tokenizer.nextToken ()); - - if (token == ("inputs")) { - parsePortList(tokenizer, inputNames); - } else if (token == ("outputs")) { - parsePortList(tokenizer, outputNames); - } else if (token == ("outvalues")) { - parseOutValues(tokenizer, outValues); - } else if (token == ("finish")) { - token = toLowerCase (tokenizer.nextToken ()); - finishTime = SimTime (atol (token.c_str ())); // SimTime.parseLong(token); - } else if (token == ("initlist")) { - parseInitList(tokenizer, inputStimulusMap); - } else if (token == ("netlist")) { - parseNetlist(tokenizer, gates); - } - } // end outer while - - createInputPorts (); - createOutputPorts (); - } // end parse() - - - void destroy () { - destroyVec (gates); - } - -public: - /** - * Instantiates a new netlist parser. - * - * @param netlistFile the netlist file - */ - NetlistParser(const std::string& netlistFile): netlistFile(netlistFile) { - parse(netlistFile); - } - - - ~NetlistParser () { - destroy (); - } - - - - - /** - * Gets the finish time. - * - * @return the finish time - */ - const SimTime& getFinishTime() const { - return finishTime; - } - - /** - * Gets the netlist file. - * - * @return the netlist file - */ - const std::string& getNetlistFile() const { - return netlistFile; - } - - /** - * Gets the input names. - * - * @return the input names - */ - const std::vector& getInputNames() const { - return inputNames; - } - - /** - * - * @return input ports vector - */ - const std::vector& getInputPorts () const { - return inputPorts; - } - - /** - * Gets the output names. - * - * @return the output names - */ - const std::vector& getOutputNames() const { - return outputNames; - } - - /** - * - * @return output ports vector - */ - const std::vector& getOutputPorts () const { - return outputPorts; - } - - /** - * Gets the out values. - * - * @return the out values - */ - const std::map& getOutValues() const { - return outValues; - } - - /** - * Gets the input stimulus map. - * - * @return the input stimulus map - */ - const StimulusMapType& getInputStimulusMap() const { - return inputStimulusMap; - } - - /** - * Gets the gates. - * - * @return the gates - */ - const std::vector& getGates() const { - return gates; - } - -}; - - -} // namespace des - - -#endif /* DES_NETLISTPARSER_H_ */ diff --git a/maxflow/galois/apps/des/logic/OneInputGate.h b/maxflow/galois/apps/des/logic/OneInputGate.h deleted file mode 100644 index 35c28ce..0000000 --- a/maxflow/galois/apps/des/logic/OneInputGate.h +++ /dev/null @@ -1,166 +0,0 @@ -/** OneInputGate implements the basic structure of a one input logic gate -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. 
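Taken together, the routines above accept a section-based text format: 'inputs'/'outputs' name lists, 'outvalues' name/value pairs, a 'finish' time, one 'initlist' block per stimulated input, and a 'netlist' of gates with an optional '#'-prefixed delay, with most sections closed by 'end'. The following toy input is inferred from the parsing code, not copied from any real benchmark:

// tiny example: a single inverter driving out1
inputs
  a
end

outputs
  out1
end

outvalues
  out1 0
end

finish 100

initlist a
  0 0
  50 1
end

netlist
  inv out1 a #2
end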
- * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#ifndef DES_BASE_ONE_INPUT_GATE_H_ -#define DES_BASE_ONE_INPUT_GATE_H_ - -#include -#include - -#include "comDefs.h" -#include "logicDefs.h" -#include "LogicFunctions.h" -#include "LogicGate.h" - - -namespace des { - -struct OneInputGateTraits { - static const size_t N_OUT = 1; - static const size_t N_IN = 1; -}; - -class OneInputGate: public BaseLogicGate { -private: - typedef BaseLogicGate SuperTy; - -protected: - /** - * a functor which computes an output LogicVal when - * provided an input LogicVal - */ - const OneInputFunc& func; - - /** The input name. */ - std::string inputName; - - /** The input val. */ - LogicVal inputVal; - -public: - /** - * Instantiates a new one input gate. - */ - OneInputGate (const OneInputFunc& func, const std::string& outputName, const std::string& inputName, const SimTime& delay = MIN_DELAY) - : SuperTy (outputName, LOGIC_ZERO, delay), func (func), inputName (inputName) , inputVal (LOGIC_ZERO) {} - - - virtual OneInputGate* makeClone () const { - return new OneInputGate (*this); - } - - - /** - * Applies the update to internal state e.g. change to some input. Must update the output - * if the inputs have changed. - */ - virtual void applyUpdate (const LogicUpdate& lu) { - if (hasInputName (lu.getNetName ())) { - inputVal = lu.getNetVal (); - } else { - SuperTy::netNameMismatch (lu); - } - - this->outputVal = evalOutput (); - } - - /** - * Evaluate output based on the current state of the input - * - * @return the - */ - virtual LogicVal evalOutput () const { return func (inputVal); } - - /** - * @param net: name of a wire - * @return true if has an input with the name equal to 'net' - */ - virtual bool hasInputName (const std::string& net) const { return (inputName == net); } - - /** - * @param inputName net name - * @return index of the input matching the net name provided - */ - virtual size_t getInputIndex (const std::string& inputName) const { - if (this->inputName == (inputName)) { - return 0; // since there is only one input - } - abort (); - return -1; // error - - } - - - - /** - * @return string representation - */ - virtual std::string str () const { - std::ostringstream ss; - - ss << func.str () << " output: " << outputName << " = " << outputVal << ", input: " << inputName << " = " << inputVal; - return ss.str (); - } - - /** - * Gets the input name. - * - * @return the input name - */ - const std::string& getInputName () const { - return inputName; - } - - /** - * Sets the input name. - * - * @param inputName the new input name - */ - void setInputName (const std::string& inputName) { - this->inputName = inputName; - } - - /** - * Gets the input val. 
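applyUpdate() above follows a fixed recipe: match the update's net name against the gate's input, store the new value, then immediately re-evaluate the output. A condensed stand-in showing just that flow; MiniGate and its inverter semantics are illustrative, not the real OneInputGate/OneInputFunc pair.

#include <cassert>
#include <string>

typedef char LogicVal;
const LogicVal LOGIC_ZERO    = '0';
const LogicVal LOGIC_ONE     = '1';
const LogicVal LOGIC_UNKNOWN = 'X';

struct MiniGate {
  std::string inputName;
  LogicVal inputVal;
  LogicVal outputVal;

  explicit MiniGate(const std::string& in)
      : inputName(in), inputVal(LOGIC_ZERO), outputVal(LOGIC_ONE) {}

  // Inverter semantics, standing in for the OneInputFunc functor.
  LogicVal evalOutput() const {
    if (inputVal == LOGIC_ZERO) return LOGIC_ONE;
    if (inputVal == LOGIC_ONE)  return LOGIC_ZERO;
    return LOGIC_UNKNOWN;
  }

  // Same shape as applyUpdate(): match the net, store the value, re-evaluate.
  void applyUpdate(const std::string& net, LogicVal val) {
    assert(net == inputName);   // the real code calls netNameMismatch() here
    inputVal = val;
    outputVal = evalOutput();
  }
};

int main() {
  MiniGate g("a");
  g.applyUpdate("a", LOGIC_ONE);
  assert(g.outputVal == LOGIC_ZERO);
  return 0;
}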
- * - * @return the input val - */ - const LogicVal& getInputVal () const { - return inputVal; - } - - /** - * Sets the input val. - * - * @param inputVal the new input val - */ - void setInputVal (const LogicVal& inputVal) { - this->inputVal = inputVal; - } - -}; - - -} // namespace des - -#endif diff --git a/maxflow/galois/apps/des/logic/TwoInputGate.h b/maxflow/galois/apps/des/logic/TwoInputGate.h deleted file mode 100644 index c02ad3b..0000000 --- a/maxflow/galois/apps/des/logic/TwoInputGate.h +++ /dev/null @@ -1,230 +0,0 @@ -/** TwoInputGate is basic structure of a two input gates -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#ifndef DES_BASE_TWO_INPUT_GATE_H_ -#define DES_BASE_TWO_INPUT_GATE_H_ - -#include -#include - -#include "comDefs.h" -#include "logicDefs.h" -#include "LogicFunctions.h" -#include "LogicGate.h" - - -namespace des { - -struct TwoInputGateTraits { - static const size_t N_OUT = 1; - static const size_t N_IN = 2; -}; - -class TwoInputGate: public BaseLogicGate { -private: - typedef BaseLogicGate SuperTy; - -protected: - - /** - * The functor that computes a value for the output of the gate - * when provided with the values of the input - */ - const TwoInputFunc& func; - - /** The input1 name. */ - std::string input1Name; - - /** The input2 name. */ - std::string input2Name; - - /** The input1 val. */ - LogicVal input1Val; - - /** The input2 val. */ - LogicVal input2Val; - - -public: - /** - * Instantiates a new two input gate. - */ - TwoInputGate (const TwoInputFunc& func, const std::string& outputName, const std::string& input1Name, - const std::string& input2Name, const SimTime& delay = MIN_DELAY) - : SuperTy (outputName, LOGIC_ZERO, delay) - , func (func) - , input1Name (input1Name) - , input2Name (input2Name) - , input1Val (LOGIC_ZERO) - , input2Val (LOGIC_ZERO) {} - - - virtual TwoInputGate* makeClone () const { - return new TwoInputGate (*this); - } - - /** - * Applies the update to internal state e.g. change to some input. 
Must update the output - * if the inputs have changed - * - * @param lu the update - */ - virtual void applyUpdate (const LogicUpdate& lu) { - - if (input1Name == (lu.getNetName ())) { - input1Val = lu.getNetVal (); - - } else if (input2Name == (lu.getNetName ())) { - input2Val = lu.getNetVal (); - - } else { - SuperTy::netNameMismatch (lu); - } - - // output has been changed - // update output immediately - // generate events to send to all fanout gates to update their inputs afer delay - this->outputVal = evalOutput (); - } - - /** - * Evaluate output based on the current state of the input - * - * @return the - */ - virtual LogicVal evalOutput () const { return func (input1Val, input2Val); } - - /** - * @param net: name of a wire - * @return true if has an input with the name equal to 'net' - */ - virtual bool hasInputName (const std::string& net) const { - return (input1Name == (net) || input2Name == (net)); - } - - /** - * @param inputName net name - * @return index of the input matching the net name provided - */ - virtual size_t getInputIndex (const std::string& inputName) const { - if (this->input2Name == (inputName)) { - return 1; - - } else if (this->input1Name == (inputName)) { - return 0; - - } else { - abort (); - return -1; // error - } - } - - - /** - * @return string representation - */ - virtual std::string str () const { - std::ostringstream ss; - ss << func.str () << " output: " << outputName << " = " << outputVal << " input1: " << input1Name << " = " - << input1Val << " input2: " << input2Name << " = " << input2Val; - return ss.str (); - } - - /** - * Gets the input1 name. - * - * @return the input1 name - */ - const std::string& getInput1Name () const { - return input1Name; - } - - /** - * Sets the input1 name. - * - * @param input1Name the new input1 name - */ - void setInput1Name (const std::string& input1Name) { - this->input1Name = input1Name; - } - - /** - * Gets the input1 val. - * - * @return the input1 val - */ - const LogicVal& getInput1Val () const { - return input1Val; - } - - /** - * Sets the input1 val. - * - * @param input1Val the new input1 val - */ - void setInput1Val (const LogicVal& input1Val) { - this->input1Val = input1Val; - } - - /** - * Gets the input2 name. - * - * @return the input2 name - */ - const std::string& getInput2Name () { - return input2Name; - } - - /** - * Sets the input2 name. - * - * @param input2Name the new input2 name - */ - void setInput2Name (const std::string& input2Name) { - this->input2Name = input2Name; - } - - /** - * Gets the input2 val. - * - * @return the input2 val - */ - const LogicVal& getInput2Val () const { - return input2Val; - } - - /** - * Sets the input2 val. - * - * @param input2Val the new input2 val - */ - void setInput2Val (const LogicVal& input2Val) { - this->input2Val = input2Val; - } - -}; - - -} // namespace des - -#endif diff --git a/maxflow/galois/apps/des/logic/logicDefs.h b/maxflow/galois/apps/des/logic/logicDefs.h deleted file mode 100644 index e29b0bb..0000000 --- a/maxflow/galois/apps/des/logic/logicDefs.h +++ /dev/null @@ -1,41 +0,0 @@ -/** Some common definitions and helper functions -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. 
- * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#ifndef DES_LOGIC_DEFS_H_ -#define DES_LOGIC_DEFS_H_ - - -namespace des { - -/** type used for value of a signal e.g. 0, 1, X , Z */ -typedef char LogicVal; - -/** the unknown logic value */ -const char LOGIC_UNKNOWN = 'X'; -const char LOGIC_ZERO = '0'; -const char LOGIC_ONE = '1'; - - -} // namespace des - -#endif diff --git a/maxflow/galois/apps/des/ordered/DESordered.cpp b/maxflow/galois/apps/des/ordered/DESordered.cpp deleted file mode 100644 index cbf0a91..0000000 --- a/maxflow/galois/apps/des/ordered/DESordered.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/** main function for DESordered -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - - -#include "DESordered.h" - -int main (int argc, char* argv[]) { - - des_ord::DESordered s; - s.run (argc, argv); - return 0; -} diff --git a/maxflow/galois/apps/des/ordered/DESordered.h b/maxflow/galois/apps/des/ordered/DESordered.h deleted file mode 100644 index 8e5f713..0000000 --- a/maxflow/galois/apps/des/ordered/DESordered.h +++ /dev/null @@ -1,257 +0,0 @@ -/** DES ordered version -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. 
NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#ifndef DES_ORDERED_H -#define DES_ORDERED_H - - -#include "Galois/Accumulator.h" -#include "Galois/Timer.h" -#include "Galois/Atomic.h" -#include "Galois/Galois.h" - -#include "Galois/Runtime/PerThreadWorkList.h" -#include "Galois/Runtime/ll/PaddedLock.h" -#include "Galois/Runtime/ll/CompilerSpecific.h" - -#include "abstractMain.h" -#include "SimInit.h" -#include "TypeHelper.h" - -#include -#include -#include - -#include - - -namespace des_ord { - -typedef Galois::GAccumulator Accumulator_ty; - -typedef des::EventRecvTimeLocalTieBrkCmp Cmp_ty; - -typedef Galois::Runtime::PerThreadVector AddList_ty; - -struct SimObjInfo; -typedef std::vector VecSobjInfo; - - -struct SimObjInfo: public TypeHelper { - - typedef des::AbstractMain::GNode GNode; - GNode node; - size_t numInputs; - size_t numOutputs; - std::vector lastInputEvents; - mutable volatile des::SimTime clock; - - SimObjInfo () {} - - SimObjInfo (GNode node, SimObj_ty* sobj): node (node) { - SimGate_ty* sg = static_cast (sobj); - assert (sg != NULL); - - numInputs = sg->getImpl ().getNumInputs (); - numOutputs = sg->getImpl ().getNumOutputs (); - - lastInputEvents.resize (numInputs, sobj->makeZeroEvent ()); - - clock = 0; - } - - - void recv (const Event_ty& event) { - - SimGate_ty* dstGate = static_cast (event.getRecvObj ()); - assert (dstGate != NULL); - - const std::string& outNet = event.getAction ().getNetName (); - size_t dstIn = dstGate->getImpl ().getInputIndex (outNet); // get the input index of the net to which my output is connected - - assert (dstIn < lastInputEvents.size ()); - lastInputEvents[dstIn] = event; - } - - bool isReady (const Event_ty& event) const { - // not ready if event has a timestamp greater than the latest event received - // on any input. 
- // an input with INFINITY_SIM_TIME is dead and will not receive more non-null events - // in the future - bool notReady = false; - - if (event.getRecvTime () < clock) { - return true; - - } else { - - des::SimTime new_clk = 2 * des::INFINITY_SIM_TIME; - for (std::vector::const_iterator e = lastInputEvents.begin () - , ende = lastInputEvents.end (); e != ende; ++e) { - - if ((e->getRecvTime () < des::INFINITY_SIM_TIME) && - (Cmp_ty::compare (event, *e) > 0)) { - notReady = true; - // break; - } - - if (e->getRecvTime () < des::INFINITY_SIM_TIME) { - new_clk = std::min (new_clk, e->getRecvTime ()); - } - } - - this->clock = new_clk; - } - - return !notReady; - } - -}; - - -class DESordered: - public des::AbstractMain, public TypeHelper { - - struct NhoodVisitor { - typedef int tt_has_fixed_neighborhood; - - Graph& graph; - VecSobjInfo& sobjInfoVec; - - NhoodVisitor (Graph& graph, VecSobjInfo& sobjInfoVec) - : graph (graph), sobjInfoVec (sobjInfoVec) - {} - - template - void operator () (const Event_ty& event, C&) const { - SimObjInfo& recvInfo = sobjInfoVec[event.getRecvObj ()->getID ()]; - graph.getData (recvInfo.node, Galois::MethodFlag::CHECK_CONFLICT); - } - }; - - struct ReadyTest { - VecSobjInfo& sobjInfoVec; - - ReadyTest (VecSobjInfo& sobjInfoVec): sobjInfoVec (sobjInfoVec) {} - - bool operator () (const Event_ty& event) const { - SimObjInfo& sinfo = sobjInfoVec[event.getRecvObj ()->getID ()]; - return sinfo.isReady (event); - } - }; - - - struct OpFunc { - Graph& graph; - std::vector& sobjInfoVec; - AddList_ty& newEvents; - Accumulator_ty& nevents; - - OpFunc ( - Graph& graph, - std::vector& sobjInfoVec, - AddList_ty& newEvents, - Accumulator_ty& nevents) - : - graph (graph), - sobjInfoVec (sobjInfoVec), - newEvents (newEvents), - nevents (nevents) - {} - - template - void operator () (const Event_ty& event, C& lwl) { - - // std::cout << ">>> Processing: " << event.detailedString () << std::endl; - - // TODO: needs a PQ with remove operation to work correctly - assert (ReadyTest (sobjInfoVec) (event)); - - SimObj_ty* recvObj = static_cast (event.getRecvObj ()); - SimObjInfo& recvInfo = sobjInfoVec[recvObj->getID ()]; - - nevents += 1; - newEvents.get ().clear (); - - recvObj->execEvent (event, graph, recvInfo.node, newEvents.get ()); - - for (AddList_ty::local_iterator a = newEvents.get ().begin () - , enda = newEvents.get ().end (); a != enda; ++a) { - - SimObjInfo& sinfo = sobjInfoVec[a->getRecvObj()->getID ()]; - sinfo.recv (*a); - lwl.push (*a); - - // std::cout << "### Adding: " << a->detailedString () << std::endl; - } - - } - - }; - - std::vector sobjInfoVec; - -protected: - virtual std::string getVersion () const { return "Handwritten Ordered ODG, no barrier"; } - - virtual void initRemaining (const SimInit_ty& simInit, Graph& graph) { - sobjInfoVec.clear (); - sobjInfoVec.resize (graph.size ()); - - for (Graph::iterator n = graph.begin () - , endn = graph.end (); n != endn; ++n) { - - SimObj_ty* so = static_cast (graph.getData (*n, Galois::MethodFlag::NONE)); - sobjInfoVec[so->getID ()] = SimObjInfo (*n, so); - } - } - - - virtual void runLoop (const SimInit_ty& simInit, Graph& graph) { - - for (std::vector::const_iterator e = simInit.getInitEvents ().begin () - , ende = simInit.getInitEvents ().end (); e != ende; ++e) { - - SimObjInfo& sinfo = sobjInfoVec[e->getRecvObj ()->getID ()]; - sinfo.recv (*e); - } - - AddList_ty newEvents; - Accumulator_ty nevents; - - Galois::for_each_ordered ( - simInit.getInitEvents ().begin (), simInit.getInitEvents ().end (), - Cmp_ty (), - 
NhoodVisitor (graph, sobjInfoVec), - OpFunc (graph, sobjInfoVec, newEvents, nevents), - ReadyTest (sobjInfoVec)); - - std::cout << "Number of events processed= " << - nevents.reduce () << std::endl; - } -}; - - -} - -#endif // DES_ORDERED_H diff --git a/maxflow/galois/apps/des/ordered/DESorderedHand.cpp b/maxflow/galois/apps/des/ordered/DESorderedHand.cpp deleted file mode 100644 index 3b4b0a4..0000000 --- a/maxflow/galois/apps/des/ordered/DESorderedHand.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/** main function for DESorderedHand -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - - -#include "DESorderedHand.h" - -int main (int argc, char* argv[]) { - - des_ord::DESorderedHand s; - s.run (argc, argv); - return 0; -} diff --git a/maxflow/galois/apps/des/ordered/DESorderedHand.h b/maxflow/galois/apps/des/ordered/DESorderedHand.h deleted file mode 100644 index 1c1796f..0000000 --- a/maxflow/galois/apps/des/ordered/DESorderedHand.h +++ /dev/null @@ -1,506 +0,0 @@ -/** DES ordered version -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. 
Amber Hassaan - */ - - - -#ifndef DES_ORDERED_EXP_H -#define DES_ORDERED_EXP_H - -#include "Galois/Accumulator.h" -#include "Galois/Timer.h" - -#include "Galois/Runtime/PerThreadWorkList.h" -#include "Galois/Runtime/ll/PaddedLock.h" -#include "Galois/Runtime/ll/CompilerSpecific.h" - -#include -#include -#include -#include - -#include - - -#include "abstractMain.h" -#include "SimInit.h" -#include "TypeHelper.h" - - -namespace des_ord { - -typedef Galois::GAccumulator Accumulator_ty; - -typedef des::EventRecvTimeLocalTieBrkCmp Cmp_ty; - -typedef Galois::Runtime::PerThreadVector AddList_ty; - -struct SimObjInfo: public TypeHelper { - - typedef Galois::Runtime::LL::SimpleLock Lock_ty; - typedef des::AbstractMain::GNode GNode; - typedef std::set > PQ; - - Lock_ty mutex; - PQ pendingEvents; - - GNode node; - size_t numInputs; - size_t numOutputs; - std::vector inputTimes; - - SimObjInfo () {} - - SimObjInfo (const GNode& node, SimObj_ty* sobj): node (node) { - SimGate_ty* sg = static_cast (sobj); - assert (sg != NULL); - - numInputs = sg->getImpl ().getNumInputs (); - numOutputs = sg->getImpl ().getNumOutputs (); - - inputTimes.resize (numInputs, des::SimTime ()); - } - - void recv (const Event_ty& event) { - - SimGate_ty* dstGate = static_cast (event.getRecvObj ()); - assert (dstGate != NULL); - - const std::string& outNet = event.getAction ().getNetName (); - size_t dstIn = dstGate->getImpl ().getInputIndex (outNet); // get the input index of the net to which my output is connected - - assert (dstIn < inputTimes.size ()); - inputTimes[dstIn] = event.getRecvTime (); - - mutex.lock (); - pendingEvents.insert (event); - mutex.unlock (); - } - - bool hasPending () const { - mutex.lock (); - bool ret = !pendingEvents.empty (); - mutex.unlock (); - return ret; - } - - bool hasReady () const { - mutex.lock (); - bool ret = false; - if (!pendingEvents.empty ()) { - ret = isReady (*pendingEvents.begin ()); - } - mutex.unlock (); - - return ret; - } - - Event_ty getMin () const { - mutex.lock (); - Event_ty ret = *pendingEvents.begin (); - mutex.unlock (); - - return ret; - } - - bool isMin (const Event_ty& event) const { - mutex.lock (); - bool ret = !pendingEvents.empty () - && (*pendingEvents.begin () == event); - mutex.unlock (); - - return ret; - } - - Event_ty removeMin () { - mutex.lock (); - assert (!pendingEvents.empty ()); - Event_ty event = *pendingEvents.begin (); - pendingEvents.erase (pendingEvents.begin ()); - mutex.unlock (); - - return event; - } - - void remove (const Event_ty& event) { - mutex.lock (); - assert (pendingEvents.find (event) != pendingEvents.end ()); - pendingEvents.erase (event); - mutex.unlock (); - } - - - bool isReady (const Event_ty& event) const { - // not ready if event has a timestamp greater than the latest event received - // on any input. 
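The comment starting above (and continuing below) states the conservative readiness rule: an event may execute only if no still-live input could deliver an earlier event, where an input pinned at INFINITY_SIM_TIME counts as dead. A stand-alone sketch of just that predicate over the inputTimes representation used by this class; the free function below is an illustrative rewrite, not the member function itself.

#include <cassert>
#include <cstddef>
#include <vector>

typedef long long SimTime;
const SimTime INFINITY_SIM_TIME = (1 << 30);

// An event at 'recvTime' is ready only if every still-live input has already
// delivered an event at or after that time; inputs stuck at INFINITY_SIM_TIME
// are dead and are ignored.
bool isReady(SimTime recvTime, const std::vector<SimTime>& inputTimes) {
  for (std::size_t i = 0; i < inputTimes.size(); ++i) {
    if (inputTimes[i] < INFINITY_SIM_TIME && recvTime > inputTimes[i])
      return false;   // a live input lags behind: an earlier event may still arrive
  }
  return true;
}

int main() {
  std::vector<SimTime> inputs;
  inputs.push_back(10);
  inputs.push_back(INFINITY_SIM_TIME);  // dead input, ignored
  assert(isReady(10, inputs));
  assert(!isReady(11, inputs));         // the live input has only advanced to 10
  return 0;
}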
- // an input with INFINITY_SIM_TIME is dead and will not receive more non-null events - // in the future - bool notReady = false; - for (std::vector::const_iterator i = inputTimes.begin () - , endi = inputTimes.end (); i != endi; ++i) { - - if ((*i < des::INFINITY_SIM_TIME) && (event.getRecvTime () > *i)) { - notReady = true; - break; - } - } - - return !notReady; - } - -}; - - -std::vector::iterator -getGlobalMin (std::vector& sobjInfoVec) { - - std::vector::iterator minPos = sobjInfoVec.end (); - - for (std::vector::iterator i = sobjInfoVec.begin () - , endi = sobjInfoVec.end (); i != endi; ++i) { - - if (i->hasPending ()) { - - if (minPos == sobjInfoVec.end ()) { - minPos = i; - - } else if (Cmp_ty::compare (i->getMin (), minPos->getMin ()) < 0) { - minPos = i; - } - } - } - - return minPos; -} - -class DESorderedHand: - public des::AbstractMain, public TypeHelper { - - typedef Galois::Runtime::PerThreadVector WL_ty; - - - - struct FindReady { - WL_ty& readyEvents; - Accumulator_ty& findIter; - - FindReady ( - WL_ty& readyEvents, - Accumulator_ty& findIter) - : - readyEvents (readyEvents), - findIter (findIter) - {} - - GALOIS_ATTRIBUTE_PROF_NOINLINE void operator () (SimObjInfo& sinfo) { - findIter += 1; - - if (sinfo.hasReady ()) { - readyEvents.get ().push_back (sinfo.removeMin ()); - } - } - }; - - struct ProcessEvents { - Graph& graph; - std::vector& sobjInfoVec; - AddList_ty& newEvents; - Accumulator_ty& nevents; - - - ProcessEvents ( - Graph& graph, - std::vector& sobjInfoVec, - AddList_ty& newEvents, - Accumulator_ty& nevents) - : - graph (graph), - sobjInfoVec (sobjInfoVec), - newEvents (newEvents), - nevents (nevents) - {} - - - GALOIS_ATTRIBUTE_PROF_NOINLINE void operator () (const Event_ty& event) { - nevents += 1; - - - newEvents.get ().clear (); - SimObj_ty* recvObj = static_cast (event.getRecvObj ()); - GNode recvNode = sobjInfoVec[recvObj->getID ()].node; - - recvObj->execEvent (event, graph, recvNode, newEvents.get ()); - - for (AddList_ty::local_iterator a = newEvents.get ().begin () - , enda = newEvents.get ().end (); a != enda; ++a) { - - sobjInfoVec[a->getRecvObj ()->getID ()].recv (*a); - } - } - }; - - - - std::vector sobjInfoVec; - -protected: - virtual std::string getVersion () const { return "Handwritten Ordered ODG based"; } - - virtual void initRemaining (const SimInit_ty& simInit, Graph& graph) { - sobjInfoVec.clear (); - sobjInfoVec.resize (graph.size ()); - - for (Graph::iterator n = graph.begin () - , endn = graph.end (); n != endn; ++n) { - - SimObj_ty* so = static_cast (graph.getData (*n, Galois::MethodFlag::NONE)); - sobjInfoVec[so->getID ()] = SimObjInfo (*n, so); - } - } - - - virtual void runLoop (const SimInit_ty& simInit, Graph& graph) { - - for (std::vector::const_iterator i = simInit.getInitEvents ().begin () - , endi = simInit.getInitEvents ().end (); i != endi; ++i) { - - SimObj_ty* recvObj = static_cast (i->getRecvObj ()); - sobjInfoVec[recvObj->getID ()].recv (*i); - - } - - WL_ty readyEvents; - AddList_ty newEvents; - - Accumulator_ty findIter; - Accumulator_ty nevents; - size_t round = 0; - size_t gmin_calls = 0; - - Galois::TimeAccumulator t_find; - Galois::TimeAccumulator t_gmin; - Galois::TimeAccumulator t_simulate; - - while (true) { - ++round; - readyEvents.clear_all (); - - assert (readyEvents.empty_all ()); - - t_find.start (); - Galois::do_all ( - // Galois::Runtime::do_all_coupled ( - sobjInfoVec.begin (), sobjInfoVec.end (), - FindReady (readyEvents, findIter), Galois::loopname("find_ready_events")); - t_find.stop (); - - // 
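As an aside, the readiness rule and the global-minimum fallback implemented in the deleted code above can be summarized in a small standalone C++ sketch. Station, SimTime and kInfinity are placeholder names invented for this illustration, not the Galois types:

#include <limits>
#include <set>
#include <vector>

using SimTime = long long;
const SimTime kInfinity = std::numeric_limits<SimTime>::max(); // stand-in for INFINITY_SIM_TIME

struct Station {
    std::vector<SimTime> lastInputTime; // latest receive time seen on each input
    std::multiset<SimTime> pending;     // pending events, reduced here to their timestamps

    // An event is safe (ready) only if no live input could still deliver an
    // earlier event: every input is either dead (at kInfinity) or has already
    // advanced at least to the event's receive time.
    bool isReady(SimTime evTime) const {
        for (SimTime t : lastInputTime)
            if (t < kInfinity && evTime > t)
                return false;
        return true;
    }

    bool hasPending() const { return !pending.empty(); }
    SimTime minPending() const { return *pending.begin(); }
};

// When no station has a ready event, the globally earliest pending event is
// always safe to execute; scanning for it breaks the stall.
int globalMin(const std::vector<Station>& stations) {
    int best = -1;
    for (int i = 0; i < (int)stations.size(); ++i)
        if (stations[i].hasPending() &&
            (best < 0 || stations[i].minPending() < stations[best].minPending()))
            best = i;
    return best; // -1 means no pending events remain anywhere
}

The full scan is linear in the number of stations, which is presumably why the code above tracks it separately (gmin_calls, t_gmin) and only falls back to it when no station has a ready event.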
std::cout << "Number of ready events found: " << readyEvents.size_all () << std::endl; - - if (readyEvents.empty_all ()) { - t_gmin.start (); - - ++gmin_calls; - - std::vector::iterator minPos = getGlobalMin (sobjInfoVec); - - if (minPos == sobjInfoVec.end ()) { - break; - - } else { - readyEvents.get ().push_back (minPos->removeMin ()); - } - - t_gmin.stop (); - } - - t_simulate.start (); - Galois::do_all ( - // Galois::Runtime::do_all_coupled ( - readyEvents.begin_all (), readyEvents.end_all (), - ProcessEvents (graph, sobjInfoVec, newEvents, nevents), Galois::loopname("process_ready_events")); - t_simulate.stop (); - } - - std::cout << "Number of rounds = " << round << std::endl; - std::cout << "Number of iterations spent in finding ready events = " << - findIter.reduce () << std::endl; - std::cout << "Number of events processed = " << nevents.reduce () << std::endl; - std::cout << "Average parallelism: " << double (nevents.reduce ())/ double (round) << std::endl; - std::cout << "Number of times global min computed = " << gmin_calls << std::endl; - std::cout << "Time spent in finding ready events = " << t_find.get () << std::endl; - std::cout << "Time spent in computing global min = " << t_gmin.get () << std::endl; - std::cout << "Time spent in simulating events = " << t_simulate.get () << std::endl; - - } - -}; - - -class DESorderedHandNB: - public des::AbstractMain, public TypeHelper { - - struct OpFuncEagerAdd { - Graph& graph; - std::vector& sobjInfoVec; - AddList_ty& newEvents; - Accumulator_ty& niter; - Accumulator_ty& nevents; - - OpFuncEagerAdd ( - Graph& graph, - std::vector& sobjInfoVec, - AddList_ty& newEvents, - Accumulator_ty& niter, - Accumulator_ty& nevents) - : - graph (graph), - sobjInfoVec (sobjInfoVec), - newEvents (newEvents), - niter (niter), - nevents (nevents) - {} - - template - void operator () (const Event_ty& event, C& lwl) { - - niter += 1; - - SimObj_ty* recvObj = static_cast (event.getRecvObj ()); - SimObjInfo& recvInfo = sobjInfoVec[recvObj->getID ()]; - - graph.getData (recvInfo.node, Galois::MethodFlag::CHECK_CONFLICT); - - if (recvInfo.isReady (event) - && recvInfo.isMin (event)) { - nevents += 1; - newEvents.get ().clear (); - - GNode& recvNode = sobjInfoVec[recvObj->getID ()].node; - - recvObj->execEvent (event, graph, recvNode, newEvents.get ()); - - for (AddList_ty::local_iterator a = newEvents.get ().begin () - , enda = newEvents.get ().end (); a != enda; ++a) { - - SimObjInfo& sinfo = sobjInfoVec[a->getRecvObj ()->getID ()]; - - sinfo.recv (*a); - - // if (sinfo.getMin () == *a) { - // lwl.push (*a); - // } - lwl.push (sinfo.getMin ()); - - } - - - assert (recvInfo.isReady (event)); - - - recvInfo.remove (event); - if (recvInfo.hasReady ()) { - lwl.push (recvInfo.getMin ()); - } - - } - - } - - }; - - std::vector sobjInfoVec; - -protected: - virtual std::string getVersion () const { return "Handwritten Ordered ODG, no barrier"; } - - virtual void initRemaining (const SimInit_ty& simInit, Graph& graph) { - sobjInfoVec.clear (); - sobjInfoVec.resize (graph.size ()); - - for (Graph::iterator n = graph.begin () - , endn = graph.end (); n != endn; ++n) { - - SimObj_ty* so = static_cast (graph.getData (*n, Galois::MethodFlag::NONE)); - sobjInfoVec[so->getID ()] = SimObjInfo (*n, so); - } - } - - - virtual void runLoop (const SimInit_ty& simInit, Graph& graph) { - - std::vector initWL; - - for (std::vector::const_iterator i = simInit.getInitEvents ().begin () - , endi = simInit.getInitEvents ().end (); i != endi; ++i) { - - SimObj_ty* recvObj = 
static_cast (i->getRecvObj ()); - SimObjInfo& sinfo = sobjInfoVec[recvObj->getID ()]; - sinfo.recv (*i); - - if (sinfo.isMin (*i)) { // for eager add - initWL.push_back (*i); - } - - // std::cout << "initial event: " << i->detailedString () << std::endl - // << "is_ready: " << sinfo.isReady (*i) << ", is_min: " << (sinfo.getMin () == *i) << std::endl; - } - - AddList_ty newEvents; - Accumulator_ty niter; - Accumulator_ty nevents; - size_t round = 0; - - while (true) { - ++round; - - typedef Galois::WorkList::dChunkedFIFO<16> WL_ty; - - Galois::for_each(initWL.begin (), initWL.end (), - OpFuncEagerAdd (graph, sobjInfoVec, newEvents, niter, nevents), - Galois::wl()); - - initWL.clear (); - - std::vector::iterator minPos = getGlobalMin (sobjInfoVec); - - if (minPos == sobjInfoVec.end ()) { - break; - - } else { - initWL.push_back (minPos->getMin ()); - } - - } - - std::cout << "Number of rounds = " << round << std::endl; - std::cout << "Number of iterations or attempts = " << - niter.reduce () << std::endl; - std::cout << "Number of events processed= " << - nevents.reduce () << std::endl; - } - -}; - - -} // namespace des_ord -#endif // DES_ORDERED_EXP_H diff --git a/maxflow/galois/apps/des/ordered/DESorderedHandNB.cpp b/maxflow/galois/apps/des/ordered/DESorderedHandNB.cpp deleted file mode 100644 index b740066..0000000 --- a/maxflow/galois/apps/des/ordered/DESorderedHandNB.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/** main function for DESorderedHand -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - - -#include "DESorderedHand.h" - -int main (int argc, char* argv[]) { - - des_ord::DESorderedHandNB s; - s.run (argc, argv); - return 0; -} diff --git a/maxflow/galois/apps/des/ordered/DESorderedHandSet.cpp b/maxflow/galois/apps/des/ordered/DESorderedHandSet.cpp deleted file mode 100644 index 58d9284..0000000 --- a/maxflow/galois/apps/des/ordered/DESorderedHandSet.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/** main function for DESorderedHandSet -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. 
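The overall control flow of the handwritten ordered driver above (rounds of find-ready, global-min fallback, then process) reduces to the following serial skeleton; the three callbacks are hypothetical stand-ins for the Galois::do_all phases and the global-min scan, not the actual Galois API:

#include <cstddef>
#include <functional>
#include <iostream>

// Serial sketch of the round-based driver: each round first collects safe
// events; if none exist it forces the globally earliest pending event; the
// simulation ends when nothing is pending anywhere.
void runRounds(const std::function<std::size_t()>& findReadyEvents,
               const std::function<bool()>&        takeGlobalMin,
               const std::function<std::size_t()>& processEvents) {
    std::size_t rounds = 0, events = 0;
    while (true) {
        ++rounds;
        if (findReadyEvents() == 0 && !takeGlobalMin())
            break;                      // no ready events and nothing pending: done
        events += processEvents();
    }
    std::cout << "Number of rounds = " << rounds
              << ", events processed = " << events << std::endl;
}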
NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - - -#include "DESorderedHandSet.h" - -int main (int argc, char* argv[]) { - - des_ord::DESorderedHandSet s; - s.run (argc, argv); - return 0; -} diff --git a/maxflow/galois/apps/des/ordered/DESorderedHandSet.h b/maxflow/galois/apps/des/ordered/DESorderedHandSet.h deleted file mode 100644 index cf657c1..0000000 --- a/maxflow/galois/apps/des/ordered/DESorderedHandSet.h +++ /dev/null @@ -1,432 +0,0 @@ -/** DES ordered version -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. 
Amber Hassaan - */ - - - -#ifndef DES_ORDERED_HAND_SET_H -#define DES_ORDERED_HAND_SET_H - -#include "Galois/Accumulator.h" -#include "Galois/Timer.h" -#include "Galois/Atomic.h" - -#include "Galois/Runtime/PerThreadWorkList.h" -#include "Galois/Runtime/ll/PaddedLock.h" -#include "Galois/Runtime/ll/CompilerSpecific.h" - -#include -#include -#include - -#include - -#include "abstractMain.h" -#include "SimInit.h" -#include "TypeHelper.h" - - -namespace des_ord { - -typedef Galois::GAccumulator Accumulator_ty; - -typedef des::EventRecvTimeLocalTieBrkCmp Cmp_ty; - -typedef Galois::Runtime::PerThreadVector AddList_ty; - -typedef Galois::GAtomicPadded AtomicBool_ty; - -static const bool DEBUG = false; - -struct SimObjInfo: public TypeHelper { - - - struct MarkedEvent { - Event_ty event; - // mutable AtomicBool_ty flag; - mutable bool flag; - - explicit MarkedEvent (const Event_ty& _event) - : event (_event), flag (false) - {} - - bool isMarked () const { return flag; } - - bool mark () const { - // return flag.cas (false, true); - if (flag == false) { - flag = true; - return true; - - } else { - return false; - } - } - - void unmark () { - flag = false; - } - - operator const Event_ty& () const { return event; } - }; - - - typedef Galois::Runtime::LL::PaddedLock Lock_ty; - typedef des::AbstractMain::GNode GNode; - typedef std::set > PQ; - // typedef std::priority_queue, Cmp_ty::RevCmp> PQ; - - Lock_ty mutex; - PQ pendingEvents; - - GNode node; - size_t numInputs; - size_t numOutputs; - std::vector lastInputEvents; - - volatile mutable des::SimTime clock; - - SimObjInfo () {} - - SimObjInfo (GNode node, SimObj_ty* sobj): node (node) { - SimGate_ty* sg = static_cast (sobj); - assert (sg != NULL); - - numInputs = sg->getImpl ().getNumInputs (); - numOutputs = sg->getImpl ().getNumOutputs (); - - lastInputEvents.resize (numInputs, sobj->makeZeroEvent ()); - - clock = 0; - } - - void recv (const Event_ty& event) { - - SimGate_ty* dstGate = static_cast (event.getRecvObj ()); - assert (dstGate != NULL); - - const std::string& outNet = event.getAction ().getNetName (); - size_t dstIn = dstGate->getImpl ().getInputIndex (outNet); // get the input index of the net to which my output is connected - - assert (dstIn < lastInputEvents.size ()); - assert (Cmp_ty::compare (event, lastInputEvents[dstIn]) >= 0); // event >= last[dstIn] - lastInputEvents[dstIn] = event; - - mutex.lock (); - pendingEvents.insert (MarkedEvent(event)); - mutex.unlock (); - } - - bool isReady (const Event_ty& event) const { - // not ready if event has a timestamp greater than the latest event received - // on any input. 
- // an input with INFINITY_SIM_TIME is dead and will not receive more non-null events - // in the future - bool notReady = false; - if (event.getRecvTime () < clock) { - return true; - - } else { - - des::SimTime new_clk = 2 * des::INFINITY_SIM_TIME; - - for (std::vector::const_iterator e = lastInputEvents.begin () - , ende = lastInputEvents.end (); e != ende; ++e) { - - if ((e->getRecvTime () < des::INFINITY_SIM_TIME) && - (Cmp_ty::compare (event, *e) > 0)) { - notReady = true; - // break; - } - - if (e->getRecvTime () < des::INFINITY_SIM_TIME) { - new_clk = std::min (new_clk, e->getRecvTime ()); - } - } - - this->clock = new_clk; - } - - return !notReady; - } - - - bool hasPending () const { - mutex.lock (); - bool ret = !pendingEvents.empty (); - mutex.unlock (); - return ret; - } - - MarkedEvent getMin () const { - mutex.lock (); - MarkedEvent ret = *pendingEvents.begin (); - mutex.unlock (); - - return ret; - } - - bool isMin (const Event_ty& event) const { - mutex.lock (); - bool ret = false; - if (!pendingEvents.empty ()) { - ret = (event == *pendingEvents.begin ()); - } - mutex.unlock (); - - return ret; - } - - - Event_ty removeMin () { - mutex.lock (); - assert (!pendingEvents.empty ()); - Event_ty event = *pendingEvents.begin (); - pendingEvents.erase (pendingEvents.begin ()); - mutex.unlock (); - - return event; - } - - - - bool isSrc (const Event_ty& event) const { - return isReady(event) - && (event.getRecvTime() < des::INFINITY_SIM_TIME ? isMin (event) : true); - } - - bool canAddMin () const { - mutex.lock (); - bool ret = !pendingEvents.empty () - && !(pendingEvents.begin ()->isMarked ()) - && isReady (*pendingEvents.begin ()) - && pendingEvents.begin ()->mark (); - mutex.unlock (); - - return ret; - } - -}; - - -std::vector::iterator -getGlobalMin (std::vector& sobjInfoVec) { - - std::vector::iterator minPos = sobjInfoVec.end (); - - for (std::vector::iterator i = sobjInfoVec.begin () - , endi = sobjInfoVec.end (); i != endi; ++i) { - - if (i->hasPending ()) { - - if (minPos == endi) { - minPos = i; - - } else if (Cmp_ty::compare (i->getMin (), minPos->getMin ()) < 0) { - minPos = i; - } - } - } - - return minPos; - -} - -class DESorderedHandSet: - public des::AbstractMain, public TypeHelper { - - static const unsigned EPI = 32; - - struct OpFuncSet { - Graph& graph; - std::vector& sobjInfoVec; - AddList_ty& newEvents; - Accumulator_ty& nevents; - - OpFuncSet ( - Graph& graph, - std::vector& sobjInfoVec, - AddList_ty& newEvents, - Accumulator_ty& nevents) - : - graph (graph), - sobjInfoVec (sobjInfoVec), - newEvents (newEvents), - nevents (nevents) - {} - - template - void operator () (Event_ty event, C& lwl) { - - // std::cout << ">>> Processing: " << event.detailedString () << std::endl; - - unsigned epi = 0; - while (epi < EPI) { - ++epi; - - SimObj_ty* recvObj = static_cast (event.getRecvObj ()); - SimObjInfo& srcInfo = sobjInfoVec[recvObj->getID ()]; - - if (DEBUG && !srcInfo.isSrc (event)) { - abort (); - } - - nevents += 1; - newEvents.get ().clear (); - - recvObj->execEvent (event, graph, srcInfo.node, newEvents.get ()); - - for (AddList_ty::local_iterator a = newEvents.get ().begin () - , enda = newEvents.get ().end (); a != enda; ++a) { - - SimObjInfo& sinfo = sobjInfoVec[a->getRecvObj ()->getID ()]; - - sinfo.recv (*a); - - - if (sinfo.canAddMin ()) { - - assert (sinfo.getMin ().isMarked ()); - lwl.push (sinfo.getMin ()); - - // std::cout << "### Adding: " << static_cast (sinfo.getMin ()).detailedString () << std::endl; - } - - } - - - if (DEBUG && 
!srcInfo.isSrc (event)) { abort (); } - srcInfo.removeMin (); - - if (srcInfo.canAddMin ()) { - - assert (srcInfo.isSrc (srcInfo.getMin ())); - assert (srcInfo.getMin ().isMarked ()); - - event = srcInfo.getMin (); - assert (srcInfo.isSrc (event)); - - if (epi == EPI) { lwl.push (event); } - // lwl.push (srcInfo.getMin ()); - // std::cout << "%%% Adding: " << static_cast (srcInfo.getMin ()).detailedString () << std::endl; - } else { - break; - } - - } // end while - - SimObjInfo& srcInfo = sobjInfoVec[event.getRecvObj ()->getID ()]; - if (srcInfo.canAddMin ()) { - assert (srcInfo.getMin ().isMarked ()); - lwl.push (srcInfo.getMin ()); - } - - - } - - }; - - std::vector sobjInfoVec; - -protected: - virtual std::string getVersion () const { return "Handwritten Ordered ODG, no barrier"; } - - virtual void initRemaining (const SimInit_ty& simInit, Graph& graph) { - sobjInfoVec.clear (); - sobjInfoVec.resize (graph.size ()); - - for (Graph::iterator n = graph.begin () - , endn = graph.end (); n != endn; ++n) { - - SimObj_ty* so = static_cast (graph.getData (*n, Galois::MethodFlag::NONE)); - sobjInfoVec[so->getID ()] = SimObjInfo (*n, so); - } - } - - - virtual void runLoop (const SimInit_ty& simInit, Graph& graph) { - - std::vector initWL; - - for (std::vector::const_iterator i = simInit.getInitEvents ().begin () - , endi = simInit.getInitEvents ().end (); i != endi; ++i) { - - SimObj_ty* recvObj = static_cast (i->getRecvObj ()); - SimObjInfo& sinfo = sobjInfoVec[recvObj->getID ()]; - sinfo.recv (*i); - - } - - - for (std::vector::const_iterator i = simInit.getInitEvents ().begin (), endi = - simInit.getInitEvents ().end (); i != endi; ++i) { - - BaseSimObj_ty* recvObj = i->getRecvObj (); - SimObjInfo& sinfo = sobjInfoVec[recvObj->getID ()]; - - if (sinfo.canAddMin ()) { - - initWL.push_back (sinfo.getMin ()); - // std::cout << "Initial source found: " << Event_ty (sinfo.getMin ()).detailedString () << std::endl; - } - - } - - std::cout << "Number of initial sources: " << initWL.size () << std::endl; - - AddList_ty newEvents; - Accumulator_ty nevents; - size_t round = 0; - - while (true) { - ++round; - - typedef Galois::WorkList::dChunkedFIFO WL_ty; - - Galois::for_each(initWL.begin (), initWL.end (), - OpFuncSet (graph, sobjInfoVec, newEvents, nevents), - Galois::wl()); - - initWL.clear (); - - std::vector::iterator p = getGlobalMin (sobjInfoVec); - - if (p == sobjInfoVec.end ()) { - break; - - } else { - initWL.push_back (p->getMin ()); - } - - } - - std::cout << "Number of rounds = " << round << std::endl; - std::cout << "Number of events processed= " << - nevents.reduce () << std::endl; - } - -}; - - -} // namespace des_ord -#endif // DES_ORDERED_HAND_SET_H diff --git a/maxflow/galois/apps/des/ordered/DESorderedSerial.cpp b/maxflow/galois/apps/des/ordered/DESorderedSerial.cpp deleted file mode 100644 index 9f774df..0000000 --- a/maxflow/galois/apps/des/ordered/DESorderedSerial.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/** main function for DESorderedSerial -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. 
- * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - - -#include "DESorderedSerial.h" - -int main (int argc, char* argv[]) { - - des_ord::DESorderedSerial s; - s.run (argc, argv); - return 0; -} diff --git a/maxflow/galois/apps/des/ordered/DESorderedSerial.h b/maxflow/galois/apps/des/ordered/DESorderedSerial.h deleted file mode 100644 index ee6e73b..0000000 --- a/maxflow/galois/apps/des/ordered/DESorderedSerial.h +++ /dev/null @@ -1,129 +0,0 @@ -/** DES serial ordered version -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. 
Amber Hassaan - */ - - - -#ifndef DES_ORDERED_SERIAL_H -#define DES_ORDERED_SERIAL_H - -#include -#include -#include -#include - -#include - -#include "Galois/Runtime/ll/CompilerSpecific.h" - -#include "abstractMain.h" -#include "SimInit.h" -#include "TypeHelper.h" - -namespace des_ord { - -class DESorderedSerial: - public des::AbstractMain, public TypeHelper { - - typedef std::priority_queue, des::EventRecvTimeLocalTieBrkCmp::RevCmp> MinHeap; - typedef std::set > OrdSet; - - std::vector nodes; - -protected: - - - virtual std::string getVersion () const { return "Ordered serial"; } - - virtual void initRemaining (const SimInit_ty& simInit, Graph& graph) { - nodes.clear (); - nodes.resize (graph.size ()); - - for (Graph::iterator n = graph.begin () - , endn = graph.end (); n != endn; ++n) { - - BaseSimObj_ty* so = graph.getData (*n, Galois::MethodFlag::NONE); - nodes[so->getID ()] = *n; - } - } - - GALOIS_ATTRIBUTE_PROF_NOINLINE static Event_ty removeMin (MinHeap& pq) { - Event_ty ret = pq.top (); - pq.pop (); - return ret; - } - - GALOIS_ATTRIBUTE_PROF_NOINLINE static Event_ty removeMin (OrdSet& pq) { - Event_ty ret = *pq.begin (); - pq.erase (pq.begin ()); - return ret; - } - - GALOIS_ATTRIBUTE_PROF_NOINLINE static void add (MinHeap& pq, const Event_ty& event) { - pq.push (event); - } - - GALOIS_ATTRIBUTE_PROF_NOINLINE static void add (OrdSet& pq, const Event_ty& event) { - pq.insert (event); - } - - virtual void runLoop (const SimInit_ty& simInit, Graph& graph) { - - - // MinHeap pq; - OrdSet pq; - - for (std::vector::const_iterator i = simInit.getInitEvents ().begin () - , endi = simInit.getInitEvents ().end (); i != endi; ++i) { - add (pq, *i); - } - - std::vector newEvents; - - size_t numEvents = 0;; - while (!pq.empty ()) { - ++numEvents; - - newEvents.clear (); - - Event_ty event = removeMin (pq); - - SimObj_ty* recvObj = static_cast (event.getRecvObj ()); - GNode recvNode = nodes[recvObj->getID ()]; - - recvObj->execEvent (event, graph, recvNode, newEvents); - - for (std::vector::const_iterator a = newEvents.begin () - , enda = newEvents.end (); a != enda; ++a) { - - add (pq, *a); - } - } - - std::cout << "Number of events processed = " << numEvents << std::endl; - - } - -}; - -} // namespace des_ord -#endif // DES_ORDERED_SERIAL_H diff --git a/maxflow/galois/apps/des/ordered/SimObject.h b/maxflow/galois/apps/des/ordered/SimObject.h deleted file mode 100644 index 8c712b2..0000000 --- a/maxflow/galois/apps/des/ordered/SimObject.h +++ /dev/null @@ -1,81 +0,0 @@ -/** SimObject for Ordered algorithm -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. 
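The serial ordered version above is the textbook event-driven loop over a single priority queue. A minimal self-contained analogue, with a trivial made-up event type in place of the Galois Event_ty, looks like this:

#include <cstddef>
#include <iostream>
#include <queue>
#include <vector>

struct Event {
    long long recvTime;
    int       recvObj;   // index of the receiving station
};
struct Later {           // min-heap ordering on receive time
    bool operator()(const Event& a, const Event& b) const { return a.recvTime > b.recvTime; }
};

int main() {
    std::priority_queue<Event, std::vector<Event>, Later> pq;
    pq.push({0, 0});                     // hypothetical initial event
    std::size_t numEvents = 0;

    while (!pq.empty()) {
        Event e = pq.top(); pq.pop();    // always execute the globally earliest event
        ++numEvents;
        // execEvent(e) would run the gate logic and may emit new events; here a
        // toy cutoff stands in for that so the example terminates.
        if (e.recvTime < 3)
            pq.push({e.recvTime + 1, e.recvObj});
    }
    std::cout << "Number of events processed = " << numEvents << std::endl;
}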
Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#ifndef ORDERED_SIM_OBJECT_H -#define ORDERED_SIM_OBJECT_H - -#include -#include -#include - -#include "comDefs.h" -#include "BaseSimObject.h" -#include "Event.h" - -namespace des_ord { - -template -class SimObject: public des::BaseSimObject { - - typedef des::BaseSimObject Base; - typedef Event_tp Event_ty; - - template - struct SendAddList: public Base::SendWrapper { - Cont& container; - - SendAddList (Cont& _container): Base::SendWrapper (), container (_container) {} - - virtual void send (Base* dstObj, const Event_ty& event) { - container.push_back (event); - } - }; - - -public: - - SimObject (size_t id, unsigned numOutputs, unsigned numInputs) - : Base (id) - {} - - template - void execEvent ( - const Event_ty& event, - G& graph, - typename G::GraphNode& mynode, - C& newEvents) { - - assert (event.getRecvObj () == this); - - typename Base::template OutDegIterator beg = this->make_begin (graph, mynode); - typename Base::template OutDegIterator end = this->make_end (graph, mynode); - - SendAddList addListWrap (newEvents); - this->execEventIntern (event, addListWrap, beg, end); - } - -}; - -} // end namespace des_ord - -#endif // ORDERED_SIM_OBJECT_H diff --git a/maxflow/galois/apps/des/ordered/TypeHelper.h b/maxflow/galois/apps/des/ordered/TypeHelper.h deleted file mode 100644 index 61f7939..0000000 --- a/maxflow/galois/apps/des/ordered/TypeHelper.h +++ /dev/null @@ -1,47 +0,0 @@ -/** DES ordered type helper -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. 
Amber Hassaan - */ - -#ifndef DES_ORD_TYPE_HELPER_H -#define DES_ORD_TYPE_HELPER_H - -#include "abstractMain.h" -#include "SimInit.h" - - -namespace des_ord { - -struct TypeHelper { - typedef des::Event Event_ty; - typedef Event_ty::BaseSimObj_ty BaseSimObj_ty; - typedef des_ord::SimObject SimObj_ty; - - typedef des::SimGate SimGate_ty; - typedef des::Input Input_ty; - typedef des::Output Output_ty; - - typedef des::SimInit SimInit_ty; - -}; - -} -#endif // DES_ORD_TYPE_HELPER_H diff --git a/maxflow/galois/apps/des/unordered/DESunordered.cpp b/maxflow/galois/apps/des/unordered/DESunordered.cpp deleted file mode 100644 index b0ef11b..0000000 --- a/maxflow/galois/apps/des/unordered/DESunordered.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/** main function for DESunordered -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - - -#include "DESunordered.h" - -int main (int argc, char* argv[]) { - des_unord::DESunordered m; - m.run (argc, argv); - - return 0; -} diff --git a/maxflow/galois/apps/des/unordered/DESunordered.h b/maxflow/galois/apps/des/unordered/DESunordered.h deleted file mode 100644 index 884bbc4..0000000 --- a/maxflow/galois/apps/des/unordered/DESunordered.h +++ /dev/null @@ -1,219 +0,0 @@ -/** DES unordered Galois version -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. 
Amber Hassaan - */ - - -#ifndef _DES_UNORDERED_H_ -#define _DES_UNORDERED_H_ - -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" -#include "Galois/Atomic.h" -#include "Galois/WorkList/WorkList.h" -#include "Galois/Runtime/ll/gio.h" - -#include "DESunorderedBase.h" - - - -namespace des_unord { -static const bool DEBUG = false; - - -class DESunordered: public DESunorderedBase { - typedef Galois::GAccumulator Accumulator; - typedef Galois::GReduceMax ReduceMax; - typedef Galois::GAtomicPadded AtomicBool; - typedef std::vector VecAtomicBool; - - - - /** - * contains the loop body, called - * by @see for_each - */ - struct Process { - Graph& graph; - VecAtomicBool& onWLflags; - Accumulator& numEvents; - Accumulator& numIter; - ReduceMax& maxPending; - - - Process ( - Graph& graph, - VecAtomicBool& onWLflags, - Accumulator& numEvents, - Accumulator& numIter, - ReduceMax& maxPending) - : - graph (graph), - onWLflags (onWLflags), - numEvents (numEvents), - numIter (numIter), - maxPending (maxPending) - {} - - - void lockNeighborhood (GNode& activeNode) { - // acquire locks on neighborhood: one shot - graph.getData (activeNode, Galois::MethodFlag::CHECK_CONFLICT); - - // for (Graph::edge_iterator i = graph.edge_begin (activeNode, Galois::CHECK_CONFLICT) - // , ei = graph.edge_end (activeNode, Galois::CHECK_CONFLICT); i != ei; ++i) { - // GNode dst = graph.getEdgeDst (i); - // graph.getData (dst, Galois::CHECK_CONFLICT); - // } - - } - - - /** - * - * Called by @see Galois::Runtime::for_each during - * every iteration - * - * @param activeNode: the current active element - * @param lwl: the worklist type - */ - - template - void operator () (GNode& activeNode, WL& lwl) { - - lockNeighborhood (activeNode); - - SimObj_ty* srcObj = static_cast (graph.getData (activeNode, Galois::MethodFlag::NONE)); - // should be past the fail-safe point by now - - if (DEBUG) { - Galois::Runtime::LL::gDebug("processing : ", srcObj->str ().c_str ()); - } - - maxPending.update (srcObj->numPendingEvents ()); - - size_t proc = srcObj->simulate(graph, activeNode); // number of events processed - numEvents += proc; - - for (Graph::edge_iterator i = graph.edge_begin (activeNode, Galois::MethodFlag::NONE) - , ei = graph.edge_end (activeNode, Galois::MethodFlag::NONE); i != ei; ++i) { - - const GNode dst = graph.getEdgeDst(i); - SimObj_ty* dstObj = static_cast (graph.getData (dst, Galois::MethodFlag::NONE)); - - if (dstObj->isActive () - && !bool (onWLflags [dstObj->getID ()]) - && onWLflags[dstObj->getID ()].cas (false, true)) { - if (DEBUG) { - Galois::Runtime::LL::gDebug ("Added %d neighbor: ", - bool (onWLflags[dstObj->getID ()]), dstObj->str ().c_str ()); - } - - lwl.push (dst); - - } - - - } - - - if (srcObj->isActive()) { - lwl.push (activeNode); - - if (DEBUG) { - Galois::Runtime::LL::gDebug ("Added %d self: " - , bool (onWLflags[srcObj->getID ()]), srcObj->str ().c_str ()); - } - - } else { - onWLflags[srcObj->getID ()] = false; - - if (DEBUG) { - Galois::Runtime::LL::gDebug ("not adding %d self: ", - bool (onWLflags[srcObj->getID ()]), srcObj->str ().c_str ()); - } - } - - - numIter += 1; - - - } - }; - - /** - * Run loop. - * - * Galois worklists, currently, do not support set semantics, therefore, duplicates can be present on the workset. - * To ensure uniqueness of items on the worklist, we keep a list of boolean flags for each node, - * which indicate whether the node is on the worklist. When adding a node to the worklist, the - * flag corresponding to a node is set to True if it was previously False. 
The flag reset to False - * when the node is removed from the worklist. This list of flags provides a cheap way of - * implementing set semantics. - * - */ - virtual void runLoop (const SimInit_ty& simInit, Graph& graph) { - - std::vector initialActive; - VecAtomicBool onWLflags; - - initWorkList (graph, initialActive, onWLflags); - - - Accumulator numEvents; - Accumulator numIter; - ReduceMax maxPending; - - - - Process p(graph, onWLflags, numEvents, numIter, maxPending); - - typedef Galois::WorkList::dChunkedFIFO WL_ty; - // typedef Galois::Runtime::WorkList::GFIFO WL_ty; - - Galois::for_each(initialActive.begin (), initialActive.end (), p, Galois::wl()); - - std::cout << "Number of events processed = " << numEvents.reduce () << std::endl; - std::cout << "Number of iterations performed = " << numIter.reduce () << std::endl; - std::cout << "Maximum size of pending events = " << maxPending.reduce() << std::endl; - } - - void checkPostState (Graph& graph, VecAtomicBool& onWLflags) { - for (Graph::iterator n = graph.begin (), - endn = graph.end (); n != endn; ++n) { - - SimObj_ty* so = static_cast (graph.getData (*n, Galois::MethodFlag::NONE)); - if (so->isActive ()) { - std::cout << "ERROR: Found Active: " << so->str () << std::endl - << "onWLflags = " << onWLflags[so->getID ()] << ", numPendingEvents = " << so->numPendingEvents () - << std::endl; - } - } - - } - - virtual std::string getVersion () const { return "Unordered (Chandy-Misra) parallel"; } -}; -} // end namespace des_unord - - -#endif // _DES_UNORDERED_H_ - diff --git a/maxflow/galois/apps/des/unordered/DESunorderedBase.h b/maxflow/galois/apps/des/unordered/DESunorderedBase.h deleted file mode 100644 index 85e0232..0000000 --- a/maxflow/galois/apps/des/unordered/DESunorderedBase.h +++ /dev/null @@ -1,99 +0,0 @@ -/** DES unordered, common typedefs -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. 
Amber Hassaan - */ - -#include "SimInit.h" -#include "abstractMain.h" -#include "SimObject.h" - - -namespace des_unord { - -namespace cll = llvm::cl; -static cll::opt eventsPerIter("epi", - cll::desc ("number of events processed per iteration (max.)"), cll::init (0)); - -struct TypeHelper { - typedef des::Event Event_ty; - typedef Event_ty::BaseSimObj_ty BaseSimObj_ty; - typedef des_unord::SimObject SimObj_ty; - - typedef des::SimGate SimGate_ty; - typedef des::Input Input_ty; - typedef des::Output Output_ty; - - typedef des::SimInit SimInit_ty; - -}; - -class DESunorderedBase: - public des::AbstractMain - , public des_unord::TypeHelper { - -protected: - - virtual void initRemaining (const SimInit_ty& simInit, Graph& graph) { - - SimObj_ty::NEVENTS_PER_ITER = eventsPerIter; - if (SimObj_ty::NEVENTS_PER_ITER == 0) { - SimObj_ty::NEVENTS_PER_ITER = DEFAULT_EPI; - } - - - // post the initial events on their stations - for (std::vector::const_iterator i = simInit.getInitEvents ().begin () - , endi = simInit.getInitEvents ().end (); i != endi; ++i) { - - SimObj_ty* so = static_cast (i->getRecvObj ()); - so->recv (*i); - } - - } - - template - void initWorkList (Graph& graph, WL& workList, std::vector& onWLflags) { - onWLflags.clear (); - workList.clear (); - - onWLflags.resize (graph.size (), B (false)); - - // set onWLflags for input objects - for (Graph::iterator n = graph.begin () - , endn = graph.end (); n != endn; ++n) { - - SimObj_ty* so = static_cast (graph.getData (*n, Galois::MethodFlag::NONE)); - - if (so->isActive ()) { - workList.push_back (*n); - onWLflags[so->getID ()] = true; - } - - } - - std::cout << "Initial workList size = " << workList.size () << std::endl; - } - - - -}; - -} // end namespace des_unord diff --git a/maxflow/galois/apps/des/unordered/DESunorderedSerial.cpp b/maxflow/galois/apps/des/unordered/DESunorderedSerial.cpp deleted file mode 100644 index 162abd6..0000000 --- a/maxflow/galois/apps/des/unordered/DESunorderedSerial.cpp +++ /dev/null @@ -1,32 +0,0 @@ -/** main function for DESunorderedSerial -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. 
Amber Hassaan - */ - - -#include "DESunorderedSerial.h" - -int main (int argc, char* argv[]) { - - des_unord::DESunorderedSerial s; - s.run (argc, argv); - return 0; -} diff --git a/maxflow/galois/apps/des/unordered/DESunorderedSerial.h b/maxflow/galois/apps/des/unordered/DESunorderedSerial.h deleted file mode 100644 index 7780412..0000000 --- a/maxflow/galois/apps/des/unordered/DESunorderedSerial.h +++ /dev/null @@ -1,115 +0,0 @@ -/** DES serial unordered version -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - - - -#ifndef _DES_UNORDERED_SERIAL_H_ -#define _DES_UNORDERED_SERIAL_H_ - -#include -#include - -#include - -#include "DESunorderedBase.h" - -namespace des_unord { - - -class DESunorderedSerial: public des_unord::DESunorderedBase { - - virtual std::string getVersion () const { return "Unordered (Chandy-Misra) serial"; } - - /** - * Run loop. - * Does not use Galois::Runtime or Galois worklists - * - * To ensure uniqueness of items on the workList, we keep a list of boolean flags for each node, - * which indicate whether the node is on the workList. When adding a node to the workList, the - * flag corresponding to a node is set to True if it was previously False. The flag reset to False - * when the node is removed from the workList. This list of flags provides a cheap way of - * implementing set semantics. 
- * - */ - - virtual void runLoop (const SimInit_ty& simInit, Graph& graph) { - - std::deque workList; - std::vector onWLflags; - - initWorkList (graph, workList, onWLflags); - - size_t maxPending = 0; - size_t numEvents = 0; - size_t numIter = 0; - - while (!workList.empty ()) { - - GNode activeNode = workList.front (); - workList.pop_front (); - - SimObj_ty* srcObj = static_cast (graph.getData (activeNode, Galois::MethodFlag::NONE)); - - maxPending = std::max (maxPending, srcObj->numPendingEvents ()); - - numEvents += srcObj->simulate(graph, activeNode); - - - for (Graph::edge_iterator i = graph.edge_begin (activeNode, Galois::MethodFlag::NONE) - , ei = graph.edge_end (activeNode, Galois::MethodFlag::NONE); i != ei; ++i) { - - GNode dst = graph.getEdgeDst(i); - SimObj_ty* dstObj = static_cast (graph.getData (dst, Galois::MethodFlag::NONE)); - - if (dstObj->isActive ()) { - if (!onWLflags[dstObj->getID ()]) { - // set the flag to indicate presence on the workList - onWLflags[dstObj->getID ()] = true; - workList.push_back (dst); - } - } - } - - if (srcObj->isActive()) { - workList.push_back (activeNode); - - } else { - // reset the flag to indicate absence on the workList - onWLflags[srcObj->getID ()] = false; - } - - ++numIter; - - } - - - std::cout << "Simulation ended" << std::endl; - std::cout << "Number of events processed = " << numEvents << " Iterations = " << numIter << std::endl; - std::cout << "Max size of pending events = " << maxPending << std::endl; - - } - -}; - -} // end namspace des_unord -#endif // _DES_UNORDERED_SERIAL_H_ diff --git a/maxflow/galois/apps/des/unordered/SimObject.h b/maxflow/galois/apps/des/unordered/SimObject.h deleted file mode 100644 index 1e5c105..0000000 --- a/maxflow/galois/apps/des/unordered/SimObject.h +++ /dev/null @@ -1,285 +0,0 @@ -/** SimObject: the abstract interface to be implemented by any simulation object -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - - -#ifndef SIMOBJECT_H_ -#define SIMOBJECT_H_ - -#include -#include - -#include - - -#include "comDefs.h" - -#include "BaseSimObject.h" -#include "Event.h" - -#include "Galois/PriorityQueue.h" -#include "Galois/Runtime/ll/gio.h" - -//TODO: modeling one output for now. Need to extend for multiple outputs -/** - * @section Description - * - * The Class SimObject represents an abstract simulation object (processing station). A simulation application - * would inherit from this class. 
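The "on-worklist flag" trick described above (one boolean per node, flipped false-to-true when the node is pushed and cleared when the node goes inactive) can be sketched in portable C++ as follows; DedupWorklist and its members are simplifications invented for the example, not the Galois worklist types:

#include <atomic>
#include <cstddef>
#include <deque>
#include <vector>

// One atomic flag per node gives a plain FIFO worklist cheap set semantics:
// a node is pushed only by the thread that flips its flag from false to true.
struct DedupWorklist {
    std::deque<std::size_t>        items;
    std::vector<std::atomic<bool>> onWL;

    explicit DedupWorklist(std::size_t numNodes) : onWL(numNodes) {
        for (auto& f : onWL) f.store(false);
    }

    bool tryPush(std::size_t node) {
        bool expected = false;
        if (onWL[node].compare_exchange_strong(expected, true)) {
            items.push_back(node);       // the parallel version would push to a
            return true;                 // concurrent worklist instead of a deque
        }
        return false;                    // already queued by someone else
    }

    void markInactive(std::size_t node) {
        onWL[node].store(false);         // clear the flag when the node is drained
    }
};

This mirrors the GAtomicPadded<bool>::cas(false, true) guard used in the parallel version and the plain boolean vector used in the serial one.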
- */ - -namespace des_unord { - -template -class SimObject: public des::BaseSimObject { - - typedef typename des::BaseSimObject Base; - typedef Event_tp Event_ty; - - -protected: - - struct SendWrapperImpl: public Base::SendWrapper { - virtual void send (Base* rs, const Event_ty& e) { - SimObject* recvObj = static_cast (rs); - recvObj->recv (e); - } - }; - - typedef des::EventRecvTimeLocalTieBrkCmp Cmp; - typedef typename Galois::ThreadSafeOrderedSet PQ; - // typedef typename Galois::ThreadSafeMinHeap PQ; - - - static const bool DEBUG = false; - - unsigned numOutputs; - unsigned numInputs; - - std::vector inputTimes; - PQ pendingEvents; - -public: - static size_t NEVENTS_PER_ITER; - - SimObject (size_t id, unsigned numOutputs, unsigned numInputs) - : - Base (id), - numOutputs (numOutputs), - numInputs (numInputs) - { - assert (numOutputs == 1); - inputTimes.resize (numInputs, 0); - } - - - virtual ~SimObject () {} - - - void recv (const Event_ty& e) { - size_t inIdx = this->getInputIndex (e); - assert (inIdx < numInputs); - - // GALOIS_DEBUG ("%s, Received : %s\n", this->str ().c_str (), e.str ().c_str ()); - - if (inputTimes[inIdx] > e.getRecvTime () - && e.getRecvTime () < des::INFINITY_SIM_TIME ) { - - Galois::Runtime::LL::gDebug ("Non-FIFO order on input[",inIdx,"], last msg time=",inputTimes[inIdx],", current message =", e.str ().c_str ()); - - assert (inputTimes[inIdx] <= e.getRecvTime ()); - - } - - - // assert (inputTimes[inIdx] <= e.getRecvTime ()); - inputTimes[inIdx] = e.getRecvTime (); - - pendingEvents.push (e); - } - - /** - * Simulate. - * - * @param graph: the graph composed of simulation objects/stations and communication links - * @param myNode the node in the graph that has this SimObject as its node data - * @return number of events that were processed during the call - */ - template - size_t simulate(G& graph, typename G::GraphNode& myNode) { - assert (isActive ()); - // if (!isActive ()) { return 0; } - - - size_t nevents = 0; - - if (isActive ()) { - - des::SimTime clock = this->getClock (); - while ((!pendingEvents.empty()) - && (pendingEvents.top ().getRecvTime () <= clock) - && (nevents < NEVENTS_PER_ITER)) { - - Event_ty event = pendingEvents.pop (); - - // GALOIS_DEBUG ("%s, Processing: %s\n", this->str ().c_str (), event.str ().c_str ()); - - - //DEBUG - if (DEBUG && !pendingEvents.empty ()) { - Event_ty curr = event; - Event_ty next = pendingEvents.top (); - - if (curr.getRecvTime () > next.getRecvTime ()) { - std::cerr << "ERROR: curr > next" << std::endl; - std::cerr << "curr = " << curr.str () << std::endl << "next = " << next.str () << std::endl; - } - } - - - assert (graph.getData(myNode, Galois::MethodFlag::NONE) == this); // should already own a lock - assert (event.getRecvObj () == this); - - typename Base::template OutDegIterator beg = Base::make_begin (graph, myNode); - typename Base::template OutDegIterator end = Base::make_end (graph, myNode); - - SendWrapperImpl sendWrap; - - this->execEventIntern(event, sendWrap, beg, end); - - ++nevents; - } - } - - return nevents; - } - - - - /** - * Checks if is active. - * i.e. can process some of its pending events - * - * - * @return true, if is active - */ - bool isActive() const { - // not active if pendingEvents is empty - // not active if earliest pending event has a time stamp less than - // the latest time on an input i.e. 
possibly waiting for an earlier - // event on some input - bool notActive = true; - - if (!pendingEvents.empty ()) { - notActive = false; - - const des::SimTime& min_time = pendingEvents.top ().getRecvTime (); - - for (std::vector::const_iterator t = inputTimes.begin () - , endt = inputTimes.end (); t != endt; ++t) { - - if ((*t < des::INFINITY_SIM_TIME) && (*t < min_time)) { - // not active if waiting for an earlier message on an input - // input considered dead if last message on the input had a time stamp - // of INFINITY_SIM_TIME or greater - notActive = true; - break; - } - } - - } - - return !notActive; - } - - size_t numPendingEvents () const { - return pendingEvents.size (); - } - - /** - * string representation for printing - */ - virtual std::string str() const { - - std::ostringstream ss; - ss << Base::str (); - - for (size_t i = 0; i < numInputs; ++i) { - ss << ", inputTimes[" << i << "] = " << inputTimes[i]; - } - - if (DEBUG) { - for (size_t i = 0; i < numInputs; ++i) { - ss << ", inputTimes[" << i << "] = " << inputTimes[i]; - } - ss << std::endl; - - ss << ", active = " << isActive () << ", pendingEvents.size() = " << pendingEvents.size () - << ", pendingEvent.top () = " << pendingEvents.top ().str () << std::endl; - - - } - - return ss.str (); - } - - -protected: - /** - * @return the min of the time stamps of the latest message recieved on each - * input - * An input becomes dead when a message with time INFINITY_SIM_TIME is received - * on it, - * such dead inputs are not included in clock computation - */ - des::SimTime getClock () const { - assert (inputTimes.size () == numInputs); - - des::SimTime min_t = 2 * des::INFINITY_SIM_TIME; // to ensure a value of INFINITY_SIM_TIME + any small delay - - for (std::vector::const_iterator i = inputTimes.begin () - , endi = inputTimes.end (); i != endi; ++i) { - - if (*i < des::INFINITY_SIM_TIME) { // - min_t = std::min (*i, min_t); - } - } - - return min_t; - - // std::vector::const_iterator min_pos = std::min_element (inputTimes.begin (), inputTimes.end ()); - // return *min_pos; - } - - - - -}; // end class - - -} // end namespace des_unord - -template -size_t des_unord::SimObject::NEVENTS_PER_ITER = 1; - - -#endif /* SIMOBJECT_H_ */ diff --git a/maxflow/galois/apps/gmetis/CMakeLists.txt b/maxflow/galois/apps/gmetis/CMakeLists.txt deleted file mode 100644 index 8e7f049..0000000 --- a/maxflow/galois/apps/gmetis/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -app(gmetis) diff --git a/maxflow/galois/apps/gmetis/Coarsening.cpp b/maxflow/galois/apps/gmetis/Coarsening.cpp deleted file mode 100644 index 37734e3..0000000 --- a/maxflow/galois/apps/gmetis/Coarsening.cpp +++ /dev/null @@ -1,443 +0,0 @@ -/** GMetis -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. 
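The per-gate clock and activity test above (the Chandy-Misra safe horizon: the minimum of the last timestamps seen on the still-live inputs) are easy to show in isolation. Gate, SimTime and kInfinity below are placeholders invented for the illustration:

#include <algorithm>
#include <functional>
#include <queue>
#include <vector>

using SimTime = long long;
const SimTime kInfinity = 1LL << 60;   // stand-in for INFINITY_SIM_TIME

struct Gate {
    std::vector<SimTime> lastInputTime;                      // latest time per input
    std::priority_queue<SimTime, std::vector<SimTime>,
                        std::greater<SimTime>> pending;      // earliest pending first

    // Safe horizon: no live input can still deliver an event earlier than this.
    // Inputs that have reached kInfinity are dead and are ignored.
    SimTime clock() const {
        SimTime c = 2 * kInfinity;
        for (SimTime t : lastInputTime)
            if (t < kInfinity) c = std::min(c, t);
        return c;
    }

    // A gate is active when its earliest pending event lies within the safe
    // horizon, so at least one event can be executed without causality risk.
    bool isActive() const {
        return !pending.empty() && pending.top() <= clock();
    }
};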
Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author Xin Sui - * @author Nikunj Yadav - * @author Andrew Lenharth - */ - - -#include "Metis.h" -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" -#include "Galois/Runtime/PerThreadStorage.h" - -#include - -namespace { - -void assertAllMatched(GNode node, GGraph* graph) { - for (auto jj = graph->edge_begin(node), eejj = graph->edge_end(node); - jj != eejj; ++jj) - assert(node == graph->getEdgeDst(jj) || graph->getData(graph->getEdgeDst(jj)).isMatched()); -} - -void assertNoMatched(GGraph* graph) { - for (auto nn = graph->begin(), en = graph->end(); nn != en; ++nn) - assert(!graph->getData(*nn).isMatched()); -} - -struct HEMmatch { - std::pair operator()(GNode node, GGraph* graph, bool tag) { - GNode retval = node; // match self if nothing else - int maxwgt = std::numeric_limits::min(); - // nume += std::distance(graph->edge_begin(node), graph->edge_end(node)); - for (auto jj = graph->edge_begin(node, Galois::MethodFlag::NONE), eejj = graph->edge_end(node); - jj != eejj; ++jj) { - // ++checked; - GNode neighbor = graph->getEdgeDst(jj); - MetisNode& neighMNode = graph->getData(neighbor, Galois::MethodFlag::NONE); - int edgeData = graph->getEdgeData(jj, Galois::MethodFlag::NONE); - if (!neighMNode.isMatched() && neighbor != node && maxwgt < edgeData) { - maxwgt = edgeData; - retval = neighbor; - } - } - return std::make_pair(retval, maxwgt);; - } - GNode operator()(GNode node, GGraph* graph) { - return operator()(node, graph, true).first; - } -}; - - -struct RMmatch { - GNode operator()(GNode node, GGraph* graph) { - for (auto jj = graph->edge_begin(node, Galois::MethodFlag::NONE), eejj = graph->edge_end(node); - jj != eejj; ++jj) { - GNode neighbor = graph->getEdgeDst(jj); - if (!graph->getData(neighbor, Galois::MethodFlag::NONE).isMatched() && neighbor != node) - return neighbor; - } - return node; - //Don't actually do random, just choose first - } - std::pair operator()(GNode node, GGraph* graph, bool tag) { - return std::make_pair(operator()(node, graph), 0); - } -}; - -template -struct TwoHopMatcher { - MatchingPolicy matcher; - GNode operator()(GNode node, GGraph* graph) { - std::pair retval(node, std::numeric_limits::min()); - for (auto jj = graph->edge_begin(node, Galois::MethodFlag::NONE), eejj = graph->edge_end(node); - jj != eejj; ++jj) { - GNode neighbor = graph->getEdgeDst(jj); - std::pair tval = matcher(neighbor, graph, true); - if (tval.first != node && tval.first != neighbor && tval.second > retval.second) - retval = tval; - } - return retval.first; - } -}; - -/* - *This operator is responsible for matching. - 1. There are two types of matching. Random and Heavy Edge matching - 2. Random matching picks any random node above a threshold and matches the nodes. RM.h - 3. Heavy Edge Matching matches the vertex which is connected by the heaviest edge. HEM.h - 4. This operator can also create the multinode, i.e. the node which is created on combining two matched nodes. - 5. 
You can enable/disable 4th by changing variantMetis::mergeMatching -*/ - -typedef Galois::InsertBag NodeBag; -typedef Galois::GReducible > > Pcounter; - - -template -struct parallelMatchAndCreateNodes { - MatchingPolicy matcher; - GGraph *fineGGraph; - GGraph *coarseGGraph; - Pcounter& pc; - NodeBag& noEdgeBag; - bool selfMatch; - - parallelMatchAndCreateNodes(MetisGraph* Graph, Pcounter& pc, NodeBag& edBag, bool selfMatch) - : matcher(), - fineGGraph(Graph->getFinerGraph()->getGraph()), - coarseGGraph(Graph->getGraph()), - pc(pc), - noEdgeBag(edBag), - selfMatch(selfMatch) { - assert(fineGGraph != coarseGGraph); - } - - void operator()(GNode item, Galois::UserContext &lwl) { - if (fineGGraph->getData(item).isMatched()) - return; - if(fineGGraph->edge_begin(item, Galois::MethodFlag::NONE) == fineGGraph->edge_end(item, Galois::MethodFlag::NONE)){ - noEdgeBag.push(item); - return; - } - GNode ret; - do { - ret = matcher(item, fineGGraph); - //lock ret, since we found it lock-free it may be matched, so try again - } while (fineGGraph->getData(ret).isMatched()); - - //at this point both ret and item (and failed matches) are locked. - //We do not leave the above loop until we both have the lock on - //the node and check the matched status of the locked node. the - //lock before (final) read ensures that we will see any write to matched - - unsigned numEdges = std::distance(fineGGraph->edge_begin(item, Galois::MethodFlag::NONE), fineGGraph->edge_end(item, Galois::MethodFlag::NONE)); - //assert(numEdges == std::distance(fineGGraph->edge_begin(item), fineGGraph->edge_end(item))); - - GNode N; - if (ret != item) { - //match found - numEdges += std::distance(fineGGraph->edge_begin(ret, Galois::MethodFlag::NONE), fineGGraph->edge_end(ret, Galois::MethodFlag::NONE)); - //Cautious point - N = coarseGGraph->createNode(numEdges, - fineGGraph->getData(item).getWeight() + - fineGGraph->getData(ret).getWeight(), - item, ret); - fineGGraph->getData(item).setMatched(); - fineGGraph->getData(ret).setMatched(); - fineGGraph->getData(item).setParent(N); - fineGGraph->getData(ret).setParent(N); - } else { - //assertAllMatched(item, fineGGraph); - //Cautious point - //no match - if (selfMatch) { - pc.update(1); - N = coarseGGraph->createNode(numEdges, fineGGraph->getData(item).getWeight(), item); - fineGGraph->getData(item).setMatched(); - fineGGraph->getData(item).setParent(N); - } - } - } -}; - -/* - * This operator is responsible for doing a union find of the edges - * between matched nodes and populate the edges in the coarser graph - * node. 
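The HEMmatch policy above pairs a node with the unmatched neighbor reached over its heaviest incident edge, falling back to a self match, and parallelMatchAndCreateNodes then builds one coarse node per matched pair. Below is a sequential sketch of the heavy-edge rule on a plain weighted adjacency list; Graph and the example weights are illustrative assumptions, not the Galois GGraph API.

#include <cassert>
#include <iostream>
#include <limits>
#include <utility>
#include <vector>

// adj[u] holds (neighbor, edge weight) pairs of an undirected weighted graph.
typedef std::vector<std::vector<std::pair<int, int> > > Graph;

// Sequential heavy-edge matching in the spirit of HEMmatch above: each
// unmatched node pairs with its heaviest unmatched neighbor, or with itself
// (a "self match") when no unmatched neighbor remains.
std::vector<int> matchHeavyEdge(const Graph& adj) {
  std::vector<int> match(adj.size(), -1);
  for (int u = 0; u < (int)adj.size(); ++u) {
    if (match[u] != -1) continue;
    int best = u;
    int bestW = std::numeric_limits<int>::min();
    for (const auto& e : adj[u]) {
      if (e.first != u && match[e.first] == -1 && e.second > bestW) {
        best = e.first;
        bestW = e.second;
      }
    }
    match[u] = best; // self match when best == u
    match[best] = u;
  }
  return match;
}

int main() {
  // Edges: 0-1 weight 5, 0-2 weight 2, 1-2 weight 1, 2-3 weight 4.
  Graph adj(4);
  adj[0] = {{1, 5}, {2, 2}};
  adj[1] = {{0, 5}, {2, 1}};
  adj[2] = {{0, 2}, {1, 1}, {3, 4}};
  adj[3] = {{2, 4}};
  std::vector<int> m = matchHeavyEdge(adj);
  assert(m[0] == 1 && m[1] == 0); // heaviest edge 0-1 is matched first
  assert(m[2] == 3 && m[3] == 2); // node 2 then pairs with its free neighbor 3
  std::cout << "matched pairs: (0,1) and (2,3)\n";
  return 0;
}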
- */ - -struct parallelPopulateEdges { - typedef int tt_does_not_need_push; - typedef int tt_needs_per_iter_alloc; - - GGraph *coarseGGraph; - GGraph *fineGGraph; - parallelPopulateEdges(MetisGraph *Graph) - :coarseGGraph(Graph->getGraph()), fineGGraph(Graph->getFinerGraph()->getGraph()) { - assert(fineGGraph != coarseGGraph); - } - - template - void goSort(GNode node, Context& lwl) { - // std::cout << 'p'; - //fineGGraph is read only in this loop, so skip locks - MetisNode &nodeData = coarseGGraph->getData(node, Galois::MethodFlag::NONE); - - typedef std::deque, Galois::PerIterAllocTy::rebind >::other> GD; - //copy and translate all edges - GD edges(GD::allocator_type(lwl.getPerIterAlloc())); - - for (unsigned x = 0; x < nodeData.numChildren(); ++x) - for (auto ii = fineGGraph->edge_begin(nodeData.getChild(x), Galois::MethodFlag::NONE), ee = fineGGraph->edge_end(nodeData.getChild(x)); ii != ee; ++ii) { - GNode dst = fineGGraph->getEdgeDst(ii); - GNode p = fineGGraph->getData(dst, Galois::MethodFlag::NONE).getParent(); - edges.emplace_back(p, fineGGraph->getEdgeData(ii, Galois::MethodFlag::NONE)); - } - - //slightly faster not ordering by edge weight - std::sort(edges.begin(), edges.end(), [] (const std::pair& lhs, const std::pair& rhs) { return lhs.first < rhs.first; } ); - - //insert edges - for (auto pp = edges.begin(), ep = edges.end(); pp != ep;) { - GNode dst = pp->first; - unsigned sum = pp->second; - ++pp; - if (node != dst) { //no self edges - while (pp != ep && pp->first == dst) { - sum += pp->second; - ++pp; - } - coarseGGraph->addEdgeWithoutCheck(node, dst, Galois::MethodFlag::NONE, sum); - } - } - // assert(e); - //nodeData.setNumEdges(e); - } - - template - void operator()(GNode node, Context& lwl) { - // MetisNode &nodeData = coarseGGraph->getData(node, Galois::MethodFlag::NONE); - // if (std::distance(fineGGraph->edge_begin(nodeData.getChild(0), Galois::MethodFlag::NONE), - // fineGGraph->edge_begin(nodeData.getChild(0), Galois::MethodFlag::NONE)) - // < 256) - // goSort(node,lwl); - // else - // goHM(node,lwl); - goSort(node, lwl); - //goHeap(node,lwl); - } -}; - -struct HighDegreeIndexer: public std::unary_function { - static GGraph* indexgraph; - unsigned int operator()(const GNode& val) const { - return indexgraph->getData(val, Galois::MethodFlag::NONE).isFailedMatch() ? 
- std::numeric_limits::max() : - (std::numeric_limits::max() - - ((std::distance(indexgraph->edge_begin(val, Galois::MethodFlag::NONE), - indexgraph->edge_end(val, Galois::MethodFlag::NONE))) >> 2)); - } -}; - -GGraph* HighDegreeIndexer::indexgraph = 0; - -struct LowDegreeIndexer: public std::unary_function { - unsigned int operator()(const GNode& val) const { - unsigned x =std::distance(HighDegreeIndexer::indexgraph->edge_begin(val, Galois::MethodFlag::NONE), - HighDegreeIndexer::indexgraph->edge_end(val, Galois::MethodFlag::NONE)); - return x; // >> 2; - // int targetlevel = 0; - // while (x >>= 1) ++targetlevel; - // return targetlevel; - - } -}; - -struct WeightIndexer: public std::unary_function { - int operator()(const GNode& val) const { - return HighDegreeIndexer::indexgraph->getData(val, Galois::MethodFlag::NONE).getWeight(); - } -}; - -unsigned minRuns(unsigned coarsenTo, unsigned size) { - unsigned num = 0; - while (coarsenTo < size) { - ++num; - size /= 2; - } - return num; -} - -unsigned fixupLoners(NodeBag& b, GGraph* coarseGGraph, GGraph* fineGGraph) { - unsigned count = 0; - auto ii = b.begin(), ee = b.end(); - while (ii != ee) { - auto i2 = ii; - ++i2; - if (i2 != ee) { - GNode N = coarseGGraph->createNode(0, - fineGGraph->getData(*ii).getWeight() + - fineGGraph->getData(*i2).getWeight(), - *ii, *i2); - fineGGraph->getData(*ii).setMatched(); - fineGGraph->getData(*i2).setMatched(); - fineGGraph->getData(*ii).setParent(N); - fineGGraph->getData(*i2).setParent(N); - ++ii; - ++count; - } else { - GNode N = coarseGGraph->createNode(0, - fineGGraph->getData(*ii).getWeight(), - *ii); - fineGGraph->getData(*ii).setMatched(); - fineGGraph->getData(*ii).setParent(N); - } - ++ii; - } - return count; -} - -unsigned findMatching(MetisGraph* coarseMetisGraph, bool useRM, bool use2Hop, bool verbose) { - MetisGraph* fineMetisGraph = coarseMetisGraph->getFinerGraph(); - - /* - * Different worklist versions tried, dChunkedFIFO 256 works best with LC_MORPH_graph. - * Another good type would be Lazy Iter. 
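minRuns() above estimates how many halving passes are needed before the graph shrinks to the coarsenTo target, roughly ceil(log2(size / coarsenTo)). A small self-contained check of that arithmetic with illustrative sizes:

#include <cassert>
#include <iostream>

// Same loop as minRuns() above: count how many times `size` must be halved
// before it drops to `coarsenTo` or below.
unsigned minRuns(unsigned coarsenTo, unsigned size) {
  unsigned num = 0;
  while (coarsenTo < size) {
    ++num;
    size /= 2;
  }
  return num;
}

int main() {
  // e.g. a 1,000,000-node graph coarsened toward a target of 80 nodes needs
  // about 14 levels, since 1,000,000 halved 14 times is 61.
  assert(minRuns(80, 1000000) == 14);
  assert(minRuns(100, 100) == 0); // already at or below the target
  std::cout << minRuns(80, 1000000) << " coarsening levels expected\n";
  return 0;
}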
- */ - //typedef Galois::WorkList::ChunkedLIFO<64, GNode> WL; - //typedef Galois::WorkList::LazyIterlocal_begin()),false> WL; - - NodeBag bagOfLoners; - Pcounter pc; - - bool useOBIM = true; - - typedef decltype(fineMetisGraph->getGraph()->local_begin()) ITY; - typedef Galois::WorkList::StableIterator WL; - //typedef Galois::WorkList::Random<> WL; - if(useRM) { - parallelMatchAndCreateNodes pRM(coarseMetisGraph, pc, bagOfLoners, !use2Hop); - Galois::for_each_local(*fineMetisGraph->getGraph(), pRM, Galois::loopname("match"), Galois::wl()); - } else { - //FIXME: use obim for SHEM matching - typedef Galois::WorkList::dChunkedLIFO<16> Chunk; - typedef Galois::WorkList::OrderedByIntegerMetric pW; - typedef Galois::WorkList::OrderedByIntegerMetric pLD; - typedef Galois::WorkList::OrderedByIntegerMetric pHD; - - HighDegreeIndexer::indexgraph = fineMetisGraph->getGraph(); - parallelMatchAndCreateNodes pHEM(coarseMetisGraph, pc, bagOfLoners, !use2Hop); - if (useOBIM) - Galois::for_each_local(*fineMetisGraph->getGraph(), pHEM, Galois::loopname("match"), Galois::wl()); - else - Galois::for_each_local(*fineMetisGraph->getGraph(), pHEM, Galois::loopname("match"), Galois::wl()); - } - unsigned c = fixupLoners(bagOfLoners, coarseMetisGraph->getGraph(), fineMetisGraph->getGraph()); - if (verbose && c) - std::cout << "\n\tLone Matches " << c; - if (use2Hop) { - typedef Galois::WorkList::dChunkedLIFO<16> Chunk; - typedef Galois::WorkList::OrderedByIntegerMetric pW; - typedef Galois::WorkList::OrderedByIntegerMetric pLD; - typedef Galois::WorkList::OrderedByIntegerMetric pHD; - - HighDegreeIndexer::indexgraph = fineMetisGraph->getGraph(); - Pcounter pc2; - parallelMatchAndCreateNodes > p2HEM(coarseMetisGraph, pc2, bagOfLoners, true); - if (useOBIM) - Galois::for_each_local(*fineMetisGraph->getGraph(), p2HEM, Galois::loopname("match"), Galois::wl()); - else - Galois::for_each_local(*fineMetisGraph->getGraph(), p2HEM, Galois::loopname("match"), Galois::wl()); - return pc2.reduce(); - } - return pc.reduce(); -} - -void createCoarseEdges(MetisGraph *coarseMetisGraph) { - MetisGraph* fineMetisGraph = coarseMetisGraph->getFinerGraph(); - GGraph* fineGGraph = fineMetisGraph->getGraph(); - typedef Galois::WorkList::StableIteratorlocal_begin()), true> WL; - parallelPopulateEdges pPE(coarseMetisGraph); - Galois::for_each_local(*coarseMetisGraph->getGraph(), pPE, Galois::loopname("popedge"), Galois::wl()); -} - -MetisGraph* coarsenOnce(MetisGraph *fineMetisGraph, unsigned& rem, bool useRM, bool with2Hop, bool verbose) { - MetisGraph *coarseMetisGraph = new MetisGraph(fineMetisGraph); - Galois::Timer t, t2; - if (verbose) - t.start(); - rem = findMatching(coarseMetisGraph, useRM, with2Hop, verbose); - if (verbose) { - t.stop(); - std::cout << "\n\tTime Matching " << t.get() << "\n"; - t2.start(); - } - createCoarseEdges(coarseMetisGraph); - if (verbose) { - t2.stop(); - std::cout << "\tTime Creating " << t2.get() << "\n"; - } - return coarseMetisGraph; -} - -} // anon namespace - -MetisGraph* coarsen(MetisGraph* fineMetisGraph, unsigned coarsenTo, bool verbose) { - MetisGraph* coarseGraph = fineMetisGraph; - unsigned size = std::distance(fineMetisGraph->getGraph()->begin(), fineMetisGraph->getGraph()->end()); - unsigned iterNum = 0; - bool with2Hop = false; - unsigned stat; - while (true) {//overflow - if (verbose) { - std::cout << "Coarsening " << iterNum << "\t"; - stat = graphStat(coarseGraph->getGraph()); - } - unsigned rem = 0; - coarseGraph = coarsenOnce(coarseGraph, rem, false, with2Hop, verbose); - unsigned 
newSize = size / 2 + rem / 2; - if (verbose) { - std::cout << "\tTO\t"; - unsigned stat2 = graphStat(coarseGraph->getGraph()); - std::cout << "\n\tRatio " << (double)stat2 / (double)stat << " REM " << rem << " new size " << newSize << "\n"; - } - - if (size * 3 < newSize * 4) { - with2Hop = true; - if (verbose) - std::cout << "** Enabling 2 hop matching\n"; - } else { - with2Hop = false; - } - - size = newSize; - if (newSize * 4 < coarsenTo) { //be more exact near the end - size = std::distance(coarseGraph->getGraph()->begin(), coarseGraph->getGraph()->end()); - if (size < coarsenTo) - break; - } - ++iterNum; - } - - return coarseGraph; -} diff --git a/maxflow/galois/apps/gmetis/GMetis.cpp b/maxflow/galois/apps/gmetis/GMetis.cpp deleted file mode 100644 index 2aac3b7..0000000 --- a/maxflow/galois/apps/gmetis/GMetis.cpp +++ /dev/null @@ -1,180 +0,0 @@ -/** GMetis -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. 
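The loop in coarsen() above predicts the next level's size as size/2 + rem/2 (matched pairs collapse into one node, the rem self-matched nodes survive) and turns on 2-hop matching whenever the predicted reduction is worse than 25 percent, i.e. size*3 < newSize*4. A small standalone check of that trigger with made-up counts:

#include <iostream>

// Returns true when the coarsening ratio is poor enough (less than a 25%
// reduction) that the loop above would enable 2-hop matching.
bool needTwoHop(unsigned size, unsigned rem) {
  unsigned newSize = size / 2 + rem / 2; // matched pairs halve, loners survive
  return size * 3 < newSize * 4;
}

int main() {
  // 100,000 nodes, 10,000 left self-matched: newSize = 55,000, a good shrink.
  std::cout << std::boolalpha << needTwoHop(100000, 10000) << "\n"; // false
  // 100,000 nodes, 70,000 left self-matched: newSize = 85,000, a poor shrink,
  // so 2-hop matching would be enabled for the next level.
  std::cout << std::boolalpha << needTwoHop(100000, 70000) << "\n"; // true
  return 0;
}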
- * - * @author Xin Sui - * @author Andrew Lenharth - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "Metis.h" -#include "Galois/Graph/Util.h" -#include "Galois/Statistic.h" -//#include "GraphReader.h" -#include "Lonestar/BoilerPlate.h" - -namespace cll = llvm::cl; - -static const char* name = "GMetis"; -static const char* desc = "Partitions a graph into K parts and minimizing the graph cut"; -static const char* url = "gMetis"; - - -static cll::opt partMode(cll::desc("Choose a inital part mode:"), - cll::values( - clEnumVal(GGP, "GGP."), - clEnumVal(GGGP, "GGGP, default."), - clEnumVal(MGGGP, "MGGGP."), - clEnumValEnd), cll::init(GGGP)); -static cll::opt refineMode(cll::desc("Choose a refinement mode:"), - cll::values( - clEnumVal(BKL, "BKL"), - clEnumVal(BKL2, "BKL2, default."), - clEnumVal(ROBO, "ROBO"), - clEnumVal(GRACLUS, "GRACLUS"), - clEnumValEnd), cll::init(BKL2)); - -static cll::opt mtxInput("mtxinput", cll::desc("Use text mtx files instead binary based ones"), cll::init(false)); -static cll::opt weighted("weighted", cll::desc("weighted"), cll::init(false)); -static cll::opt verbose("verbose", cll::desc("verbose output (debugging mode, takes extra time)"), cll::init(false)); -static cll::opt outfile("output", cll::desc("output file name")); - -static cll::opt filename(cll::Positional, cll::desc(""), cll::Required); -static cll::opt numPartitions(cll::Positional, cll::desc(""), cll::Required); -static cll::opt imbalance("balance", cll::desc("Fraction deviated from mean partition size"), cll::init(0.01)); -const double COARSEN_FRACTION = 0.9; - -/** - * KMetis Algorithm - */ -void Partition(MetisGraph* metisGraph, unsigned nparts) { - Galois::StatTimer TM; - TM.start(); - unsigned meanWeight = ( (double)metisGraph->getTotalWeight()) / (double)nparts; - //unsigned coarsenTo = std::max(metisGraph->getNumNodes() / (40 * intlog2(nparts)), 20 * (nparts)); - unsigned coarsenTo = 20 * nparts; - - if (verbose) std::cout << "Starting coarsening: \n"; - Galois::StatTimer T("Coarsen"); - T.start(); - MetisGraph* mcg = coarsen(metisGraph, coarsenTo, verbose); - T.stop(); - if (verbose) std::cout << "Time coarsen: " << T.get() << "\n"; - - Galois::StatTimer T2("Partition"); - T2.start(); - std::vector parts; - parts = partition(mcg, nparts, partMode); - T2.stop(); - - if (verbose) std::cout << "Init edge cut : " << computeCut(*mcg->getGraph()) << "\n\n"; - - std::vector initParts = parts; - std::cout << "Time clustering: "<getCoarserGraph()) - coarseGraph = coarseGraph->getCoarserGraph(); - std::ofstream outFile(outfile.c_str()); - for (auto it = graph->begin(), ie = graph->end(); it!=ie; it++) - { - unsigned gPart = graph->getData(*it).getPart(); - outFile<< gPart<< '\n'; - } - } - return 0; -} - diff --git a/maxflow/galois/apps/gmetis/GraphReader.h b/maxflow/galois/apps/gmetis/GraphReader.h deleted file mode 100644 index 6d64dbd..0000000 --- a/maxflow/galois/apps/gmetis/GraphReader.h +++ /dev/null @@ -1,168 +0,0 @@ -/* @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. 
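Partition() above derives the mean partition weight from the total graph weight and nparts, and the command-line balance option (default 0.01) describes the allowed deviation from that mean. The min/max size computation itself is not visible in this hunk, so the helper below is only a plausible sketch consistent with those definitions; partitionBounds and its exact formula are assumptions, not the deleted driver's code.

#include <cmath>
#include <iostream>

// Hypothetical helper: derive per-partition weight bounds from the mean
// partition weight and the balance fraction.
void partitionBounds(unsigned totalWeight, unsigned nparts, double imbalance,
                     unsigned& minSize, unsigned& maxSize) {
  double meanWeight = (double)totalWeight / (double)nparts;
  minSize = (unsigned)std::lround(meanWeight * (1.0 - imbalance));
  maxSize = (unsigned)std::lround(meanWeight * (1.0 + imbalance));
}

int main() {
  unsigned minSize = 0, maxSize = 0;
  partitionBounds(1000000, 4, 0.01, minSize, maxSize); // illustrative numbers
  std::cout << "mean 250000, bounds [" << minSize << ", " << maxSize << "]\n";
  // prints: mean 250000, bounds [247500, 252500]
  return 0;
}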
- * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author Nikunj Yadav nikunj@cs.utexas.edu - */ - -#ifndef GRAPHREADER_H_ -#define GRAPHREADER_H_ -#include -#include -using namespace std; - -typedef Galois::Graph::LC_CSR_Graph InputGraph; -typedef Galois::Graph::LC_CSR_Graph::GraphNode InputGNode; - - - - while (true) { - int index = strtol(items, &remaining,10) - 1; - if(index < 0) break; - items = remaining; - GNode n2 = nodes[index]; - if(n1==n2){ - continue; - } - graph->addEdge(n1, n2, Galois::MethodFlag::ALL, 1); - graph->getData(n1).setEdgeWeight(graph->getData(n1).getEdgeWeight() + 1); - graph->getData(n1).setNumEdges(graph->getData(n1).getNumEdges() + 1); - countEdges++; - } - } - - - -parallelMakeNodes(GGraph *g,vector &gn,InputGraph *in,Galois::GAccumulator &numNodes): - graph(g),inputGraph(in),gnodes(gn),pnumNodes(numNodes) {} - void operator()(InputGNode node,Galois::UserContext &ctx) { - int id = inputGraph->getData(node); - GNode item = graph->createNode(100,1); // FIXME: edge num - // graph->addNode(item); - gnodes[id]=item; - pnumNodes+=1; - } -}; - -struct parallelMakeEdges { - GGraph *graph; - InputGraph *inputGraph; - vector &gnodes; - bool weighted; - bool directed; - Galois::GAccumulator &pnumEdges; - - parallelMakeEdges(GGraph *g,vector &gn,InputGraph *in,Galois::GAccumulator &numE,bool w=false,bool dir = true) - :graph(g),inputGraph(in),gnodes(gn),pnumEdges(numE) { - weighted = w; - directed = dir; -} - - void operator()(InputGNode inNode,Galois::UserContext &ctx) { - int nodeId = inputGraph->getData(inNode); - GNode node = gnodes[nodeId]; - MetisNode& nodeData = graph->getData(node); - for (InputGraph::edge_iterator jj = inputGraph->edge_begin(inNode), eejj = inputGraph->edge_end(inNode); jj != eejj; ++jj) { - InputGNode inNeighbor = inputGraph->getEdgeDst(jj); - if(inNode == inNeighbor) continue; - int neighId = inputGraph->getData(inNeighbor); - int weight = 1; - if(weighted){ - weight = inputGraph->getEdgeData(jj); - } - graph->addEdge(node, gnodes[neighId], Galois::MethodFlag::ALL, weight); - nodeData.setNumEdges(nodeData.getNumEdges() + 1); - nodeData.setEdgeWeight(nodeData.getEdgeWeight() + weight); - /*if(!directed){ - graph->getEdgeData(graph->addEdge(node, gnodes[neighId])) = weight;// - nodeData.incNumEdges(); - nodeData.addEdgeWeight(weight); - }else{ - graph->getEdgeData(graph->addEdge(node, gnodes[neighId])) = weight; - graph->getEdgeData(graph->addEdge(gnodes[neighId], node)) = weight; - }*/ - pnumEdges+=1; - } - - } -}; - -void readGraph(MetisGraph* metisGraph, const char* filename, bool weighted = false, bool directed = true){ - InputGraph inputGraph; - Galois::Graph::readGraph(inputGraph, filename); - cout<<"start to transfer data to GGraph"<getGraph(); - vector 
gnodes(inputGraph.size()); - id = 0; - /*for(uint64_t i=0;icreateNode(MetisNode(id, 1)); - graph->addNode(node); - gnodes[id++] = node; - }*/ - - - typedef Galois::WorkList::dChunkedFIFO<256> WL; - Galois::GAccumulator pnumNodes; - Galois::GAccumulator pnumEdges; - - - Galois::Timer t; - t.start(); - Galois::for_each(inputGraph.begin(),inputGraph.end(),parallelMakeNodes(graph,gnodes,&inputGraph,pnumNodes),"NodesLoad"); - t.stop(); - cout<(inputGraph.begin(),inputGraph.end(),parallelMakeEdges(graph,gnodes,&inputGraph,pnumEdges,weighted,true),"EdgesLoad"); - t.stop(); - cout<(inputGraph,parallelMakeNodes(graph,gnodes,&inputGraph,pnumNodes),"NodesLoad"); - t.stop(); - cout<(inputGraph,parallelMakeEdges(graph,gnodes,&inputGraph,pnumEdges,weighted,true),"EdgesLoad"); - t.stop(); - cout<setNumNodes(numNodes); - // metisGraph->setNumEdges(numEdges/2); - - cout<<"Done Reading Graph "; - cout<<"numNodes: "< - * @author Nikunj Yadav - * @author Andrew Lenharth - */ - -#ifndef METIS_H_ -#define METIS_H_ - -#include "Galois/Graph/LC_Morph_Graph.h" - -class MetisNode; -typedef Galois::Graph::LC_Morph_Graph GGraph; -typedef Galois::Graph::LC_Morph_Graph::GraphNode GNode; - -//algorithms -enum InitialPartMode {GGP, GGGP, MGGGP}; -enum refinementMode {BKL, BKL2, ROBO, GRACLUS}; -//Nodes in the metis graph -class MetisNode { - - struct coarsenData { - int matched:1; - int failedmatch:1; - GNode parent; - }; - struct refineData { - unsigned partition; - unsigned oldPartition; - bool maybeBoundary; - }; - - void initCoarsen(){ - data.cd.matched = false; - data.cd.failedmatch = false; - data.cd.parent = NULL; - } - -public: - //int num; - explicit MetisNode(int weight) :_weight(weight) { - initCoarsen(); - } - - MetisNode(unsigned weight, GNode child0, GNode child1 = NULL) - : _weight(weight) { - initCoarsen(); - children[0] = child0; - children[1] = child1; - } - - MetisNode():_weight(1) { initCoarsen(); } - - //call to switch data to refining - void initRefine(unsigned part = 0, bool bound = false) { - refineData rd = {part, part, bound}; - data.rd = rd; - } - - int getWeight() const { return _weight; } - void setWeight(int weight) { _weight = weight; } - - void setParent(GNode p) { data.cd.parent = p; } - GNode getParent() const { assert(data.cd.parent); return data.cd.parent; } - - void setMatched() { data.cd.matched = true; } - bool isMatched() const { return data.cd.matched; } - - void setFailedMatch() { data.cd.failedmatch = true; } - bool isFailedMatch() const { return data.cd.failedmatch; } - - GNode getChild(unsigned x) const { return children[x]; } - unsigned numChildren() const { return children[1] ? 
2 : 1; } - - unsigned getPart() const { return data.rd.partition; } - void setPart(unsigned val) { data.rd.partition = val; } - - int getOldPart() const {return data.rd.oldPartition;} - void OldPartCpyNew(){ data.rd.oldPartition = data.rd.partition; } - - bool getmaybeBoundary() const {return data.rd.maybeBoundary; } - void setmaybeBoundary(bool val){ data.rd.maybeBoundary = val; } - -private: - union { - coarsenData cd; - refineData rd; - } data; - - GNode children[2]; - unsigned _weight; -}; - -//Structure to keep track of graph hirarchy -class MetisGraph{ - MetisGraph* coarser; - MetisGraph* finer; - - GGraph graph; - -public: - MetisGraph() :coarser(0), finer(0) { } - - explicit MetisGraph(MetisGraph* finerGraph) - :coarser(0), finer(finerGraph) { - finer->coarser = this; - } - - const GGraph* getGraph() const { return &graph; } - GGraph* getGraph() { return &graph; } - MetisGraph* getFinerGraph() const { return finer; } - MetisGraph* getCoarserGraph() const { return coarser; } - - unsigned getNumNodes() { - return std::distance(graph.begin(), graph.end()); - } - - unsigned getTotalWeight() { - MetisGraph* f = this; - while (f->finer) - f = f->finer; - return std::distance(f->graph.begin(), f->graph.end()); - } -}; - - -//Structure to store working partition information -struct partInfo { - unsigned partNum; - unsigned partMask; - unsigned partWeight; - - explicit partInfo(unsigned mw) - :partNum(0), partMask(1), partWeight(mw) {} - - partInfo() :partNum(~0), partMask(~0), partWeight(~0) {} - - partInfo(unsigned pn, unsigned pm, unsigned pw) :partNum(pn), partMask(pm), partWeight(pw) {} - - unsigned splitID() const { - return partNum | partMask; - } - - std::pair splitRatio(unsigned numParts) { - unsigned L = 0, R = 0; - unsigned LM = partMask - 1; // 00100 -> 00011 - for (unsigned x = 0; x < numParts; ++x) - if ((x & LM) == partNum) { - if (x & partMask) - ++R; - else - ++L; - } - return std::make_pair(L, R); - } - - partInfo split() { - partInfo np(splitID(), partMask << 1, 0); - partMask <<= 1; - return np; - } -}; - -std::ostream& operator<<(std::ostream& os, const partInfo& p); - -//Metrics -void printPartStats(std::vector&); -unsigned graphStat(GGraph* graph); -std::vector edgeCut(GGraph& g, unsigned nparts); -void printCuts(const char* str, MetisGraph* g, unsigned numPartitions); -unsigned computeCut(GGraph& g); - -//Coarsening -MetisGraph* coarsen(MetisGraph* fineMetisGraph, unsigned coarsenTo, bool verbose); - -//Partitioning -std::vector partition(MetisGraph* coarseMetisGraph, unsigned numPartitions, InitialPartMode partMode); -std::vector BisectAll(MetisGraph* mcg, unsigned numPartitions, unsigned maxSize); -//Refinement -void refine(MetisGraph* coarseGraph, std::vector& parts, unsigned minSize, unsigned maxSize, refinementMode refM, bool verbose); -//void refinePart(GGraph& g, std::vector& parts, unsigned maxSize); -//Balancing -void balance(MetisGraph* Graph, std::vector& parts, unsigned maxSize); - -#endif diff --git a/maxflow/galois/apps/gmetis/Metric.cpp b/maxflow/galois/apps/gmetis/Metric.cpp deleted file mode 100644 index be42a7a..0000000 --- a/maxflow/galois/apps/gmetis/Metric.cpp +++ /dev/null @@ -1,131 +0,0 @@ -/** GMetis -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. 
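The partInfo bookkeeping above encodes recursive bisection as a (partNum, partMask) pair: splitID() is partNum | partMask, and split() hands that id to the new half while both halves double their mask. The sketch below replays the scheme for 8 parts to show that the generated ids are exactly 0..7; it mirrors split()/splitID() and the parallelBisect stop test, but uses a plain struct rather than partInfo.

#include <iostream>
#include <queue>
#include <set>

// Minimal stand-in for partInfo's split bookkeeping.
struct Part {
  unsigned num, mask;
};

int main() {
  const unsigned nparts = 8;
  std::queue<Part> work;
  work.push({0u, 1u}); // everything starts in part 0 with mask 1
  std::set<unsigned> finalIds;

  while (!work.empty()) {
    Part p = work.front();
    work.pop();
    unsigned splitId = p.num | p.mask; // same as partInfo::splitID()
    if (splitId >= nparts) {           // same stop test as parallelBisect
      finalIds.insert(p.num);
      continue;
    }
    // split(): the new half takes the split id, both halves double the mask.
    work.push({p.num, p.mask << 1});
    work.push({splitId, p.mask << 1});
  }

  for (unsigned id : finalIds) std::cout << id << " "; // prints 0 1 2 3 4 5 6 7
  std::cout << "\n";
  return 0;
}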
- * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author Xin Sui - * @author Andrew Lenharth - */ - -#include "Metis.h" - -#include -#include -#include - -struct onlineStat { - unsigned num; - unsigned val; - double valSQ; - unsigned mmin; - unsigned mmax; - - onlineStat() :num(0), val(0), valSQ(0), mmin(std::numeric_limits::max()), mmax(0) {} - - void add(unsigned v) { - ++num; - val += v; - valSQ += (double)v*(double)v; - mmin = std::min(v, mmin); - mmax = std::max(v, mmax); - } - - double mean() { - return (double)val / (double)num; - } - - double variance() { - double t = valSQ / (double)num; - double m = mean(); - return t - m*m; - } - - unsigned count() { return num; } - unsigned total() { return val; } - unsigned min() { return mmin; } - unsigned max() { return mmax; } -}; - - -unsigned graphStat(GGraph* graph) { - onlineStat e; - for (auto ii = graph->begin(), ee = graph->end(); ii != ee; ++ii) { - unsigned val = std::distance(graph->edge_begin(*ii), graph->edge_end(*ii)); - e.add(val); - } - std::cout << "Nodes " << e.count() - << " Edges(total, var, min, max) " - << e.total() << " " - << e.variance() << " " - << e.min() << " " - << e.max(); - return e.count(); -} - -std::vector edgeCut(GGraph& g, unsigned nparts) { - std::vector cuts(nparts); - - //find boundary nodes with positive gain - for (auto nn = g.begin(), en = g.end(); nn != en; ++nn) { - unsigned gPart = g.getData(*nn).getPart(); - for (auto ii = g.edge_begin(*nn), ee = g.edge_end(*nn); ii != ee; ++ii) { - auto& m = g.getData(g.getEdgeDst(ii)); - if (m.getPart() != gPart) { - cuts.at(gPart) += g.getEdgeData(ii); - } - } - } - return cuts; -} - -unsigned computeCut(GGraph& g) { - unsigned cuts=0; - for (auto nn = g.begin(), en = g.end(); nn != en; ++nn) { - unsigned gPart = g.getData(*nn).getPart(); - for (auto ii = g.edge_begin(*nn), ee = g.edge_end(*nn); ii != ee; ++ii) { - auto& m = g.getData(g.getEdgeDst(ii)); - if (m.getPart() != gPart) - cuts += g.getEdgeData(ii); - } - } - return cuts/2; -} - - -void printPartStats(std::vector& parts) { - onlineStat e; - for (unsigned x = 0; x < parts.size(); ++x) { - e.add(parts[x].partWeight); - } - std::cout << "target " << e.total() / e.count() << " var " << e.variance() << " min " << e.min() << " max " << e.max(); -} - -std::ostream& operator<<(std::ostream& os, const partInfo& p) { - os << "Num " << std::setw(3) << p.partNum << "\tmask " << std::setw(5) << std::hex << p.partMask << std::dec << "\tweight " << p.partWeight; - return os; -} - -void printCuts(const char* str, MetisGraph* g, unsigned numPartitions) { - std::vector ec = edgeCut(*g->getGraph(), numPartitions); - std::cout << str << " Edge Cuts:\n"; - for (unsigned x = 0; x < ec.size(); ++x) - std::cout << (x == 0 ? 
"" : " " ) << ec[x]; - std::cout << "\n"; - std::cout << str << " Average Edge Cut: " << (std::accumulate(ec.begin(), ec.end(), 0) / ec.size()) << "\n"; - std::cout << str << " Minimum Edge Cut: " << *std::min_element(ec.begin(), ec.end()) << "\n"; -} diff --git a/maxflow/galois/apps/gmetis/Partitioning.cpp b/maxflow/galois/apps/gmetis/Partitioning.cpp deleted file mode 100644 index 15d876e..0000000 --- a/maxflow/galois/apps/gmetis/Partitioning.cpp +++ /dev/null @@ -1,330 +0,0 @@ -/** GMetis -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author Xin Sui - * @author Nikunj Yadav - * @author Andrew Lenharth - */ - -#include "Galois/Galois.h" -#include "Galois/Statistic.h" -#include "Metis.h" -#include -#include -#include - -const bool multiSeed = true; - -namespace { - -//gain of moving n from it's current part to new part -int gain_limited(GGraph& g, GNode n, unsigned newpart, Galois::MethodFlag flag) { - int retval = 0; - unsigned nPart = g.getData(n,flag).getPart(); - for (auto ii = g.edge_begin(n,flag), ee =g.edge_end(n,flag); ii != ee; ++ii) { - GNode neigh = g.getEdgeDst(ii); - if (g.getData(neigh,flag).getPart() == nPart) - retval -= g.getEdgeData(ii,flag); - else if (g.getData(neigh,flag).getPart() == newpart) - retval += g.getEdgeData(ii,flag); - } - return retval; -} - - -GNode findSeed(GGraph& g, unsigned partNum, int partWeight, Galois::MethodFlag flag) { - //pick a seed - int rWeight = (int)(drand48()*(partWeight)); - GNode seed; - - for (auto ii = g.begin(), ee = g.end(); ii != ee; ++ii) { - if (g.getData(*ii, flag).getPart() == partNum) { - seed = *ii; - rWeight -= g.getData(*ii, flag).getWeight(); - if(rWeight <0) return *ii; - } - } - - return seed; -} - - -struct bisect_GGP { - partInfo operator()(GGraph& g, partInfo& oldPart, std::pair ratio) { - partInfo newPart = oldPart.split(); - std::deque boundary; - unsigned& newWeight = newPart.partWeight = 0; - unsigned targetWeight = oldPart.partWeight * ratio.second / (ratio.first + ratio.second); - - auto flag = Galois::MethodFlag::NONE; - - do { - boundary.push_back(findSeed(g, oldPart.partNum, oldPart.partWeight, flag)); - - //grow partition - while (newWeight < targetWeight && !boundary.empty()) { - GNode n = boundary.front(); - boundary.pop_front(); - if (g.getData(n, flag).getPart() == newPart.partNum) - continue; - newWeight += g.getData(n, flag).getWeight(); - g.getData(n, flag).setPart(newPart.partNum); - for (auto ii = g.edge_begin(n, flag), ee = g.edge_end(n, flag); ii != ee; ++ii) - if (g.getData(g.getEdgeDst(ii), 
flag).getPart() == oldPart.partNum) - boundary.push_back(g.getEdgeDst(ii)); - } - } while (newWeight < targetWeight && multiSeed); - - oldPart.partWeight -= newWeight; - return newPart; - } -}; - - -struct bisect_GGGP { - partInfo operator()(GGraph& g, partInfo& oldPart, std::pair ratio) { - partInfo newPart = oldPart.split(); - std::map gains; - std::map> boundary; - - unsigned& newWeight = newPart.partWeight = 0; - unsigned targetWeight = oldPart.partWeight * ratio.second / (ratio.first + ratio.second); - //pick a seed - - auto flag = Galois::MethodFlag::NONE; - - do { - boundary[0].insert(findSeed(g, oldPart.partNum, oldPart.partWeight, flag)); - - //grow partition - while (newWeight < targetWeight && !boundary.empty()) { - auto bi = boundary.rbegin(); - GNode n = *bi->second.begin(); - bi->second.erase(bi->second.begin()); - if (bi->second.empty()) - boundary.erase(bi->first); - if (g.getData(n, flag).getPart() == newPart.partNum) - continue; - newWeight += g.getData(n, flag).getWeight(); - g.getData(n, flag).setPart(newPart.partNum); - for (auto ii = g.edge_begin(n, flag), ee = g.edge_end(n, flag); ii != ee; ++ii) { - GNode dst = g.getEdgeDst(ii); - auto gi = gains.find(dst); - if (gi != gains.end()) { //update - boundary[gi->second].erase(dst); - if (boundary[gi->second].empty()) - boundary.erase(gi->second); - gains.erase(dst); - } - if (g.getData(dst, flag).getPart() == oldPart.partNum) { - int newgain = gains[dst] = gain_limited(g, dst, newPart.partNum, flag); - boundary[newgain].insert(dst); - } - } - } - } while (newWeight < targetWeight && multiSeed); - - oldPart.partWeight -= newWeight; - return newPart; - } -}; - - -template -struct parallelBisect { - unsigned totalWeight; - unsigned nparts; - GGraph* graph; - bisector bisect; - std::vector& parts; - - parallelBisect(MetisGraph* mg, unsigned parts, std::vector& pb, bisector b = bisector()) - :totalWeight(mg->getTotalWeight()), nparts(parts), graph(mg->getGraph()), bisect(b), parts(pb) - {} - void operator()(partInfo* item, Galois::UserContext &cnx) { - if (item->splitID() >= nparts) //when to stop - return; - std::pair ratio = item->splitRatio(nparts); - //std::cout << "Splitting " << item->partNum << ":" << item->partMask << " L " << ratio.first << " R " << ratio.second << "\n"; - partInfo newPart = bisect(*graph, *item, ratio); - //std::cout << "Result " << item->partNum << " " << newPart.partNum << "\n"; - parts[newPart.partNum] = newPart; - cnx.push(&parts[newPart.partNum]); - cnx.push(item); - } -}; - -struct initPart { - GGraph& g; - initPart(GGraph& _g): g(_g) {} - void operator()(GNode item) { - g.getData(item, Galois::MethodFlag::NONE).initRefine(0,true); - } -}; - -} //anon namespace - - -std::vector partition(MetisGraph* mcg, unsigned numPartitions, InitialPartMode partMode) { - std::vector parts(numPartitions); - parts[0] = partInfo(mcg->getTotalWeight()); - Galois::do_all_local(*mcg->getGraph(), initPart(*mcg->getGraph())); - switch (partMode) { - case GGP: - std::cout <<"\n Sarting initial partitioning using GGP:\n"; - Galois::for_each(&parts[0], parallelBisect(mcg, numPartitions, parts), Galois::wl>()); - break; - case GGGP: - std::cout <<"\n Sarting initial partitioning using GGGP:\n"; - Galois::for_each(&parts[0], parallelBisect(mcg, numPartitions, parts), Galois::wl>()); - break; - default: abort(); - } - printPartStats(parts); -#if 0 - if (!multiSeed) { - printPartStats(parts); - unsigned maxWeight = 1.01 * mcg->getTotalWeight() / numPartitions; - balance(mcg, parts, maxWeight); - } -#endif - 
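gain_limited() above scores a candidate move as the edge weight into the target part minus the edge weight kept inside the node's current part, and bisect_GGGP grows the new part by repeatedly taking the highest-gain boundary node. A compact standalone version of that gain function; Graph and the example values are illustrative, not the Galois types.

#include <cassert>
#include <iostream>
#include <utility>
#include <vector>

// adj[u] holds (neighbor, edge weight); part[u] is u's current partition.
typedef std::vector<std::vector<std::pair<int, int> > > Graph;

// Same scoring rule as gain_limited() above: edges into `newpart` count
// positively, edges staying inside u's current part count negatively.
int moveGain(const Graph& adj, const std::vector<int>& part, int u, int newpart) {
  int gain = 0;
  for (const auto& e : adj[u]) {
    if (part[e.first] == part[u])      gain -= e.second;
    else if (part[e.first] == newpart) gain += e.second;
  }
  return gain;
}

int main() {
  // Node 0 sits in part 0 with one weight-2 edge inside its part (to node 1)
  // and one weight-5 edge into part 1 (to node 2): moving it to part 1 gains 3.
  Graph adj = {{{1, 2}, {2, 5}}, {{0, 2}}, {{0, 5}}};
  std::vector<int> part = {0, 0, 1};
  assert(moveGain(adj, part, 0, 1) == 3);
  std::cout << "gain of moving node 0 into part 1: " << moveGain(adj, part, 0, 1) << "\n";
  return 0;
}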
static_assert(multiSeed, "not yet implemented"); - - return parts; -} -namespace { -int computeEdgeCut(GGraph& g) { - int cuts=0; - for (auto nn = g.begin(), en = g.end(); nn != en; ++nn) { - unsigned gPart = g.getData(*nn).getPart(); - for (auto ii = g.edge_begin(*nn), ee = g.edge_end(*nn); ii != ee; ++ii) { - auto& m = g.getData(g.getEdgeDst(ii)); - if (m.getPart() != gPart) { - cuts += g.getEdgeData(ii); - } - } - } - - return cuts/2; -} - -int edgeCount(GGraph& g) { - int count=0; - for (auto nn = g.begin(), en = g.end(); nn != en; ++nn) { - for (auto ii = g.edge_begin(*nn), ee = g.edge_end(*nn); ii != ee; ++ii) - count += g.getEdgeData(ii); - } - return count/2; -} -} -std::vector BisectAll(MetisGraph* mcg, unsigned numPartitions, unsigned maxSize) -{ - std::cout <<"\n Sarting initial partitioning using MGGGP:\n"; - auto flag = Galois::MethodFlag::NONE; - GGraph& g = *mcg->getGraph(); - - int bestCut = edgeCount(g); - std::map bestParts; - std::vector bestPartInfos(numPartitions); - - for(int nbTry =0; nbTry <20; nbTry ++){ - std::vector partInfos(numPartitions); - std::vector>> boundary(numPartitions); - std::map> partitions; - for(GGraph::iterator ii = g.begin(),ee = g.end();ii!=ee;ii++) - g.getData(*ii).setPart(numPartitions+1); - auto seedIter = g.begin(); - int k =0; - //find one seed for each partition and do initialization - for (unsigned int i =0; igetNumNodes())) +1; - bool goodseed = true; - while(seed--) - if(++seedIter== g.end())seedIter = g.begin(); - GNode n = *seedIter; - - for(unsigned int j=0; jsecond.begin(); - bb->second.erase(bb->second.begin()); - if (bb->second.empty()) - partitions.erase(bb->first); - - //find the node to add to the partition - GNode n = *g.begin(); - do{ - if(boundary[partToMod].empty()) break; - auto bi = boundary[partToMod].rbegin(); - n = *bi->second.begin(); - bi->second.erase(bi->second.begin()); - if (bi->second.empty()) - boundary[partToMod].erase(bi->first); - }while(g.getData(n, flag).getPart() - * @author Nikunj Yadav - * @author Andrew Lenharth - */ -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" -#include "Galois/Statistic.h" -#include "Metis.h" -#include -#include - -namespace { - -struct gainIndexer : public std::unary_function { - static GGraph* g; - - int operator()(GNode n) { - int retval = 0; - Galois::MethodFlag flag = Galois::NONE; - unsigned int nPart = g->getData(n, flag).getPart(); - for (auto ii = g->edge_begin(n, flag), ee = g->edge_end(n); ii != ee; ++ii) { - GNode neigh = g->getEdgeDst(ii); - if (g->getData(neigh, flag).getPart() == nPart) - retval -= g->getEdgeData(ii, flag); - else - retval += g->getEdgeData(ii, flag); - } - return -retval / 16; - } -}; - -GGraph* gainIndexer::g; - -bool isBoundary(GGraph& g, GNode n) { - unsigned int nPart = g.getData(n).getPart(); - for (auto ii = g.edge_begin(n), ee =g.edge_end(n); ii != ee; ++ii) - if (g.getData(g.getEdgeDst(ii)).getPart() != nPart) - return true; - return false; -} - -//This is only used on the terminal graph (find graph) -struct findBoundary { - Galois::InsertBag& b; - GGraph& g; - findBoundary(Galois::InsertBag& _b, GGraph& _g) : b(_b), g(_g) {} - void operator()(GNode n) { - auto& cn = g.getData(n, Galois::MethodFlag::NONE); - if (cn.getmaybeBoundary()) - cn.setmaybeBoundary(isBoundary(g,n)); - if (cn.getmaybeBoundary()) - b.push(n); - } -}; - -//this is used on the coarse graph to project to the fine graph -struct findBoundaryAndProject { - Galois::InsertBag& b; - GGraph& cg; - GGraph& fg; - findBoundaryAndProject(Galois::InsertBag& _b, GGraph& 
_cg, GGraph& _fg) :b(_b), cg(_cg), fg(_fg) {} - void operator()(GNode n) { - auto& cn = cg.getData(n, Galois::MethodFlag::NONE); - if (cn.getmaybeBoundary()) - cn.setmaybeBoundary(isBoundary(cg,n)); - - //project part and maybe boundary - //unsigned part = cn.getPart(); - for (unsigned x = 0; x < cn.numChildren(); ++x) { - fg.getData(cn.getChild(x), Galois::MethodFlag::NONE).initRefine(cn.getPart(), cn.getmaybeBoundary()); - } - if (cn.getmaybeBoundary()) - b.push(n); - } -}; - - -template -struct refine_BKL2 { - unsigned meanSize; - unsigned minSize; - unsigned maxSize; - GGraph& cg; - GGraph* fg; - std::vector& parts; - - typedef int tt_needs_per_iter_alloc; - - refine_BKL2(unsigned mis, unsigned mas, GGraph& _cg, GGraph* _fg, std::vector& _p) : minSize(mis), maxSize(mas), cg(_cg), fg(_fg), parts(_p) {} - - //Find the partition n is most connected to - template - unsigned pickPartitionEC(GNode n, Context& cnx) { - std::vector::other> edges(parts.size(), 0, cnx.getPerIterAlloc()); - unsigned P = cg.getData(n).getPart(); - for (auto ii = cg.edge_begin(n), ee = cg.edge_end(n); ii != ee; ++ii) { - GNode neigh = cg.getEdgeDst(ii); - auto& nd = cg.getData(neigh); - if (parts[nd.getPart()].partWeight < maxSize - || nd.getPart() == P) - edges[nd.getPart()] += cg.getEdgeData(ii); - } - return std::distance(edges.begin(), std::max_element(edges.begin(), edges.end())); - } - - //Find the smallest partition n is connected to - template - unsigned pickPartitionMP(GNode n, Context& cnx) { - unsigned P = cg.getData(n).getPart(); - unsigned W = parts[P].partWeight; - std::vector::other> edges(parts.size(), ~0, cnx.getPerIterAlloc()); - edges[P] = W; - W = (double)W * 0.9; - for (auto ii = cg.edge_begin(n), ee = cg.edge_end(n); ii != ee; ++ii) { - GNode neigh = cg.getEdgeDst(ii); - auto& nd = cg.getData(neigh); - if (parts[nd.getPart()].partWeight < W) - edges[nd.getPart()] = parts[nd.getPart()].partWeight; - } - return std::distance(edges.begin(), std::min_element(edges.begin(), edges.end())); - } - - - template - void operator()(GNode n, Context& cnx) { - auto& nd = cg.getData(n); - unsigned curpart = nd.getPart(); - unsigned newpart = balance ? 
pickPartitionMP(n, cnx) : pickPartitionEC(n, cnx); - if(parts[curpart].partWeight < minSize) return; - if (curpart != newpart) { - nd.setPart(newpart); - __sync_fetch_and_sub(&parts[curpart].partWeight, nd.getWeight()); - __sync_fetch_and_add(&parts[newpart].partWeight, nd.getWeight()); - for (auto ii = cg.edge_begin(n), ee = cg.edge_end(n); ii != ee; ++ii) { - GNode neigh = cg.getEdgeDst(ii); - auto& ned = cg.getData(neigh); - if (ned.getPart() != newpart && !ned.getmaybeBoundary()) { - ned.setmaybeBoundary(true); - if (fg) - for (unsigned x = 0; x < ned.numChildren(); ++x) - fg->getData(ned.getChild(x), Galois::MethodFlag::NONE).setmaybeBoundary(true); - } - //if (ned.getPart() != newpart) - //cnx.push(neigh); - } - if (fg) - for (unsigned x = 0; x < nd.numChildren(); ++x) - fg->getData(nd.getChild(x), Galois::MethodFlag::NONE).setPart(newpart); - } - } - - static void go(unsigned mins, unsigned maxs, GGraph& cg, GGraph* fg, std::vector& p) { - typedef Galois::WorkList::dChunkedFIFO<8> Chunk; - typedef Galois::WorkList::OrderedByIntegerMetric pG; - gainIndexer::g = &cg; - Galois::InsertBag boundary; - if (fg) - Galois::do_all_local(cg, findBoundaryAndProject(boundary, cg, *fg), Galois::loopname("boundary")); - else - Galois::do_all_local(cg, findBoundary(boundary, cg), Galois::loopname("boundary")); - Galois::for_each_local(boundary, refine_BKL2(mins, maxs, cg, fg, p), Galois::loopname("refine"), Galois::wl()); - if (false) { - Galois::InsertBag boundary; - Galois::do_all_local(cg, findBoundary(boundary, cg), Galois::loopname("boundary")); - Galois::for_each_local(boundary, refine_BKL2(mins, maxs, cg, fg, p), Galois::loopname("refine"), Galois::wl()); - } - - } -}; - -struct projectPart { - GGraph* fineGraph; - GGraph* coarseGraph; - std::vector& parts; - - projectPart(MetisGraph* Graph, std::vector& p) :fineGraph(Graph->getFinerGraph()->getGraph()), coarseGraph(Graph->getGraph()), parts(p) {} - - void operator()(GNode n) { - auto& cn = coarseGraph->getData(n); - unsigned part = cn.getPart(); - for (unsigned x = 0; x < cn.numChildren(); ++x) - fineGraph->getData(cn.getChild(x)).setPart(part); - } - - static void go(MetisGraph* Graph, std::vector& p) { - Galois::do_all_local(*Graph->getGraph(), projectPart(Graph, p), Galois::loopname("project")); - } -}; - -} //anon namespace - - - - -int gain(GGraph& g, GNode n) { - int retval = 0; - unsigned int nPart = g.getData(n).getPart(); - for (auto ii = g.edge_begin(n), ee =g.edge_end(n); ii != ee; ++ii) { - GNode neigh = g.getEdgeDst(ii); - if (g.getData(neigh).getPart() == nPart) - retval -= g.getEdgeData(ii); - else - retval += g.getEdgeData(ii); - } - return retval; -} - -struct parallelBoundary { - Galois::InsertBag &bag; - GGraph& g; - parallelBoundary(Galois::InsertBag &bag, GGraph& graph):bag(bag),g(graph) { - - } - void operator()(GNode n,Galois::UserContext&ctx) { - if (gain(g,n) > 0) - bag.push(n); - } -}; -void refineOneByOne(GGraph& g, std::vector& parts) { - std::vector boundary; - unsigned int meanWeight =0; - for (unsigned int i =0; i boundaryBag; - parallelBoundary pB(boundaryBag, g); - Galois::for_each(g.begin(), g.end(), pB, Galois::loopname("Get Boundary")); - - for (auto ii = boundaryBag.begin(), ie =boundaryBag.end(); ii!=ie;ii++){ - GNode n = (*ii) ; - unsigned nPart = g.getData(n).getPart(); - int part[parts.size()]; - for (unsigned int i =0; i t && parts[nPart].partWeight> parts[i].partWeight*(98)/(100) && parts[nPart].partWeight > meanWeight*98/100){ - t = part[i]; - p = i; - } - if(p != nPart){ - 
g.getData(n).setPart(p); - parts[p].partWeight += g.getData(n).getWeight(); - parts[nPart].partWeight -= g.getData(n).getWeight(); - } - } -} - - -void refine_BKL(GGraph& g, std::vector& parts) { - std::set boundary; - - //find boundary nodes with positive gain - Galois::InsertBag boundaryBag; - parallelBoundary pB(boundaryBag, g); - Galois::for_each(g.begin(), g.end(), pB, Galois::loopname("Get Boundary")); - for (auto ii = boundaryBag.begin(), ie =boundaryBag.end(); ii!=ie;ii++ ){ - boundary.insert(*ii);} - - //refine by swapping with a neighbor high-gain node - while (!boundary.empty()) { - GNode n = *boundary.begin(); - boundary.erase(boundary.begin()); - unsigned nPart = g.getData(n).getPart(); - for (auto ii = g.edge_begin(n), ee = g.edge_end(n); ii != ee; ++ii) { - GNode neigh = g.getEdgeDst(ii); - unsigned neighPart = g.getData(neigh).getPart(); - if (neighPart != nPart && boundary.count(neigh) && - gain(g, n) > 0 && gain(g, neigh) > 0 ) { - unsigned nWeight = g.getData(n).getWeight(); - unsigned neighWeight = g.getData(neigh).getWeight(); - //swap - g.getData(n).setPart(neighPart); - g.getData(neigh).setPart(nPart); - //update partinfo - parts[neighPart].partWeight += nWeight; - parts[neighPart].partWeight -= neighWeight; - parts[nPart].partWeight += neighWeight; - parts[nPart].partWeight -= nWeight; - //remove nodes - boundary.erase(neigh); - break; - } - } - } -} - -struct ChangePart {//move each node to its nearest cluster - GGraph& g; - int nbCluster; - double* Dist; - int* card; - - ChangePart(GGraph& g, int nb_cluster, double* Dist, int* card): g(g), nbCluster(nb_cluster), Dist(Dist), card(card){ - } - - void operator()(GNode n, Galois::UserContext& ctx) { - double dmin; - int partition =-1; - std::map degreein; - degreein[g.getData(n, Galois::MethodFlag::NONE).getOldPart()] +=1; - for (GGraph::edge_iterator ii = g.edge_begin(n, Galois::MethodFlag::NONE), ei = g.edge_end(n, Galois::MethodFlag::NONE); ii != ei; ++ii){ - int nclust = g.getData(g.getEdgeDst(ii), Galois::MethodFlag::NONE).getOldPart(); - degreein[nclust] += (int) g.getEdgeData(ii, Galois::MethodFlag::NONE); - } - - for(auto clust = degreein.begin(), ee = degreein.end(); clust != ee; clust++) - { - //the distance between the cluster clust and the noden is : - double d = Dist[clust->first]-(2.0*(double)clust->second/(double)card[clust->first]); - if(d < dmin || partition ==-1) - { - dmin = d; - partition = clust->first; - } - } - g.getData(n, Galois::MethodFlag::NONE).setPart(partition); - } - - -}; - - // Galois::GAccumulator count -struct ComputeClusterDist { - GGraph& g; - int nbCluster; - Galois::GAccumulator *card; - Galois::GAccumulator *degreeIn; - - ComputeClusterDist(GGraph& g, int nb_cluster): g(g), nbCluster(nb_cluster) { - card = new Galois::GAccumulator[nbCluster]; - degreeIn = new Galois::GAccumulator[nbCluster]; - } - - /*~ComputeClusterDist(){ - std::cout <<"destruct\n"; delete[] card; delete[] degreeIn; - }*/ - - void operator()(GNode n, Galois::UserContext& ctx) { - unsigned int clust = g.getData(n, Galois::MethodFlag::NONE).getPart(); - int degreet =0; - - g.getData(n, Galois::MethodFlag::NONE).OldPartCpyNew(); - for (GGraph::edge_iterator ii = g.edge_begin(n, Galois::MethodFlag::NONE), ei = g.edge_end(n, Galois::MethodFlag::NONE); ii != ei; ++ii) - if (g.getData(g.getEdgeDst(ii), Galois::MethodFlag::NONE).getPart() == clust) - degreet+=(int) g.getEdgeData(ii, Galois::MethodFlag::NONE); - card[clust]+=g.getData(n, Galois::MethodFlag::NONE).getWeight(); - degreeIn[clust] += degreet; - } -}; 
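ChangePart above moves each node to the cluster minimizing Dist[c] - 2*links(n,c)/card[c], where links(n,c) is the edge weight from the node into cluster c and card[c] is the cluster weight accumulated by ComputeClusterDist; this is the nearest-cluster rule of the Graclus-style refinement mode. A minimal sketch of that selection for a single node; how Dist[] itself is filled in is not fully visible in this hunk, so it is passed in precomputed.

#include <iostream>
#include <map>
#include <vector>

// Pick the cluster minimizing Dist[c] - 2 * links(n,c) / card[c], mirroring the
// loop in ChangePart above. `links` maps cluster id to the total edge weight
// from the node into that cluster (its own old cluster is always present).
int nearestCluster(const std::map<int, int>& links,
                   const std::vector<double>& Dist,
                   const std::vector<int>& card) {
  int best = -1;
  double dmin = 0.0;
  for (const auto& kv : links) {
    int c = kv.first;
    double d = Dist[c] - 2.0 * (double)kv.second / (double)card[c];
    if (best == -1 || d < dmin) {
      dmin = d;
      best = c;
    }
  }
  return best;
}

int main() {
  // Two clusters with equal Dist terms; the node has edge weight 1 into
  // cluster 0 and weight 4 into cluster 1, so it is moved to cluster 1.
  std::map<int, int> links;
  links[0] = 1;
  links[1] = 4;
  std::vector<double> Dist(2, 0.5);
  std::vector<int> card(2, 10);
  std::cout << "node moves to cluster " << nearestCluster(links, Dist, card) << "\n";
  return 0;
}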
-double ratiocut(int nbClust, int* degree, int* card) -{ - double res=0; - for (int i=0; ibegin(), graph->end(), comp, Galois::loopname("compute dists")); - T3.stop(); - //std::cout << "Time calc: "<begin(), graph->end(), ChangePart(*graph, nbParti, Dist, card), Galois::loopname("make moves")); - T4.stop(); - //std::cout << "Time move: "<& parts, unsigned minSize, unsigned maxSize, - refinementMode refM, bool verbose) { - MetisGraph* tGraph = coarseGraph; - int nbIter=1; - if (refM == GRACLUS) { - while ((tGraph = tGraph->getFinerGraph())) nbIter*=2; - nbIter /=4; - } - do { - MetisGraph* fineGraph = coarseGraph->getFinerGraph(); - bool doProject = true; - if (verbose) { - std::cout << "Cut " << computeCut(*coarseGraph->getGraph()) << " Weights "; - printPartStats(parts); - std::cout << "\n"; - } - //refine nparts times - switch (refM) { - case BKL2: refine_BKL2::go(minSize, maxSize, *coarseGraph->getGraph(), fineGraph ? fineGraph->getGraph() : nullptr, parts); doProject = false; break; - case BKL: refine_BKL(*coarseGraph->getGraph(), parts); break; - case ROBO: refineOneByOne(*coarseGraph->getGraph(), parts); break; - case GRACLUS: GraclusRefining(coarseGraph->getGraph(), parts.size(), nbIter);nbIter =(nbIter+1)/2;break; - default: abort(); - } - //project up - if (fineGraph && doProject) { - projectPart::go(coarseGraph, parts); - } - } while ((coarseGraph = coarseGraph->getFinerGraph())); -} - -/* -void balance(MetisGraph* coarseGraph, std::vector& parts, unsigned meanSize) { - MetisGraph* fineGraph = coarseGraph->getFinerGraph(); - refine_BKL2::go(meanSize, *coarseGraph->getGraph(), fineGraph ? fineGraph->getGraph() : nullptr, parts); -} -*/ - diff --git a/maxflow/galois/apps/independentset/CMakeLists.txt b/maxflow/galois/apps/independentset/CMakeLists.txt deleted file mode 100644 index fe360c9..0000000 --- a/maxflow/galois/apps/independentset/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -app(independentset) diff --git a/maxflow/galois/apps/independentset/IndependentSet.cpp b/maxflow/galois/apps/independentset/IndependentSet.cpp deleted file mode 100644 index 0ee44dd..0000000 --- a/maxflow/galois/apps/independentset/IndependentSet.cpp +++ /dev/null @@ -1,476 +0,0 @@ -/** Maximal independent set application -*- C++ -*- - * @file - * - * A simple spanning tree algorithm to demostrate the Galois system. - * - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. 
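The refine() driver above walks from the coarsest graph back to the finest, refining at each level and then projecting the result down, with each coarse node handing its part number to the one or two fine nodes it was built from (projectPart). A small standalone sketch of that projection step; the children array is a stand-in for the MetisNode child bookkeeping.

#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

// children[c] lists the fine-level nodes merged into coarse node c (one or two
// of them, as in MetisNode::numChildren()). Projecting a partition just copies
// each coarse node's part number down to its children.
std::vector<int> projectPartition(const std::vector<std::vector<int> >& children,
                                  const std::vector<int>& coarsePart,
                                  std::size_t numFineNodes) {
  std::vector<int> finePart(numFineNodes, -1);
  for (std::size_t c = 0; c < children.size(); ++c)
    for (std::size_t x = 0; x < children[c].size(); ++x)
      finePart[children[c][x]] = coarsePart[c];
  return finePart;
}

int main() {
  // Coarse node 0 was built from fine nodes {0,3}, node 1 from {1}, node 2 from {2,4}.
  std::vector<std::vector<int> > children = {{0, 3}, {1}, {2, 4}};
  std::vector<int> coarsePart = {0, 1, 1};
  std::vector<int> fine = projectPartition(children, coarsePart, 5);
  assert(fine[0] == 0 && fine[3] == 0 && fine[1] == 1 && fine[2] == 1 && fine[4] == 1);
  std::cout << "projected fine parts: 0 1 1 0 1\n";
  return 0;
}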
- * - * @author Donald Nguyen - */ -#include "Galois/Galois.h" -#include "Galois/Bag.h" -#include "Galois/Statistic.h" -#include "Galois/Graph/LCGraph.h" -#include "Galois/ParallelSTL/ParallelSTL.h" -#ifdef GALOIS_USE_EXP -#include "Galois/Runtime/ParallelWorkInline.h" -#endif -#include "llvm/Support/CommandLine.h" - -#include "Lonestar/BoilerPlate.h" - -#include -#include -#include -#include - -const char* name = "Maximal Independent Set"; -const char* desc = "Computes a maximal independent set (not maximum) of nodes in a graph"; -const char* url = "independent_set"; - -enum Algo { - serial, - pull, - nondet, - detBase, - detPrefix, - detDisjoint, - orderedBase, -}; - -namespace cll = llvm::cl; -static cll::opt filename(cll::Positional, cll::desc(""), cll::Required); -static cll::opt algo(cll::desc("Choose an algorithm:"), - cll::values( - clEnumVal(serial, "Serial"), - clEnumVal(pull, "Pull-based (deterministic)"), - clEnumVal(nondet, "Non-deterministic"), - clEnumVal(detBase, "Base deterministic execution"), - clEnumVal(detPrefix, "Prefix deterministic execution"), - clEnumVal(detDisjoint, "Disjoint deterministic execution"), - clEnumVal(orderedBase, "Base ordered execution"), - clEnumValEnd), cll::init(nondet)); - -enum MatchFlag { - UNMATCHED, OTHER_MATCHED, MATCHED -}; - -struct Node { - MatchFlag flag; - MatchFlag pendingFlag; - Node() : flag(UNMATCHED), pendingFlag(UNMATCHED) { } -}; - - -struct SerialAlgo { - typedef Galois::Graph::LC_InlineEdge_Graph - ::with_numa_alloc::type - ::with_no_lockable::type - ::with_compressed_node_ptr::type Graph; - typedef Graph::GraphNode GNode; - - void operator()(Graph& graph) { - for (Graph::iterator ii = graph.begin(), ei = graph.end(); ii != ei; ++ii) { - if (findUnmatched(graph, *ii)) - match(graph, *ii); - } - } - - bool findUnmatched(Graph& graph, GNode src) { - Node& me = graph.getData(src); - if (me.flag != UNMATCHED) - return false; - - for (Graph::edge_iterator ii = graph.edge_begin(src), - ei = graph.edge_end(src); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Node& data = graph.getData(dst); - if (data.flag == MATCHED) - return false; - } - - return true; - } - - void match(Graph& graph, GNode src) { - Node& me = graph.getData(src); - for (Graph::edge_iterator ii = graph.edge_begin(src), - ei = graph.edge_end(src); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Node& data = graph.getData(dst); - data.flag = OTHER_MATCHED; - } - - me.flag = MATCHED; - } -}; - -//! 
Basic operator for default and deterministic scheduling -template -struct Process { - typedef int tt_does_not_need_push; - typedef int tt_needs_per_iter_alloc; // For LocalState - - typedef Galois::Graph::LC_InlineEdge_Graph - ::with_numa_alloc::type - ::with_compressed_node_ptr::type Graph; - - typedef Graph::GraphNode GNode; - - struct LocalState { - bool mod; - LocalState(Process& self, Galois::PerIterAllocTy& alloc): mod(false) { } - }; - typedef LocalState GaloisDeterministicLocalState; - static_assert(Galois::has_deterministic_local_state::value, "Oops"); - - Graph& graph; - - Process(Graph& g): graph(g) { } - - template - bool build(GNode src) { - Node& me = graph.getData(src, Flag); - if (me.flag != UNMATCHED) - return false; - - for (Graph::edge_iterator ii = graph.edge_begin(src, Galois::MethodFlag::NONE), - ei = graph.edge_end(src, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Node& data = graph.getData(dst, Flag); - if (data.flag == MATCHED) - return false; - } - - return true; - } - - void modify(GNode src) { - Node& me = graph.getData(src, Galois::MethodFlag::NONE); - for (Graph::edge_iterator ii = graph.edge_begin(src, Galois::MethodFlag::NONE), - ei = graph.edge_end(src, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Node& data = graph.getData(dst, Galois::MethodFlag::NONE); - data.flag = OTHER_MATCHED; - } - - me.flag = MATCHED; - } - - void operator()(GNode src, Galois::UserContext& ctx) { - bool* modp; - if (Version == detDisjoint) { - bool used; - LocalState* localState = (LocalState*) ctx.getLocalState(used); - modp = &localState->mod; - if (used) { - if (*modp) - modify(src); - return; - } - } - - if (Version == detDisjoint) { - *modp = build(src); - } else { - bool mod = build(src); - if (Version == detPrefix) - return; - else - graph.getData(src, Galois::MethodFlag::WRITE); // Failsafe point - if (mod) - modify(src); - } - } -}; - -template -struct OrderedProcess { - typedef int tt_does_not_need_push; - - typedef typename Process<>::Graph Graph; - typedef Graph::GraphNode GNode; - - Graph& graph; - Process<> process; - - OrderedProcess(Graph& g): graph(g), process(g) { } - - template - void operator()(GNode src, C& ctx) { - (*this)(src); - } - - void operator()(GNode src) { - if (prefix) { - graph.edge_begin(src, Galois::MethodFlag::ALL); - } else { - if (process.build(src)) - process.modify(src); - } - } -}; - -template -struct Compare { - typedef typename Graph::GraphNode GNode; - Graph& graph; - - Compare(Graph& g): graph(g) { } - - bool operator()(const GNode& a, const GNode& b) const { - return &graph.getData(a, Galois::MethodFlag::NONE)< &graph.getData(b, Galois::MethodFlag::NONE); - } -}; - - -template -struct DefaultAlgo { - typedef typename Process<>::Graph Graph; - - void operator()(Graph& graph) { -#ifdef GALOIS_USE_EXP - typedef Galois::WorkList::BulkSynchronousInline<> WL; -#else - typedef Galois::WorkList::dChunkedFIFO<256> WL; -#endif - switch (algo) { - case nondet: - Galois::for_each(graph.begin(), graph.end(), Process<>(graph), Galois::wl()); - break; - case detBase: - Galois::for_each_det(graph.begin(), graph.end(), Process<>(graph)); - break; - case detPrefix: - Galois::for_each_det(graph.begin(), graph.end(), Process(graph), Process<>(graph)); - break; - case detDisjoint: - Galois::for_each_det(graph.begin(), graph.end(), Process(graph)); - break; - case orderedBase: - Galois::for_each_ordered(graph.begin(), graph.end(), Compare(graph), - OrderedProcess(graph), 
OrderedProcess(graph)); - break; - default: std::cerr << "Unknown algorithm" << algo << "\n"; abort(); - } - } -}; - -struct PullAlgo { - typedef Galois::Graph::LC_CSR_Graph - ::with_numa_alloc::type - ::with_no_lockable::type - Graph; - typedef Graph::GraphNode GNode; - - struct Pull { - typedef int tt_does_not_need_push; - typedef int tt_does_not_need_aborts; - - typedef Galois::InsertBag Bag; - - Graph& graph; - Bag& tcur; - Bag& next; - - void operator()(GNode src, Galois::UserContext&) { - (*this)(src); - } - - void operator()(GNode src) { - Node& n = graph.getData(src, Galois::MethodFlag::NONE); - - MatchFlag f = MATCHED; - for (Graph::edge_iterator ii = graph.edge_begin(src, Galois::MethodFlag::NONE), - ei = graph.edge_end(src, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Node& other = graph.getData(dst, Galois::MethodFlag::NONE); - if (dst >= src) - continue; - if (other.flag == MATCHED) { - f = OTHER_MATCHED; - break; - } else if (other.flag == UNMATCHED) { - f = UNMATCHED; - } - } - - if (f == UNMATCHED) { - next.push_back(src); - return; - } - - n.pendingFlag = f; - tcur.push_back(src); - } - }; - - struct Take { - Graph& graph; - void operator()(GNode src) { - Node& n = graph.getData(src, Galois::MethodFlag::NONE); - n.flag = n.pendingFlag; - } - }; - - void operator()(Graph& graph) { - Galois::Statistic rounds("Rounds"); - - typedef Galois::InsertBag Bag; - Bag bags[3]; - Bag *cur = &bags[0]; - Bag *tcur = &bags[1]; - Bag *next = &bags[2]; - uint64_t size = graph.size(); - uint64_t delta = graph.size() / 25; - - Graph::iterator ii = graph.begin(); - Graph::iterator ei = graph.begin(); - uint64_t remaining = std::min(size, delta); - std::advance(ei, remaining); - size -= remaining; - - while (ii != ei) { - Pull pull = { graph, *tcur, *next }; - - Galois::do_all(ii, ei, pull); - Take take = { graph }; - Galois::do_all_local(*tcur, take); - rounds += 1; - - while (!next->empty()) { - cur->clear(); - tcur->clear(); - std::swap(cur, next); - - Pull pull = { graph, *tcur, *next }; - Galois::do_all_local(*cur, pull); - Galois::do_all_local(*tcur, take); - rounds += 1; - } - ii = ei; - - remaining = std::min(size, delta); - std::advance(ei, remaining); - size -= remaining; - } - } -}; - -template -struct is_bad { - typedef typename Graph::GraphNode GNode; - Graph& graph; - - is_bad(Graph& g): graph(g) { } - - bool operator()(GNode n) const { - Node& me = graph.getData(n); - if (me.flag == MATCHED) { - for (typename Graph::edge_iterator ii = graph.edge_begin(n), - ei = graph.edge_end(n); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Node& data = graph.getData(dst); - if (dst != n && data.flag == MATCHED) { - std::cerr << "double match\n"; - return true; - } - } - } else if (me.flag == UNMATCHED) { - bool ok = false; - for (typename Graph::edge_iterator ii = graph.edge_begin(n), - ei = graph.edge_end(n); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Node& data = graph.getData(dst); - if (data.flag != UNMATCHED) { - ok = true; - } - } - if (!ok) { - std::cerr << "not maximal\n"; - return true; - } - } - return false; - } -}; - -template -struct is_matched { - typedef typename Graph::GraphNode GNode; - Graph& graph; - is_matched(Graph& g): graph(g) { } - - bool operator()(const GNode& n) const { - return graph.getData(n).flag == MATCHED; - } -}; - -template -bool verify(Graph& graph) { - return Galois::ParallelSTL::find_if( - graph.begin(), graph.end(), is_bad(graph)) - == graph.end(); -} - -template -void run() { - typedef 
typename Algo::Graph Graph; - - Algo algo; - Graph graph; - Galois::Graph::readGraph(graph, filename); - - // XXX Test if this matters - Galois::preAlloc(numThreads + (graph.size() * sizeof(Node) * numThreads / 8) / Galois::Runtime::MM::pageSize); - - Galois::reportPageAlloc("MeminfoPre"); - Galois::StatTimer T; - T.start(); - algo(graph); - T.stop(); - Galois::reportPageAlloc("MeminfoPost"); - - std::cout << "Cardinality of maximal independent set: " - << Galois::ParallelSTL::count_if(graph.begin(), graph.end(), is_matched(graph)) - << "\n"; - - if (!skipVerify && !verify(graph)) { - std::cerr << "verification failed\n"; - assert(0 && "verification failed"); - abort(); - } -} - -int main(int argc, char** argv) { - Galois::StatManager statManager; - LonestarStart(argc, argv, name, desc, url); - - switch (algo) { - case serial: run(); break; - case nondet: run >(); break; - case detBase: run >(); break; - case detPrefix: run >(); break; - case detDisjoint: run >(); break; - case orderedBase: run >(); break; - case pull: run(); break; - default: std::cerr << "Unknown algorithm" << algo << "\n"; abort(); - } - return 0; -} diff --git a/maxflow/galois/apps/kruskal/CMakeLists.txt b/maxflow/galois/apps/kruskal/CMakeLists.txt deleted file mode 100644 index c175fcd..0000000 --- a/maxflow/galois/apps/kruskal/CMakeLists.txt +++ /dev/null @@ -1,3 +0,0 @@ -app (KruskalSerial KruskalSerial.cpp) -app (KruskalHand KruskalHand.cpp) -app (KruskalOrdered KruskalOrdered.cpp) diff --git a/maxflow/galois/apps/kruskal/Kruskal.h b/maxflow/galois/apps/kruskal/Kruskal.h deleted file mode 100644 index 00e9083..0000000 --- a/maxflow/galois/apps/kruskal/Kruskal.h +++ /dev/null @@ -1,615 +0,0 @@ -/** Kruskal MST -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Kruskal MST. 
- * - * @author - */ - -#ifndef _KRUSKAL_H_ -#define _KRUSKAL_H_ - -#include "Galois/config.h" - -#include -#include -#include -#include -#include -#include -#include GALOIS_CXX11_STD_HEADER(unordered_set) - -#include -#include - -#include - -#include "Galois/Timer.h" -#include "Galois/Statistic.h" -#include "Galois/Galois.h" -#include "Galois/Graph/LCGraph.h" -#include "Galois/WorkList/WorkList.h" -#include "Galois/Runtime/Sampling.h" -#include "llvm/Support/CommandLine.h" - -#include "Lonestar/BoilerPlate.h" - -namespace cll = llvm::cl; - -static const char* const name = "Kruskal's Minimum Spanning Tree Algorithm "; -static const char* const desc = "Computes minimum weight spanning tree of an undirected graph"; -static const char* const url = "mst"; - -static cll::opt filename(cll::Positional, cll::desc(""), cll::Required); - -static cll::opt numPages ( - "preAlloc", - cll::desc ("number of pages(per thread) to pre-allocate from OS for Galois allocators "), - cll::init (32)); - -namespace kruskal { - -typedef unsigned Weight_ty; -typedef std::vector VecRep; -struct Edge; -typedef std::vector VecEdge; - -struct InEdge { - int src; - int dst; - Weight_ty weight; - - InEdge (int _src, int _dst, Weight_ty _weight) - : src (_src), dst (_dst), weight (_weight) - { - if (src == dst) { - fprintf (stderr, "Self edges not allowed\n"); - abort (); - } - - // nodes in an edge ordered so that reverse edges can be detected as duplicates - if (src > dst) { - std::swap (src, dst); - } - - assert (src <= dst); - } - - // equals function purely based on src and dst to detect duplicates - friend bool operator == (const InEdge& left, const InEdge& right) { - return (left.src == right.src) && (left.dst == right.dst); - } - - struct Hash { - - // hash function purely based on src and dst to find and remove duplicates - size_t operator () (const InEdge& edge) const { - return (size_t (edge.src) << 32) ^ size_t(edge.dst); - } - }; - -}; - -struct Edge: public InEdge { - unsigned id; - - Edge (unsigned _id, int _src, int _dst, Weight_ty _weight) - : InEdge (_src, _dst, _weight), id (_id) {} - - - friend bool operator == (const Edge& left, const Edge& right) { - return (left.id == right.id) - && (left.src == right.src) - && (left.dst == right.dst) - && (left.weight == right.weight); - } - - std::string str () const { - char s[256]; - sprintf (s, "(id=%d,src=%d,dst=%d,weight=%d)", id, src, dst, weight); - return std::string (s); - } - - friend std::ostream& operator << (std::ostream& out, const Edge& edge) { - return (out << edge.str ()); - } - - struct Comparator { - static inline int compare (const Edge& left, const Edge& right) { - int d = left.weight - right.weight; - return (d != 0) ? 
d : (left.id - right.id); - } - - bool operator () (const Edge& left, const Edge& right) const { - return compare (left, right) < 0; - } - }; -}; - -template -static void unionByRank_int (int rep1, int rep2, V& repVec) { - assert (rep1 >= 0 && size_t (rep1) < repVec.size ()); - assert (rep2 >= 0 && size_t (rep2) < repVec.size ()); - assert (repVec[rep1] < 0); - assert (repVec[rep2] < 0); - - if (repVec[rep2] < repVec[rep1]) { - std::swap (rep1, rep2); - } - assert (repVec[rep1] <= repVec[rep2]); - - repVec[rep1] += repVec[rep2]; - repVec[rep2] = rep1; - assert (repVec[rep1] < 0); -} - -template -static void linkUp_int (int other, int master, V& repVec) { - assert (other >= 0 && size_t (other) < repVec.size ()); - assert (master >= 0 && size_t (master) < repVec.size ()); - assert (repVec[other] < 0); - // assert (repVec[master] < 0); // can't check this in parallel - - repVec[other] = master; -} - -template -int findPCiter_int (const int node, V& repVec) { - assert (node >= 0 && size_t (node) < repVec.size ()); - - if (repVec[node] < 0) { return node; } - - assert (repVec[node] >= 0); - - int rep = repVec[node]; - - if (repVec[rep] < 0) { return rep; } - - while (repVec[rep] >= 0) { - rep = repVec[rep]; - } - - // path compress - for (int n = node; n != rep;) { - repVec[n] = rep; - n = repVec[n]; - } - - assert (rep >= 0 && size_t (rep) < repVec.size ()); - return rep; -} - -template -int getRep_int (const int node, const V& repVec) { - assert (node >= 0 && size_t (node) < repVec.size ()); - - if (repVec[node] < 0) { return node; } - - int rep = repVec[node]; - while (repVec[rep] >= 0) { - rep = repVec[rep]; - } - assert (repVec[rep] < 0); - return rep; -} - - -class Kruskal { - -public: - - typedef std::vector VecEdge; - - typedef std::unordered_set SetInEdge; - -protected: - - virtual const std::string getVersion () const = 0; - - //! doesn't do anything by default. Sub-classes may choose to override - //! 
in order to to specific initialization - virtual void initRemaining (const size_t numNodes, const VecEdge& edges) { }; - - virtual void runMST (const size_t numNodes, const VecEdge& edges, - size_t& mstWeight, size_t& totalIter) = 0; - - - void readGraph (const std::string& filename, size_t& numNodes, SetInEdge& edgeSet) { - - typedef Galois::Graph::LC_CSR_Graph InGraph; - typedef InGraph::GraphNode InGNode; - - InGraph ingraph; - Galois::Graph::readGraph (ingraph, filename); - - // numbering nodes 0..N-1, where N is number of nodes - // in the graph - unsigned idCntr = 0; - for (InGraph::iterator n = ingraph.begin (), endn = ingraph.end (); - n != endn; ++n) { - ingraph.getData (*n, Galois::MethodFlag::NONE) = idCntr++; - } - numNodes = ingraph.size (); - - - size_t numEdges = 0; - edgeSet.clear (); - - for (InGraph::iterator n = ingraph.begin (), endn = ingraph.end (); - n != endn; ++n) { - - unsigned src = ingraph.getData (*n, Galois::MethodFlag::NONE); - - - for (InGraph::edge_iterator e = ingraph.edge_begin (src, Galois::MethodFlag::NONE), - ende = ingraph.edge_end (src, Galois::MethodFlag::NONE); e != ende; ++e) { - - unsigned dst = ingraph.getData (ingraph.getEdgeDst (e), Galois::MethodFlag::NONE); - - if (src != dst) { - const Weight_ty& w = ingraph.getEdgeData (e); - InEdge ke (src, dst, w); - - std::pair res = edgeSet.insert (ke); - - if (res.second) { - ++numEdges; - } else if (w < res.first->weight) { - edgeSet.insert (edgeSet.erase (res.first), ke); - } - } else { - Galois::Runtime::LL::gDebug("Warning: Ignoring self edge (", - src, ",", dst, ",", ingraph.getEdgeData (*e), ")"); - } - } - - } - - std::cout << "Graph read with nodes=" << ingraph.size () << ", edges=" << numEdges << std::endl; - - - } - - - - virtual void readPBBSfile (const std::string& filename, size_t& numNodes, SetInEdge& edgeSet) { - - typedef unsigned NodeData; - typedef float EdgeData; - - static const unsigned WEIGHT_SCALE = 1000000; - - - std::cout << "Reading input from: " << filename << std::endl; - - // std::ifstream inFile (filename.c_str ()); - FILE* inFile = fopen (filename.c_str (), "r"); - - // std::string header; - char header[128]; - - // inFile >> header; - fscanf (inFile, "%s", header); - - // inFile.seekg (0, std::ios::beg); - - - size_t numEdges = 0; - numNodes = 0; - edgeSet.clear (); - - // while (!inFile.eof ()) { - while (!feof (inFile)) { - NodeData srcIdx; - NodeData dstIdx; - EdgeData w; - - // inFile >> srcIdx; - // inFile >> dstIdx; - // inFile >> w; - - fscanf (inFile, "%d", &srcIdx); - fscanf (inFile, "%d", &dstIdx); - fscanf (inFile, "%g", &w); - - Weight_ty integ_wt = (WEIGHT_SCALE * w); - - if (srcIdx != dstIdx) { - - InEdge ke (srcIdx, dstIdx, integ_wt); - - std::pair res = edgeSet.insert (ke); - //edges.push_back (ke); - if (res.second) { - ++numEdges; - } else if (integ_wt < res.first->weight) { - edgeSet.insert (edgeSet.erase (res.first), ke); - } - - } else { - std::fprintf (stderr, "Warning: Ignoring self edge (%d, %d, %d)\n", - srcIdx, dstIdx, integ_wt); - } - - // find max node id; - numNodes = std::max (numNodes, size_t (std::max (srcIdx, dstIdx))); - } - // inFile.close (); - fclose (inFile); - - ++numNodes; // nodes number from 0 ... 
N-1 - - - std::cout << "PBBS graph read with nodes = " << numNodes - << ", edges = " << numEdges << std::endl; - - - } - - - void writePBBSfile (const std::string& filename, const SetInEdge& edgeSet) { - - FILE* outFile = std::fopen (filename.c_str (), "w"); - assert (outFile != NULL); - - fprintf (outFile, "WeightedEdgeArray\n"); - - for (SetInEdge::const_iterator i = edgeSet.begin () - , endi = edgeSet.end (); i != endi; ++i) { - - fprintf (outFile, "%d %d %e\n", i->src, i->dst, double (i->weight)); - } - - fclose (outFile); - } - - -public: - - virtual void run (int argc, char* argv[]) { - Galois::StatManager stat; - LonestarStart (argc, argv, name, desc, url); - - size_t numNodes; - SetInEdge edgeSet; - - size_t mstWeight = 0; - size_t totalIter = 0; - - Galois::StatTimer t_read ("InitializeTime"); - - t_read.start (); - readGraph (filename, numNodes, edgeSet); - // readPBBSfile (filename, numNodes, edgeSet); - - - // writePBBSfile ("edgeList.pbbs", edgeSet); - // std::exit (0); - - VecEdge edges; - - unsigned edgeIDcntr = 0; - for (SetInEdge::const_iterator i = edgeSet.begin () - , endi = edgeSet.end (); i != endi; ++i) { - - edges.push_back (Edge (edgeIDcntr++, i->src, i->dst, i->weight)); - } - - assert (edges.size () == edgeSet.size ()); - t_read.stop (); - - - initRemaining (numNodes, edges); - - // pre allocate memory from OS for parallel runs - Galois::preAlloc (numPages*Galois::getActiveThreads ()); - - Galois::StatTimer t; - - t.start (); - // GaloisRuntime::beginSampling (); - runMST (numNodes, edges, mstWeight, totalIter); - // GaloisRuntime::endSampling (); - t.stop (); - - printResults (mstWeight, totalIter); - - if (!skipVerify) { - verify (numNodes, edgeSet, mstWeight); - } - - } - -private: - - void printResults (const size_t mstSum, const size_t iter) const { - std::cout << getVersion () << ", MST sum=" << mstSum << ", iterations=" << iter << std::endl; - } - - template - static void freeVecPtr (std::vector& vec) { - for (typename std::vector::iterator i = vec.begin (), ei = vec.end (); - i != ei; ++i) { - - delete *i; - *i = NULL; - } - - } - - struct PrimNode { - typedef std::vector VecPNode_ty; - typedef std::vector VecWeight_ty; - typedef boost::counting_iterator Adj_iterator_ty; - - - unsigned id; - Weight_ty weight; - bool inMST; - VecPNode_ty adj; - VecWeight_ty adjWts; - - - PrimNode (unsigned id): - id (id), - weight (std::numeric_limits::max ()), - inMST (false) {} - - void addEdge (PrimNode* pn, Weight_ty w) { - assert (pn != NULL); - assert (std::find (adj.begin (), adj.end (), pn) == adj.end ()); - - adj.push_back (pn); - adjWts.push_back (w); - assert (adj.size () == adjWts.size ()); - } - - Adj_iterator_ty adj_begin () const { - return Adj_iterator_ty (0); - } - - Adj_iterator_ty adj_end () const { - return Adj_iterator_ty (adj.size ()); - } - - PrimNode* getDst (Adj_iterator_ty i) const { - return adj[*i]; - } - - Weight_ty getWeight (Adj_iterator_ty i) const { - return adjWts[*i]; - } - - }; - - struct PrimUpdate { - PrimNode* dst; - Weight_ty weight; - - PrimUpdate (PrimNode* dst, Weight_ty weight) - : dst (dst), weight (weight) { - assert (dst != NULL); - } - - - bool operator < (const PrimUpdate& that) const { - if (this->weight == that.weight) { - return (this->dst->id < that.dst->id); - - } else { - return (this->weight < that.weight); - } - } - }; - - size_t runPrim (const size_t numNodes, const SetInEdge& edgeSet) const { - - std::vector primNodes (numNodes, NULL); - - for (size_t i = 0; i < numNodes; ++i) { - primNodes[i] = new PrimNode (i); - 
} - - for (SetInEdge::const_iterator e = edgeSet.begin (), ende = edgeSet.end (); - e != ende; ++e) { - - - assert (primNodes[e->src] != NULL); - assert (primNodes[e->dst] != NULL); - - // add undirected edge - primNodes[e->src]->addEdge (primNodes[e->dst], e->weight); - primNodes[e->dst]->addEdge (primNodes[e->src], e->weight); - - } - - std::set workset; - - PrimNode* root = primNodes[0]; - PrimUpdate upd (root, 0); - workset.insert (upd); - - - size_t iter = 0; - size_t mstSum = 0; - - while (!workset.empty ()) { - ++iter; - PrimUpdate upd = *(workset.begin ()); - workset.erase (workset.begin ()); - - PrimNode& src = *(upd.dst); - - if (!src.inMST) { - src.inMST = true; - src.weight = upd.weight; - - mstSum += upd.weight; - - for (PrimNode::Adj_iterator_ty i = src.adj_begin (), endi = src.adj_end (); i != endi; ++i) { - - PrimNode& dst = *(src.getDst (i)); - Weight_ty wt = src.getWeight (i); - - if (!dst.inMST) { - PrimUpdate addUpd (&dst, wt); - workset.insert (addUpd); - } - - } - } // end if; - } - - std::cout << "Number of iterations taken by Prim = " << iter << std::endl; - - freeVecPtr (primNodes); - return mstSum; - } - - - - - - bool verify (const size_t numNodes, const SetInEdge& edgeSet, const size_t kruskalSum) const { - Galois::StatTimer pt("PrimTime"); - pt.start (); - size_t primSum = runPrim (numNodes, edgeSet); - pt.stop (); - - if (primSum != kruskalSum) { - std::cerr << "ERROR. Incorrect MST weight=" << kruskalSum - << ", weight computed by Prim is=" << primSum << std::endl; - abort (); - - } else { - std::cout << "OK. Correct MST weight=" << kruskalSum << std::endl; - } - - return false; - } -}; - - -} // namespace kruskal - - -#endif // _KRUSKAL_H_ diff --git a/maxflow/galois/apps/kruskal/KruskalHand.cpp b/maxflow/galois/apps/kruskal/KruskalHand.cpp deleted file mode 100644 index a1f926f..0000000 --- a/maxflow/galois/apps/kruskal/KruskalHand.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/** Parallel Handwritten Kruskal -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Parallel Handwritten version of Kruskal. 
- * - * @author - */ - -#include "KruskalHand.h" - -int main (int argc, char* argv[]) { - kruskal::KruskalHand k; - k.run (argc, argv); - return 0; -} diff --git a/maxflow/galois/apps/kruskal/KruskalHand.h b/maxflow/galois/apps/kruskal/KruskalHand.h deleted file mode 100644 index 98d8ee2..0000000 --- a/maxflow/galois/apps/kruskal/KruskalHand.h +++ /dev/null @@ -1,64 +0,0 @@ -/** Kruskal MST -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Kruskal MST. - * - * @author - */ - -#ifndef KRUSKAL_HAND_H -#define KRUSKAL_HAND_H - -#include "Kruskal.h" -#include "KruskalParallel.h" - -static cll::opt maxRounds ( - "maxRounds", - cll::desc ("number of rounds for window executor"), - cll::init (600)); - -static cll::opt lowThresh ( - "lowThresh", - cll::desc ("low parallelism factor for workList refill in window executor"), - cll::init (16)); - -namespace kruskal { - - -class KruskalHand: public Kruskal { - protected: - - virtual const std::string getVersion () const { return "Handwritten using window-based two-phase union-find"; } - - virtual void runMST (const size_t numNodes, const VecEdge& edges, - size_t& mstWeight, size_t& totalIter) { - - runMSTsimple (numNodes, edges, mstWeight, totalIter, UnionFindWindow (maxRounds, lowThresh)); - - } -}; - - -}// end namespace kruskal - -#endif // KRUSKAL_HAND_H - diff --git a/maxflow/galois/apps/kruskal/KruskalOrdered.cpp b/maxflow/galois/apps/kruskal/KruskalOrdered.cpp deleted file mode 100644 index 7eb3f56..0000000 --- a/maxflow/galois/apps/kruskal/KruskalOrdered.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/** Parallel Kruskal -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. 
Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Parallel version of Kruskal. - * - * @author - */ - -#include "KruskalOrdered.h" - -int main (int argc, char* argv[]) { - kruskal::KruskalOrdered k; - k.run (argc, argv); - return 0; -} diff --git a/maxflow/galois/apps/kruskal/KruskalOrdered.h b/maxflow/galois/apps/kruskal/KruskalOrdered.h deleted file mode 100644 index 96010da..0000000 --- a/maxflow/galois/apps/kruskal/KruskalOrdered.h +++ /dev/null @@ -1,107 +0,0 @@ -/** Kruskal MST -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Kruskal MST. 
- * - * @author - */ - -#ifndef KRUSKAL_ORDERED_H -#define KRUSKAL_ORDERED_H - -#include "Kruskal.h" -#include "KruskalParallel.h" - -namespace kruskal { - - -struct UnionFindUsingRuntime { - void operator () ( - EdgeCtxWL& perThrdWL, - VecRep_ty& repVec, - VecAtomicCtxPtr& repOwnerCtxVec, - size_t& mstWeight, - size_t& totalIter, - Galois::TimeAccumulator& sortTimer, - Galois::TimeAccumulator& findTimer, - Galois::TimeAccumulator& linkUpTimer, - Accumulator& findIter, - Accumulator& linkUpIter) const { - - - EdgeCtxWL* nextWL = NULL; // not used actually - Accumulator mstSum; - - Galois::for_each_ordered (perThrdWL.begin_all (), perThrdWL.end_all (), - Edge::Comparator (), - FindLoop (repVec, repOwnerCtxVec, findIter), - LinkUpLoop (repVec, repOwnerCtxVec, *nextWL, mstSum, linkUpIter)); - - - totalIter += findIter.reduce (); - mstWeight += mstSum.reduce (); - - - } -}; - - - -class KruskalOrdered: public Kruskal { - protected: - - virtual const std::string getVersion () const { return "Parallel Kruskal using Ordered Runtime"; } - - virtual void runMST (const size_t numNodes, const VecEdge& edges, - size_t& mstWeight, size_t& totalIter) { - - runMSTsimple (numNodes, edges, mstWeight, totalIter, UnionFindUsingRuntime ()); - - } -}; - - - - - - - - - - - - - - - - - - - - -}// end namespace kruskal - - - - -#endif // KRUSKAL_ORDERED_H - diff --git a/maxflow/galois/apps/kruskal/KruskalParallel.h b/maxflow/galois/apps/kruskal/KruskalParallel.h deleted file mode 100644 index 6c83524..0000000 --- a/maxflow/galois/apps/kruskal/KruskalParallel.h +++ /dev/null @@ -1,601 +0,0 @@ -/** Kruskal MST -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Kruskal MST. 
- * - * @author - */ - -#ifndef KRUSKAL_PARALLEL_H -#define KRUSKAL_PARALLEL_H - -#include "Galois/Atomic.h" -#include "Galois/Accumulator.h" -#include "Galois/Runtime/PerThreadWorkList.h" -#include "Galois/Runtime/ll/CompilerSpecific.h" - -#include "Kruskal.h" - -namespace kruskal { - -struct EdgeCtx; - -typedef VecRep VecRep_ty; - -typedef Galois::Runtime::PerThreadVector EdgeWL; -typedef Galois::Runtime::PerThreadVector EdgeCtxWL; -typedef Galois::Runtime::MM::FSBGaloisAllocator EdgeCtxAlloc; -typedef Edge::Comparator Cmp; -typedef Galois::GAccumulator Accumulator; - -// typedef Galois::GAtomicPadded AtomicCtxPtr; -typedef Galois::GAtomic AtomicCtxPtr; -typedef std::vector VecAtomicCtxPtr; - -static const int NULL_EDGE_ID = -1; - -struct EdgeCtx: public Edge { - - bool srcFail; - bool dstFail; - - EdgeCtx (const Edge& e) - : Edge (e) - , srcFail (false), dstFail(false) - {} - - void setFail (const int rep) { - assert (rep != NULL_EDGE_ID); - - if (rep == src) { - srcFail = true; - - } else if (rep == dst) { - dstFail = true; - - } else { - abort (); - } - } - - void resetStatus () { - srcFail = false; - dstFail = false; - } - - bool isSrcFail () const { - return srcFail; - } - - bool isDstFail () const { - return dstFail; - } - - bool isSelf () const { - return src == dst; - } - - bool statusIsReset () const { - return (!srcFail && !dstFail); - } - -}; - - -struct FindLoop { - - // typedef char tt_does_not_need_push; - - VecRep_ty& repVec; - VecAtomicCtxPtr& repOwnerCtxVec; - Accumulator& findIter; - - FindLoop ( - VecRep_ty& repVec, - VecAtomicCtxPtr& repOwnerCtxVec, - Accumulator& findIter) - : - repVec (repVec), - repOwnerCtxVec (repOwnerCtxVec), - findIter (findIter) - {} - - GALOIS_ATTRIBUTE_PROF_NOINLINE void claimAsMin (EdgeCtx& ctx, const int rep) { - - - bool succ = repOwnerCtxVec[rep].cas (NULL, &ctx); - - assert (repOwnerCtxVec[rep] != NULL); - - if (!succ) { - for (EdgeCtx* curr = repOwnerCtxVec[rep]; - Cmp::compare (*curr, ctx) > 0; curr = repOwnerCtxVec[rep]) { - - assert (curr != NULL); - succ = repOwnerCtxVec[rep].cas (curr, &ctx); - - if (succ) { - curr->setFail (rep); - assert (Cmp::compare (*repOwnerCtxVec[rep], ctx) <= 0); - break; - } - } - } - - if (!succ) { - ctx.setFail (rep); - } - - } - - GALOIS_ATTRIBUTE_PROF_NOINLINE void operator () (EdgeCtx& ctx) { - findIter += 1; - - assert (ctx.statusIsReset ()); - - ctx.src = kruskal::findPCiter_int (ctx.src, repVec); - ctx.dst = kruskal::findPCiter_int (ctx.dst, repVec); - - if (ctx.src != ctx.dst) { - claimAsMin (ctx, ctx.src); - claimAsMin (ctx, ctx.dst); - - } - - } - - template - GALOIS_ATTRIBUTE_PROF_NOINLINE void operator () (EdgeCtx& ctx, C&) { - (*this) (ctx); - } - - GALOIS_ATTRIBUTE_PROF_NOINLINE void operator () (EdgeCtx& ctx) const { - const_cast(this)->operator () (ctx); - } -}; - -template -struct LinkUpLoop { - // typedef char tt_does_not_need_push; - - VecRep_ty& repVec; - VecAtomicCtxPtr& repOwnerCtxVec; - EdgeCtxWL& nextWL; - Accumulator& mstSum; - Accumulator& linkUpIter; - - LinkUpLoop ( - VecRep_ty& repVec, - VecAtomicCtxPtr& repOwnerCtxVec, - EdgeCtxWL& nextWL, - Accumulator& mstSum, - Accumulator& linkUpIter) - : - repVec (repVec), - repOwnerCtxVec (repOwnerCtxVec), - nextWL (nextWL), - mstSum (mstSum), - linkUpIter (linkUpIter) - {} - - - GALOIS_ATTRIBUTE_PROF_NOINLINE bool updateODG_test (EdgeCtx& ctx, const int rep) { - assert (rep >= 0 && size_t (rep) < repOwnerCtxVec.size ()); - - if (usingOrderedRuntime) { - return ((EdgeCtx*) repOwnerCtxVec[rep])->id == ctx.id; - - } else { - return 
(repOwnerCtxVec[rep] == &ctx); - } - } - - GALOIS_ATTRIBUTE_PROF_NOINLINE void updateODG_reset (EdgeCtx& ctx, const int rep) { - - assert (rep >= 0 && size_t (rep) < repOwnerCtxVec.size ()); - assert (updateODG_test (ctx, rep)); - - repOwnerCtxVec[rep] = NULL; - } - - GALOIS_ATTRIBUTE_PROF_NOINLINE void operator () (EdgeCtx& ctx) { - - - if (!ctx.isSelf () ) { - - - if (ctx.isSrcFail () && ctx.isDstFail ()) { - - ctx.resetStatus (); - - if (usingOrderedRuntime) { - Galois::Runtime::signalConflict (NULL); - - } else { - nextWL.get ().push_back (ctx); - } - - } else { - - - if (!ctx.isSrcFail ()) { - assert (updateODG_test (ctx, ctx.src)); - linkUp_int (ctx.src, ctx.dst, repVec); - - } else if (!ctx.isDstFail ()) { - assert (updateODG_test (ctx, ctx.dst)); - linkUp_int (ctx.dst, ctx.src, repVec); - } - - linkUpIter += 1; - mstSum += ctx.weight; - - if (!ctx.isSrcFail ()) { - updateODG_reset (ctx, ctx.src); - } - - if (!ctx.isDstFail ()) { - updateODG_reset (ctx, ctx.dst); - } - - } // end else - } - - - - } - - template - GALOIS_ATTRIBUTE_PROF_NOINLINE void operator () (EdgeCtx& ctx, C&) { - (*this) (ctx); - } - - GALOIS_ATTRIBUTE_PROF_NOINLINE void operator () (EdgeCtx& ctx) const { - const_cast(this)->operator () (ctx); - } -}; - -template -struct PreSort { - WL& wl; - - PreSort (WL& wl): wl (wl) {} - - GALOIS_ATTRIBUTE_PROF_NOINLINE void operator () (unsigned tid, unsigned numT) { - assert (tid < wl.numRows ()); - for (unsigned i = numT; i < wl.numRows (); ++i) { - assert (wl[i].empty ()); - } - std::sort (wl[tid].begin (), wl[tid].end (), Cmp ()); - } -}; - -template -void presort (WL& wl, Galois::TimeAccumulator& sortTimer) { - sortTimer.start (); - Galois::on_each (PreSort (wl), "pre_sort"); - sortTimer.stop (); -} - -template -struct Range { - typedef typename std::iterator_traits::difference_type difference_type; - typedef typename std::iterator_traits::value_type value_type; - - typedef Galois::Runtime::PerThreadStorage PTS; - - Iter m_beg; - Iter m_end; - - Range (): m_beg (), m_end () {} - - Range (Iter b, Iter e): m_beg (b), m_end (e) {} - - // TODO: improve for non-random iterators - const value_type* atOffset (difference_type d) { - if (m_beg == m_end) { - return NULL; - - } else { - if (d >= std::distance (m_beg, m_end)) { - d = std::distance (m_beg, m_end) - 1; - } - Iter i (m_beg); - std::advance (i, d); - return &(*i); - } - } -}; - - - - -template -struct RefillWorkList { - - const T* windowLimit; - typename Range::PTS& ranges; - WL& wl; - - RefillWorkList ( - const T* windowLimit, - typename Range::PTS& ranges, - WL& wl) - : - windowLimit (windowLimit), - ranges (ranges), - wl (wl) - {} - - void operator () (unsigned tid, unsigned numT) { - - assert (tid < ranges.size ()); - - for (unsigned i = numT; i < ranges.size (); ++i) { - Range& r = *ranges.getRemote (i); - assert (r.m_beg == r.m_end); - } - - Range& r = *ranges.getRemote (tid); - - for (;r.m_beg != r.m_end; ++r.m_beg) { - if (Cmp::compare (*r.m_beg, *windowLimit) <= 0) { - wl.get ().push_back (*r.m_beg); - } else { - break; - } - } - } // end method -}; - - -template -void refillWorkList (WL& wl, typename Range::PTS& ranges, const size_t windowSize, const size_t numT) { - - - typedef typename Range::value_type T; - - size_t perThrdSize = windowSize / numT; - - const T* windowLimit = NULL; - - for (unsigned i = 0; i < numT; ++i) { - assert (i < ranges.size ()); - const T* lim = ranges.getRemote (i)->atOffset (perThrdSize); - - if (lim != NULL) { - if (windowLimit == NULL || (Cmp::compare (*lim, *windowLimit) > 0)) 
{ - windowLimit = lim; - } - } - } - - Galois::Runtime::LL::gDebug("size before refill: ", wl.size_all ()); - - if (windowLimit != NULL) { - Galois::Runtime::LL::gDebug("new window limit: ", windowLimit->str ().c_str ()); - - Galois::on_each (RefillWorkList (windowLimit, ranges, wl), "refill"); - - for (unsigned i = 0; i < ranges.size (); ++i) { - Range& r = *ranges.getRemote (i); - - if (r.m_beg != r.m_end) { - // assuming that ranges are sorted - // after refill, the first element in each range should be bigger - // than windowLimit - assert (Cmp::compare (*r.m_beg, *windowLimit) > 0); - } - } - } else { - - for (unsigned i = 0; i < ranges.size (); ++i) { - Range& r = *ranges.getRemote (i); - assert (r.m_beg == r.m_end); - } - - - } - - Galois::Runtime::LL::gDebug("size after refill: ", wl.size_all ()); -} - - -struct UnionFindWindow { - - size_t maxRounds; - size_t lowThresh; - - UnionFindWindow (): maxRounds (64), lowThresh (2) {} - - UnionFindWindow (size_t maxRounds, size_t lowThresh) - : maxRounds (maxRounds), lowThresh (lowThresh) - {} - - void operator () ( - EdgeCtxWL& perThrdWL, - VecRep_ty& repVec, - VecAtomicCtxPtr& repOwnerCtxVec, - size_t& mstWeight, - size_t& totalIter, - Galois::TimeAccumulator& sortTimer, - Galois::TimeAccumulator& findTimer, - Galois::TimeAccumulator& linkUpTimer, - Accumulator& findIter, - Accumulator& linkUpIter) const { - - - typedef EdgeCtxWL::local_iterator Iter; - typedef Range Range_ty; - typedef Range_ty::PTS PerThrdRange; - typedef Range_ty::difference_type Diff_ty; - - PerThrdRange ranges; - - presort (perThrdWL, sortTimer); - - for (unsigned i = 0; i < ranges.size (); ++i) { - *ranges.getRemote (i) = Range_ty (perThrdWL[i].begin (), perThrdWL[i].end ()); - } - - - const size_t numT = Galois::getActiveThreads (); - - const size_t totalDist = perThrdWL.size_all (); - const size_t windowSize = totalDist / maxRounds; - - const size_t lowThreshSize = windowSize / lowThresh; - - unsigned round = 0; - size_t numUnions = 0; - Accumulator mstSum; - - EdgeCtxWL* currWL = new EdgeCtxWL (); - EdgeCtxWL* nextWL = new EdgeCtxWL (); - - while (true) { - ++round; - std::swap (nextWL, currWL); - nextWL->clear_all (); - - if (currWL->size_all () <= lowThreshSize) { - // size_t s = lowThreshSize - currWL->size_all () + 1; - sortTimer.start (); - refillWorkList (*currWL, ranges, windowSize, numT); - sortTimer.stop (); - } - - if (currWL->empty_all ()) { - break; - } - - // Galois::Runtime::beginSampling (); - findTimer.start (); - Galois::do_all (currWL->begin_all (), currWL->end_all (), - FindLoop (repVec, repOwnerCtxVec, findIter), - Galois::loopname("find_loop")); - findTimer.stop (); - // Galois::Runtime::endSampling (); - - - // Galois::Runtime::beginSampling (); - linkUpTimer.start (); - Galois::do_all (currWL->begin_all (), currWL->end_all (), - LinkUpLoop (repVec, repOwnerCtxVec, *nextWL, mstSum, linkUpIter), - Galois::loopname("link_up_loop")); - linkUpTimer.stop (); - // Galois::Runtime::endSampling (); - - int u = linkUpIter.reduce () - numUnions; - numUnions = linkUpIter.reduce (); - - if (!nextWL->empty_all ()) { - assert (u > 0 && "no unions, no progress?"); - } - - } - - totalIter += findIter.reduce (); - mstWeight += mstSum.reduce (); - - std::cout << "Number of rounds: " << round << std::endl; - - delete currWL; - delete nextWL; - - } - -}; - - -struct FillUp { - EdgeCtxWL& wl; - - explicit FillUp (EdgeCtxWL& wl): wl (wl) {} - - GALOIS_ATTRIBUTE_PROF_NOINLINE void operator () (const Edge& edge) { - wl.get ().push_back (edge); - } -}; - - - 
-template -void runMSTsimple (const size_t numNodes, const VecEdge& edges, - size_t& mstWeight, size_t& totalIter, UF ufLoop) { - - totalIter = 0; - mstWeight = 0; - - Galois::TimeAccumulator runningTime; - Galois::TimeAccumulator sortTimer; - Galois::TimeAccumulator findTimer; - Galois::TimeAccumulator linkUpTimer; - Galois::TimeAccumulator fillUpTimer; - - Accumulator findIter; - Accumulator linkUpIter; - - - VecRep_ty repVec (numNodes, -1); - VecAtomicCtxPtr repOwnerCtxVec (numNodes, AtomicCtxPtr (NULL)); - - - fillUpTimer.start (); - EdgeCtxWL initWL; - unsigned numT = Galois::getActiveThreads (); - for (unsigned i = 0; i < numT; ++i) { - initWL[i].reserve ((edges.size () + numT - 1) / numT); - } - - Galois::do_all (edges.begin (), edges.end (), FillUp (initWL), Galois::loopname("fill_init")); - - fillUpTimer.stop (); - - runningTime.start (); - - ufLoop (initWL, repVec, repOwnerCtxVec, - mstWeight, totalIter, sortTimer, findTimer, linkUpTimer, findIter, linkUpIter); - - runningTime.stop (); - - std::cout << "Number of FindLoop iterations = " << findIter.reduce () << std::endl; - std::cout << "Number of LinkUpLoop iterations = " << linkUpIter.reduce () << std::endl; - - std::cout << "MST running time without initialization/destruction: " << runningTime.get () << std::endl; - std::cout << "Time taken by sortTimer: " << sortTimer.get () << std::endl; - std::cout << "Time taken by FindLoop: " << findTimer.get () << std::endl; - std::cout << "Time taken by LinkUpLoop: " << linkUpTimer.get () << std::endl; - std::cout << "Time taken by FillUp: " << fillUpTimer.get () << std::endl; - -} - - - - - -}// end namespace kruskal - - - -#endif // KRUSKAL_PARALLEL_H - diff --git a/maxflow/galois/apps/kruskal/KruskalSerial.cpp b/maxflow/galois/apps/kruskal/KruskalSerial.cpp deleted file mode 100644 index a3ca583..0000000 --- a/maxflow/galois/apps/kruskal/KruskalSerial.cpp +++ /dev/null @@ -1,34 +0,0 @@ -/** Serial Kruskal -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Serial version of Kruskal. 
- * - * @author - */ - -#include "KruskalSerial.h" - -int main (int argc, char* argv[]) { - kruskal::KruskalSerial ks; - ks.run (argc, argv); - return 0; -} diff --git a/maxflow/galois/apps/kruskal/KruskalSerial.h b/maxflow/galois/apps/kruskal/KruskalSerial.h deleted file mode 100644 index de60fd1..0000000 --- a/maxflow/galois/apps/kruskal/KruskalSerial.h +++ /dev/null @@ -1,199 +0,0 @@ -/** Kruskal Serial ordered version -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Kruskal Serial ordered version. - * - * @author - */ - -#ifndef KRUSKAL_SERIAL_H_ -#define KRUSKAL_SERIAL_H_ - -#include "Kruskal.h" - -namespace kruskal { -class KruskalSerial: public Kruskal { -protected: -// #define EDGE_FRAC (4/3); - - virtual const std::string getVersion () const { return "Serial Ordered Kruskal"; } - - - virtual void runMSTsplit (const size_t numNodes, const VecEdge& in_edges, - size_t& mstWeight, size_t& totalIter) { - - Galois::TimeAccumulator t_run; - Galois::TimeAccumulator t_init; - Galois::TimeAccumulator t_sort; - Galois::TimeAccumulator t_loop; - - t_init.start (); - VecEdge edges (in_edges); - VecRep repVec (numNodes, -1); - t_init.stop (); - - t_run.start (); - - // size_t splitSize = EDGE_FRAC * numNodes; - size_t splitSize = numNodes; - - t_sort.start (); - VecEdge::iterator splitPoint = edges.begin () + splitSize; - - std::nth_element (edges.begin (), splitPoint, edges.end (), Edge::Comparator ()); - - std::sort (edges.begin (), splitPoint, Edge::Comparator ()); - t_sort.stop (); - - - size_t mstSum = 0; - size_t iter = 0; - - t_loop.start (); - for (VecEdge::const_iterator i = edges.begin (), ei = splitPoint; - i != ei; ++i) { - - ++iter; - - int rep1 = findPCiter_int (i->src, repVec); - int rep2 = findPCiter_int (i->dst, repVec); - - if (rep1 != rep2) { - unionByRank_int (rep1, rep2, repVec); - - mstSum += i->weight; - } - - } - - VecEdge remaining; - - for (VecEdge::const_iterator i = splitPoint, ei = edges.end (); - i != ei; ++i) { - - int rep1 = findPCiter_int (i->src, repVec); - int rep2 = findPCiter_int (i->dst, repVec); - - if (rep1 != rep2) { - remaining.push_back (*i); - } - } - t_loop.stop (); - - std::cout << "Number of remaining edges needing to be processed: " << remaining.size () << std::endl; - - t_sort.start (); - std::sort (remaining.begin (), remaining.end (), Edge::Comparator ()); - t_sort.stop (); - - t_loop.start (); - for (VecEdge::const_iterator i = remaining.begin (), ei = remaining.end (); - i != ei; ++i) { - - ++iter; - - int rep1 = 
findPCiter_int (i->src, repVec); - int rep2 = findPCiter_int (i->dst, repVec); - - if (rep1 != rep2) { - unionByRank_int (rep1, rep2, repVec); - - mstSum += i->weight; - } - - } - t_loop.stop (); - - - mstWeight = mstSum; - totalIter = iter; - - t_run.stop (); - - std::cout << "Running time excluding initialization and destruction: " << t_run.get () << std::endl; - std::cout << "Initialization time: " << t_init.get () << std::endl; - std::cout << "Sorting time: " << t_sort.get () << std::endl; - std::cout << "Loop time: " << t_loop.get () << std::endl; - - } - - virtual void runMSTsimple (const size_t numNodes, const VecEdge& in_edges, - size_t& mstWeight, size_t& totalIter) { - - - Galois::StatTimer t_run("Running time excluding initialization & destruction: "); - Galois::StatTimer t_init("initialization time: "); - Galois::StatTimer t_sort("serial sorting time: "); - Galois::StatTimer t_loop("serial loop time: "); - - t_init.start (); - VecRep repVec (numNodes, -1); - VecEdge edges (in_edges); - t_init.stop (); - - - - t_run.start (); - - t_sort.start (); - std::sort (edges.begin (), edges.end (), Edge::Comparator ()); - t_sort.stop (); - - - size_t mstSum = 0; - size_t iter = 0; - - t_loop.start (); - for (VecEdge::const_iterator i = edges.begin (), ei = edges.end (); - i != ei; ++i) { - - ++iter; - - int rep1 = findPCiter_int (i->src, repVec); - int rep2 = findPCiter_int (i->dst, repVec); - - if (rep1 != rep2) { - unionByRank_int (rep1, rep2, repVec); - - mstSum += i->weight; - } - - } - - mstWeight = mstSum; - totalIter = iter; - - t_loop.stop (); - - t_run.stop (); - } - - virtual void runMST (const size_t numNodes, const VecEdge& edges, - size_t& mstWeight, size_t& totalIter) { - - runMSTsplit (numNodes, edges, mstWeight, totalIter); - } - -}; - -} // end namespace kruskal -#endif // KRUSKAL_SERIAL_H_ diff --git a/maxflow/galois/apps/matching/CMakeLists.txt b/maxflow/galois/apps/matching/CMakeLists.txt deleted file mode 100644 index dacc694..0000000 --- a/maxflow/galois/apps/matching/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -app(bipartite-mcm bipartite-mcm.cpp) diff --git a/maxflow/galois/apps/matching/bipartite-mcm.cpp b/maxflow/galois/apps/matching/bipartite-mcm.cpp deleted file mode 100644 index 3c34ab8..0000000 --- a/maxflow/galois/apps/matching/bipartite-mcm.cpp +++ /dev/null @@ -1,1099 +0,0 @@ -/** Maximum Cardinality Matching in Bipartite Graphs -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Maximum cardinality matching in bipartite graphs. 
For more information see - * - * K. Mehlhorn and S. Naeher. LEDA: A Platform for Combinatorial and Geometric - * Computing. Cambridge University Press, 1999. - * - * @author Donald Nguyen - */ - -// TODO(ddn): Needs a graph implementation that supports reversing edges more efficiently - -#include "Galois/Galois.h" -#include "Galois/Timer.h" -#include "Galois/Statistic.h" -#include "Galois/Graph/Graph.h" -#include "Galois/Graph/FileGraph.h" -#include "llvm/Support/CommandLine.h" -#include "Lonestar/BoilerPlate.h" -#ifdef GALOIS_USE_EXP -#include "Galois/PriorityScheduling.h" -#endif - -#include -#include -#include -#include - -namespace cll = llvm::cl; - -static const char* name = "Maximum cardinality matching in bipartite graphs"; -static const char* desc = - "Computes maximum cardinality matching in bipartite graphs. " - "A matching of G is a subset of edges that do not share an endpoint. " - "The maximum cardinality matching is the matching with the most number of edges."; -static const char* url = "bipartite_mcm"; - -enum MatchingAlgo { - pfpAlgo, - ffAlgo, - abmpAlgo -}; - -enum ExecutionType { - serial, - parallel -}; - -static cll::opt algo(cll::desc("Choose an algorithm:"), - cll::values( - clEnumVal(pfpAlgo, "Preflow-push"), - clEnumVal(ffAlgo, "Ford-Fulkerson augmenting paths"), - clEnumVal(abmpAlgo, "Alt-Blum-Mehlhorn-Paul"), - clEnumValEnd), cll::init(abmpAlgo)); -static cll::opt executionType(cll::desc("Choose execution type:"), - cll::values( - clEnumVal(serial, "Serial"), - clEnumVal(parallel, "Parallel"), - clEnumValEnd), cll::init(parallel)); -static cll::opt N(cll::Positional, cll::desc(""), cll::Required); -static cll::opt numEdges(cll::Positional, cll::desc(""), cll::Required); -static cll::opt numGroups(cll::Positional, cll::desc(""), cll::Required); -static cll::opt seed(cll::Positional, cll::desc(""), cll::Required); - -template -struct BipartiteGraph: public Galois::Graph::FirstGraph { - typedef Galois::Graph::FirstGraph Super; - typedef std::vector NodeList; - typedef NodeTy node_type; - typedef EdgeTy edge_type; - - NodeList A; - NodeList B; - - void addNode(const typename Super::GraphNode& n, bool isA, Galois::MethodFlag mflag = Galois::MethodFlag::ALL) { - if (isA) { - A.push_back(n); - } else { - B.push_back(n); - } - Super::addNode(n, mflag); - } - - void addNode(const typename Super::GraphNode& n, Galois::MethodFlag mflag = Galois::MethodFlag::ALL) { - Super::addNode(n, mflag); - } -}; - -//******************************** Common ************************ - -template class Algo> -struct Exists { - bool operator()(G& g, const typename G::edge_iterator&) { return true; } -}; - -template -struct GraphTypes { - typedef typename G::GraphNode GraphNode; - typedef std::pair Edge; - typedef std::vector Matching; -}; - -struct BaseNode { - size_t id; - int degree; - bool covered; - bool free; - bool reachable; // for preparing node cover - BaseNode(): id(-1) { } - BaseNode(size_t i): id(i), degree(0), covered(false), free(true), reachable(false) { } -}; - -template -struct MarkReachable { - typedef typename G::GraphNode GraphNode; - typedef typename G::edge_iterator edge_iterator; - - void operator()(G& g, const GraphNode& root) { - std::deque queue; - queue.push_back(root); - - while (!queue.empty()) { - GraphNode cur = queue.front(); - queue.pop_front(); - if (g.getData(cur).reachable) - continue; - g.getData(cur).reachable = true; - for (edge_iterator ii = g.edge_begin(cur), ei = g.edge_end(cur); ii != ei; ++ii) { - GraphNode dst = g.getEdgeDst(ii); - 
queue.push_back(dst); - } - } - } -}; - -template class Algo> -struct PrepareForVerifier { - typedef typename GraphTypes::Edge Edge; - typedef typename GraphTypes::Matching Matching; - typedef typename G::GraphNode GraphNode; - typedef typename G::NodeList NodeList; - typedef typename G::node_type node_type; - typedef typename G::edge_iterator edge_iterator; - - void operator()(G& g, Matching* matching) { - Exists exists; - - for (typename NodeList::iterator src = g.B.begin(), esrc = g.B.end(); src != esrc; ++src) { - for (edge_iterator ii = g.edge_begin(*src), ei = g.edge_end(*src); ii != ei; ++ii) { - GraphNode dst = g.getEdgeDst(ii); - if (exists(g, ii)) { - matching->push_back(Edge(*src, dst)); - } - } - } - - for (typename NodeList::iterator ii = g.A.begin(), ei = g.A.end(); ii != ei; ++ii) { - if (g.getData(*ii).free) - MarkReachable()(g, *ii); - } - - for (typename Matching::iterator ii = matching->begin(), ei = matching->end(); ii != ei; ++ii) { - if (g.getData(ii->first).reachable) { - // Reachable from a free node in A - g.getData(ii->first).covered = true; - } else { - g.getData(ii->second).covered = true; - } - } - } -}; - -//********************** FF Algorithm ************************** - -struct FFNode: public BaseNode { - int pred; - bool reached; - FFNode(): BaseNode() { } - FFNode(size_t i): BaseNode(i), pred(-1), reached(false) { } -}; - -//! Switch between concurrent and serial instances -template struct InstanceWrapper; -template -struct InstanceWrapper { - T1& m_t1; - T2& m_t2; - typedef T2 Type; - InstanceWrapper(T1& t1, T2& t2): m_t1(t1), m_t2(t2) { } - T2& get() { return m_t2; } -}; -template -struct InstanceWrapper { - T1& m_t1; - T2& m_t2; - typedef T1 Type; - InstanceWrapper(T1& t1, T2& t2): m_t1(t1), m_t2(t2) { } - T1& get() { return m_t1; } -}; - -//! Switch between concurrent and serial types -template struct TypeWrapper; -template -struct TypeWrapper { - typedef T2 Type; -}; -template -struct TypeWrapper { - typedef T1 Type; -}; - - -//! Matching algorithm of Ford and Fulkerson -template -struct MatchingFF { - typedef typename G::GraphNode GraphNode; - typedef typename G::NodeList NodeList; - typedef typename G::node_type node_type; - typedef typename G::edge_iterator edge_iterator; - typedef typename GraphTypes::Edge Edge; - - typedef std::vector SerialRevs; - typedef std::vector SerialReached; - - typedef std::vector::other> ParallelRevs; - typedef std::vector::other> ParallelReached; - - typedef InstanceWrapper RevsWrapper; - typedef InstanceWrapper ReachedWrapper; - - typedef std::deque::other> Queue; - typedef std::vector::other> Preds; - - static const Galois::MethodFlag flag = Concurrent ? Galois::MethodFlag::CHECK_CONFLICT : Galois::MethodFlag::NONE; - - std::string name() { return std::string(Concurrent ? 
"Concurrent" : "Serial") + " Ford-Fulkerson"; } - - bool findAugmentingPath(G& g, const GraphNode& root, Galois::UserContext& ctx, - typename RevsWrapper::Type& revs, typename ReachedWrapper::Type& reached) { - Queue queue(ctx.getPerIterAlloc()); - Preds preds(ctx.getPerIterAlloc()); - - // Order matters between (1) and (2) - g.getData(root, flag).reached = true; // (1) - reached.push_back(root); // (2) - - queue.push_back(root); - - while (!queue.empty()) { - GraphNode src = queue.front(); - queue.pop_front(); - - for (edge_iterator ii = g.edge_begin(src, flag), ei = g.edge_end(src, flag); ii != ei; ++ii) { - GraphNode dst = g.getEdgeDst(ii); - node_type& ddst = g.getData(dst, Galois::MethodFlag::NONE); - if (ddst.reached) - continue; - - ddst.reached = true; - reached.push_back(dst); - - ddst.pred = preds.size(); - preds.push_back(src); - - if (ddst.free) { - // Fail-safe point modulo ``reached'' which is handled separately - ddst.free = false; - GraphNode cur = dst; - while (cur != root) { - GraphNode pred = preds[g.getData(cur, Galois::MethodFlag::NONE).pred]; - revs.push_back(Edge(pred, cur)); - cur = pred; - } - return true; - } else { - assert(std::distance(g.edge_begin(dst), g.edge_end(dst)) == 1); - for (edge_iterator jj = g.edge_begin(dst, flag), ej = g.edge_end(dst, flag); jj != ej; ++jj) { - GraphNode cur = g.getEdgeDst(jj); - - g.getData(cur, Galois::MethodFlag::NONE).pred = preds.size(); - preds.push_back(dst); - - g.getData(cur, Galois::MethodFlag::NONE).reached = true; - reached.push_back(cur); - - queue.push_back(cur); - } - } - } - } - return false; - } - - //! Makes sure that ``reached'' to properly reset even if we get aborted - struct ReachedCleanup: public Galois::Runtime::Releasable { - G& g; - typename ReachedWrapper::Type& reached; - - ReachedCleanup(G& _g, typename ReachedWrapper::Type& r): g(_g), reached(r) { } - - ~ReachedCleanup() { - cleanup(); - } - - virtual void release() { cleanup(); } - - void cleanup() { - // In non-concurrent case, we can continue reusing reached - if (Concurrent) - clear(); - } - - void clear() { - for (typename ReachedWrapper::Type::iterator ii = reached.begin(), ei = reached.end(); ii != ei; ++ii) { - assert(g.getData(*ii, Galois::MethodFlag::NONE).reached); - g.getData(*ii, Galois::MethodFlag::NONE).reached = false; - } - reached.clear(); - } - }; - - void operator()(G& g, const GraphNode& src, Galois::UserContext& ctx, - typename RevsWrapper::Type& revs, typename ReachedWrapper::Type& reached) { - - ReachedCleanup cleanup(g, reached); - - if (findAugmentingPath(g, src, ctx, revs, reached)) { - g.getData(src, Galois::MethodFlag::NONE).free = false; - - // Reverse edges in augmenting path - for (typename RevsWrapper::Type::iterator jj = revs.begin(), ej = revs.end(); jj != ej; ++jj) { - g.removeEdge(jj->first, g.findEdge(jj->first, jj->second, Galois::MethodFlag::NONE), Galois::MethodFlag::NONE); - g.addEdge(jj->second, jj->first, Galois::MethodFlag::NONE); - } - revs.clear(); - - cleanup.clear(); - } - } - - //! 
Main entry point for Galois::for_each - struct Process { - typedef int tt_needs_per_iter_alloc; - MatchingFF& parent; - G& g; - SerialRevs& serialRevs; - SerialReached& serialReached; - - Process(MatchingFF& _parent, G& _g, SerialRevs& revs, SerialReached& reached): - parent(_parent), g(_g), serialRevs(revs), serialReached(reached) { } - - void operator()(const GraphNode& node, Galois::UserContext& ctx) { - if (!g.getData(node, flag).free) - return; - - ParallelRevs parallelRevs(ctx.getPerIterAlloc()); - ParallelReached parallelReached(ctx.getPerIterAlloc()); - - parent(g, node, ctx, - RevsWrapper(serialRevs, parallelRevs).get(), - ReachedWrapper(serialReached, parallelReached).get()); - } - }; - - void operator()(G& g) { - SerialRevs revs; - SerialReached reached; - - Galois::setActiveThreads(Concurrent ? numThreads : 1); - Galois::for_each(g.A.begin(), g.A.end(), Process(*this, g, revs, reached)); - } -}; - - -//********************** ABMP Algorithm ************************** - -struct ABMPNode: public FFNode { - unsigned layer; - int next; - ABMPNode(): FFNode() { } - ABMPNode(size_t i): FFNode(i), layer(0), next(0) { } -}; - -//! Matching algorithm of Alt, Blum, Mehlhorn and Paul -template -struct MatchingABMP { - typedef typename G::NodeList NodeList; - typedef typename G::GraphNode GraphNode; - typedef typename G::edge_iterator edge_iterator; - typedef typename G::node_type node_type; - typedef typename GraphTypes::Edge Edge; - typedef std::vector::other> Revs; - typedef std::pair WorkItem; - - static const Galois::MethodFlag flag = Concurrent ? Galois::MethodFlag::CHECK_CONFLICT : Galois::MethodFlag::NONE; - - struct Indexer: public std::unary_function { - unsigned operator()(const WorkItem& n) const { - return n.second; - } - }; - - struct Less: public std::binary_function { - bool operator()(const WorkItem& n1, const WorkItem& n2) const { - if (n1.second < n2.second) return true; - if (n1.second > n2.second) return false; - return n1.first < n2.first; - } - }; - - struct Greater: public std::binary_function { - bool operator()(const WorkItem& n1, const WorkItem& n2) const { - if (n1.second > n2.second) return true; - if (n1.second < n2.second) return false; - return n1.first > n2.first; - } - }; - - std::string name() { - return std::string(Concurrent ? "Concurrent" : "Serial") + " Alt-Blum-Mehlhorn-Paul"; - } - - bool nextEdge(G& g, const GraphNode& src, GraphNode& next) { - node_type& dsrc = g.getData(src, Galois::MethodFlag::NONE); - unsigned l = dsrc.layer - 1; - - // Start search where we last left off - edge_iterator ii = g.edge_begin(src, flag); - std::advance(ii, dsrc.next); - edge_iterator ei = g.edge_end(src, flag); - for (; ii != ei && g.getData(g.getEdgeDst(ii), Galois::MethodFlag::NONE).layer != l; - ++ii, ++dsrc.next) { - ; - } - - if (ii == ei) { - return false; - } else { - next = g.getEdgeDst(ii); - return true; - } - } - - //! 
Returns true if we've added a new element - bool operator()(G& g, const GraphNode& root, Galois::UserContext& ctx) { - Revs revs(ctx.getPerIterAlloc()); - - GraphNode cur = root; - - while (true) { - GraphNode next; - if (g.getData(cur, Galois::MethodFlag::NONE).free && g.getData(cur, Galois::MethodFlag::NONE).layer == 0) { - assert(g.getData(root, Galois::MethodFlag::NONE).free); - // (1) Breakthrough - g.getData(cur, Galois::MethodFlag::NONE).free = g.getData(root, Galois::MethodFlag::NONE).free = false; - - // Reverse edges in augmenting path - for (typename Revs::iterator ii = revs.begin(), ei = revs.end(); ii != ei; ++ii) { - g.removeEdge(ii->first, g.findEdge(ii->first, ii->second, Galois::MethodFlag::NONE), Galois::MethodFlag::NONE); - g.addEdge(ii->second, ii->first, Galois::MethodFlag::NONE); - } - //revs.clear(); - if (revs.size() > 1024) { - std::cout << "WARNING: allocating large amounts in parallel: " - << revs.size() << "elements\n"; - } - return false; - } else if (nextEdge(g, cur, next)) { - // (2) Advance - revs.push_back(Edge(cur, next)); - cur = next; - } else { - // (3) Retreat - unsigned& layer = g.getData(cur, Galois::MethodFlag::NONE).layer; - layer += 2; - g.getData(cur, Galois::MethodFlag::NONE).next = 0; - if (revs.empty()) { - ctx.push(std::make_pair(cur, layer)); - return true; - } - cur = revs.back().first; - revs.pop_back(); - } - } - } - - struct Process { - typedef int tt_needs_parallel_break; - typedef int tt_needs_per_iter_alloc; - MatchingABMP& parent; - G& g; - unsigned& maxLayer; - size_t& size; - - Process(MatchingABMP& p, G& _g, unsigned& m, size_t& s): - parent(p), g(_g), maxLayer(m), size(s) { } - - void operator()(const WorkItem& item, Galois::UserContext& ctx) { - unsigned curLayer = item.second; - if (curLayer > maxLayer) { - std::cout << "Reached max layer: " << curLayer << "\n"; - ctx.breakLoop(); - return; - } - //if (size <= 50 * curLayer) { - // std::cout << "Reached min size: " << size << "\n"; - // ctx.breakLoop(); - //} - if (!parent(g, item.first, ctx)) { - //__sync_fetch_and_add(&size, -1); - } - } - }; - - void operator()(G& g) { - Galois::StatTimer t("serial"); - t.start(); - std::vector initial; - for (typename NodeList::iterator ii = g.A.begin(), ei = g.A.end(); ii != ei; ++ii) { - g.getData(*ii).layer = 1; - if (g.getData(*ii).free) - initial.push_back(std::make_pair(*ii, 1)); - } - t.stop(); - - unsigned maxLayer = (unsigned) (0.1*sqrt(g.size())); - size_t size = initial.size(); - Galois::setActiveThreads(Concurrent ? numThreads : 1); - - using namespace Galois::WorkList; - - typedef ChunkedFIFO<1024> Chunk; - typedef dChunkedFIFO<1024> dChunk; - typedef OrderedByIntegerMetric OBIM; - -#ifdef GALOIS_USE_EXP - Exp::PriAuto<1024,Indexer,OBIM,Less,Greater>::for_each( - initial.begin(), initial.end(), Process(*this, g, maxLayer, size)); -#else - Galois::for_each(initial.begin(), initial.end(), Process(*this, g, maxLayer, size), Galois::wl()); -#endif - - t.start(); - MatchingFF algo; - std::cout << "Switching to " << algo.name() << "\n"; - algo(g); - t.stop(); - } -}; - -// *************************** MaxFlow Algorithm ******************************* -struct MFNode: public BaseNode { - size_t excess; - unsigned height; - int current; - MFNode(): BaseNode() { } - MFNode(size_t i): BaseNode(i), excess(0), height(1), current(0) { } -}; - -struct MFEdge { - int cap; - MFEdge(): cap(1) { } - MFEdge(int c): cap(c) { } -}; - - -//! 
Matching via reduction to maxflow -template -struct MatchingMF { - typedef typename G::NodeList NodeList; - typedef typename G::GraphNode GraphNode; - typedef typename G::edge_iterator edge_iterator; - typedef typename G::iterator iterator; - typedef typename G::node_type node_type; - typedef typename G::edge_type edge_type; - static const Galois::MethodFlag flag = Concurrent ? Galois::MethodFlag::CHECK_CONFLICT : Galois::MethodFlag::NONE; - /** - * Beta parameter the original Goldberg algorithm to control when global - * relabeling occurs. For comparison purposes, we keep them the same as - * before, but it is possible to achieve much better performance by adjusting - * the global relabel frequency. - */ - static const int BETA = 12; - /** - * Alpha parameter the original Goldberg algorithm to control when global - * relabeling occurs. For comparison purposes, we keep them the same as - * before, but it is possible to achieve much better performance by adjusting - * the global relabel frequency. - */ - static const int ALPHA = 6; - - std::string name() { return std::string(Concurrent ? "Concurrent" : "Serial") + " Max Flow"; } - - void reduceCapacity(edge_type& edge1, edge_type& edge2, int amount) { - edge1.cap -= amount; - edge2.cap += amount; - } - - bool discharge(G& g, const GraphNode& src, Galois::UserContext& ctx, - const GraphNode& source, const GraphNode& sink, unsigned numNodes) { - node_type& node = g.getData(src, flag); - //unsigned prevHeight = node.height; - bool relabeled = false; - - if (node.excess == 0) { - return false; - } - - while (true) { - Galois::MethodFlag f = relabeled ? Galois::MethodFlag::NONE : flag; - bool finished = false; - int current = 0; - - for (edge_iterator ii = g.edge_begin(src, f), ei = g.edge_end(src, f); ii != ei; ++ii, ++current) { - GraphNode dst = g.getEdgeDst(ii); - edge_type& edge = g.getEdgeData(ii); - if (edge.cap == 0 || current < node.current) - continue; - - node_type& dnode = g.getData(dst, Galois::MethodFlag::NONE); - if (node.height - 1 != dnode.height) - continue; - - // Push flow - int amount = std::min(static_cast(node.excess), edge.cap); - reduceCapacity(edge, g.getEdgeData(g.findEdge(dst, src, Galois::MethodFlag::NONE)), amount); - - // Only add once - if (dst != sink && dst != source && dnode.excess == 0) - ctx.push(dst); - - node.excess -= amount; - dnode.excess += amount; - - if (node.excess == 0) { - finished = true; - node.current = current; - break; - } - } - - if (finished) - break; - - relabel(g, src, numNodes); - relabeled = true; - - //prevHeight = node.height; - } - - return relabeled; - } - - void relabel(G& g, const GraphNode& src, unsigned numNodes) { - unsigned minHeight = std::numeric_limits::max(); - int minEdge; - - int current = 0; - for (edge_iterator ii = g.edge_begin(src, Galois::MethodFlag::NONE), ei = g.edge_end(src, Galois::MethodFlag::NONE); ii != ei; ++ii, ++current) { - GraphNode dst = g.getEdgeDst(ii); - int cap = g.getEdgeData(ii).cap; - if (cap > 0) { - node_type& dnode = g.getData(dst, Galois::MethodFlag::NONE); - if (dnode.height < minHeight) { - minHeight = dnode.height; - minEdge = current; - } - } - } - - assert(minHeight != std::numeric_limits::max()); - ++minHeight; - - node_type& node = g.getData(src, Galois::MethodFlag::NONE); - node.height = minHeight; - node.current = minEdge; - } - - struct Process { - typedef int tt_needs_parallel_break; - - MatchingMF& parent; - G& g; - const GraphNode& source; - const GraphNode& sink; - unsigned numNodes; - unsigned globalRelabelInterval; - bool& 
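// Editorial sketch (illustrative, not from the deleted file): relabel() above lifts a node to one
// more than the smallest height among neighbors still reachable through an edge with residual
// capacity, and records that edge as node.current. The same rule on plain arrays; the tiny graph
// below is made up for illustration.
#include <cstdio>
#include <limits>
#include <vector>

struct Arc { int dst; int cap; };

static void relabel(int u, const std::vector<std::vector<Arc>>& g,
                    std::vector<int>& height, std::vector<int>& current) {
  int minHeight = std::numeric_limits<int>::max();
  int minEdge = 0;
  for (int e = 0; e < (int)g[u].size(); ++e) {
    const Arc& a = g[u][e];
    if (a.cap > 0 && height[a.dst] < minHeight) { // only edges with residual capacity count
      minHeight = height[a.dst];
      minEdge = e;
    }
  }
  height[u] = minHeight + 1;   // lift u just enough to push along the chosen edge
  current[u] = minEdge;
}

int main() {
  // node 0 has residual edges to node 1 (height 4) and node 2 (height 2)
  std::vector<std::vector<Arc>> g = {{{1, 1}, {2, 3}}, {}, {}};
  std::vector<int> height = {1, 4, 2}, current = {0, 0, 0};
  relabel(0, g, height, current);
  std::printf("height[0] = %d, current[0] = %d\n", height[0], current[0]); // 3 and 1
  return 0;
}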
shouldGlobalRelabel; - unsigned counter; - - Process(MatchingMF& p, - G& _g, - const GraphNode& _source, - const GraphNode& _sink, - unsigned _numNodes, - unsigned i, - bool& s): - parent(p), g(_g), source(_source), sink(_sink), numNodes(_numNodes), - globalRelabelInterval(i), shouldGlobalRelabel(s), counter(0) { } - - void operator()(const GraphNode& src, Galois::UserContext& ctx) { - int increment = 1; - if (parent.discharge(g, src, ctx, source, sink, numNodes)) { - increment += BETA; - } - - counter += increment; - if (globalRelabelInterval && counter >= globalRelabelInterval) { - shouldGlobalRelabel = true; - ctx.breakLoop(); - return; - } - } - }; - - template - struct UpdateHeights { - typedef int tt_does_not_need_stats; - G& g; - - UpdateHeights(G& _g): g(_g) { } - //! Do reverse BFS on residual graph. - void operator()(const GraphNode& src, Galois::UserContext& ctx) { - for (edge_iterator - ii = g.edge_begin(src, useCAS ? Galois::MethodFlag::NONE : flag), - ei = g.edge_end(src, useCAS ? Galois::MethodFlag::NONE : flag); - ii != ei; ++ii) { - GraphNode dst = g.getEdgeDst(ii); - if (g.getEdgeData(g.findEdge(dst, src, Galois::MethodFlag::NONE)).cap > 0) { - node_type& node = g.getData(dst, Galois::MethodFlag::NONE); - unsigned newHeight = g.getData(src, Galois::MethodFlag::NONE).height + 1; - if (useCAS) { - unsigned oldHeight; - while (newHeight < (oldHeight = node.height)) { - if (__sync_bool_compare_and_swap(&node.height, oldHeight, newHeight)) { - ctx.push(dst); - break; - } - } - } else { - if (newHeight < node.height) { - node.height = newHeight; - ctx.push(dst); - } - } - } - } - } - }; - - void globalRelabel(G& g, const GraphNode& source, const GraphNode& sink, unsigned numNodes, - std::vector& incoming) { - - for (iterator ii = g.begin(), ei = g.end(); ii != ei; ++ii) { - GraphNode src = *ii; - node_type& node = g.getData(src, Galois::MethodFlag::NONE); - node.height = numNodes; - node.current = 0; - if (src == sink) - node.height = 0; - } - - Galois::StatTimer T("BfsTime"); - T.start(); - Galois::for_each(sink, UpdateHeights(g)); - T.stop(); - - for (iterator ii = g.begin(), ei = g.end(); ii != ei; ++ii) { - GraphNode src = *ii; - node_type& node = g.getData(src, Galois::MethodFlag::NONE); - if (src == sink || src == source) - continue; - if (node.excess > 0) - incoming.push_back(src); - } - } - - void initializePreflow(G& g, const GraphNode& source, std::vector& initial) { - for (edge_iterator ii = g.edge_begin(source), ei = g.edge_end(source); ii != ei; ++ii) { - GraphNode dst = g.getEdgeDst(ii); - edge_type& edge = g.getEdgeData(ii); - int cap = edge.cap; - if (cap > 0) - initial.push_back(dst); - reduceCapacity(edge, g.getEdgeData(g.findEdge(dst, source)), cap); - g.getData(dst).excess += cap; - } - } - - //! 
Adds reverse edges, - void initializeGraph(G& g, GraphNode& source, GraphNode& sink, unsigned& numNodes, - unsigned& interval) { - size_t numEdges = 0; - - numNodes = g.size(); - source = g.createNode(node_type(numNodes++)); - sink = g.createNode(node_type(numNodes++)); - g.getData(source).height = numNodes; - g.addNode(source); - g.addNode(sink); - - // Add reverse edge - for (typename NodeList::iterator src = g.A.begin(), esrc = g.A.end(); src != esrc; ++src) { - for (edge_iterator ii = g.edge_begin(*src), ei = g.edge_end(*src); - ii != ei; ++ii) { - GraphNode dst = g.getEdgeDst(ii); - g.getEdgeData(g.addMultiEdge(dst, *src, Galois::MethodFlag::ALL)) = edge_type(0); - ++numEdges; - } - } - - // Add edge from source to each node in A - for (typename NodeList::iterator src = g.A.begin(), esrc = g.A.end(); src != esrc; ++src) { - g.getEdgeData(g.addMultiEdge(source, *src, Galois::MethodFlag::ALL)) = edge_type(); - g.getEdgeData(g.addMultiEdge(*src, source, Galois::MethodFlag::ALL)) = edge_type(0); - ++numEdges; - } - - // Add edge to sink from each node in B - for (typename NodeList::iterator src = g.B.begin(), esrc = g.B.end(); src != esrc; ++src) { - g.getEdgeData(g.addMultiEdge(*src, sink, Galois::MethodFlag::ALL)) = edge_type(); - g.getEdgeData(g.addMultiEdge(sink, *src, Galois::MethodFlag::ALL)) = edge_type(0); - ++numEdges; - } - - interval = numNodes * ALPHA + numEdges; - } - - //! Extract matching from saturated edges - void extractMatching(G& g) { - for (typename NodeList::iterator src = g.A.begin(), esrc = g.A.end(); src != esrc; ++src) { - for (edge_iterator ii = g.edge_begin(*src), ei = g.edge_end(*src); ii != ei; ++ii) { - GraphNode dst = g.getEdgeDst(ii); - if (g.getEdgeData(ii).cap == 0) { - g.getData(*src).free = g.getData(dst).free = false; - } - } - } - } - - void operator()(G& g) { - Galois::StatTimer t("serial"); - - t.start(); - GraphNode source; - GraphNode sink; - unsigned numNodes; - unsigned interval; - initializeGraph(g, source, sink, numNodes, interval); - - std::vector initial; - initializePreflow(g, source, initial); - t.stop(); - - bool shouldGlobalRelabel = false; - Galois::setActiveThreads(Concurrent ? 
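// Editorial sketch (illustrative, not from the deleted file): initializeGraph above reduces
// bipartite matching to max-flow by adding a source wired to every A-node, a sink wired from
// every B-node, unit capacities on forward edges and zero-capacity reverse edges for the
// residual graph. A stand-alone construction of that network as an edge list; the vertex
// numbering scheme is an assumption made here.
#include <cstdio>
#include <utility>
#include <vector>

struct FlowEdge { int from, to, cap; };

int main() {
  const int numA = 2, numB = 2;
  // Bipartite edges, A-node index -> B-node index.
  const std::vector<std::pair<int,int>> bip = {{0, 0}, {0, 1}, {1, 1}};

  const int source = numA + numB, sink = source + 1;
  std::vector<FlowEdge> edges;
  auto addPair = [&](int u, int v) {             // forward cap 1, reverse cap 0
    edges.push_back({u, v, 1});
    edges.push_back({v, u, 0});
  };
  for (auto& e : bip) addPair(e.first, numA + e.second);  // original A->B edges
  for (int a = 0; a < numA; ++a) addPair(source, a);      // source -> A
  for (int b = 0; b < numB; ++b) addPair(numA + b, sink); // B -> sink
  std::printf("%zu directed edges in the flow network\n", edges.size()); // 2*(3+2+2) = 14
  return 0;
}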
numThreads : 1); - while (!initial.empty()) { - Galois::for_each(initial.begin(), initial.end(), - Process(*this, g, source, sink, numNodes, interval, shouldGlobalRelabel)); - - if (!shouldGlobalRelabel) - break; - - t.start(); - std::cout << "Starting global relabel, current excess at sink " - << g.getData(sink).excess << "\n"; - initial.clear(); - globalRelabel(g, source, sink, numNodes, initial); - shouldGlobalRelabel = false; - t.stop(); - } - - t.start(); - std::cout << "Final excess at sink " << g.getData(sink).excess << "\n"; - g.removeNode(sink); - g.removeNode(source); - extractMatching(g); - t.stop(); - } -}; - -template -struct Exists { - typedef typename G::edge_iterator edge_iterator; - - bool operator()(G& g, const edge_iterator& ii) { - //assert(g.getEdgeData(src, dst).cap + g.getEdgeData(dst, src).cap == 1); - //assert(g.getEdgeData(src, dst).cap != g.getEdgeData(dst, src).cap); - return g.getEdgeData(ii).cap == 1; - } -}; - -// ******************* Verification *************************** - -template -struct Verifier { - typedef typename G::GraphNode GraphNode; - typedef typename G::node_type node_type; - typedef typename G::edge_iterator edge_iterator; - typedef typename G::NodeList NodeList; - typedef typename GraphTypes::Matching Matching; - - bool hasCoveredNeighbors(G& g, const GraphNode& src) { - for (edge_iterator ii = g.edge_begin(src), ei = g.edge_end(src); ii != ei; ++ii) { - GraphNode dst = g.getEdgeDst(ii); - if (!g.getData(dst).covered) - return false; - } - return true; - } - - void check(G& g, typename NodeList::iterator ii, typename NodeList::iterator ei, - size_t& count, bool& retval) { - for (; ii != ei; ++ii) { - node_type& dii = g.getData(*ii); - if (dii.degree > 1) { - std::cerr << "Error: not a matching, node " << dii.id << " incident to " << dii.degree << " edges\n"; - retval = false; - } - - if (dii.covered) { - count++; - } - - if (dii.covered || hasCoveredNeighbors(g, *ii)) { - // Good - } else { - std::cerr << "Error: not a node cover, node " << dii.id - << " with degree " << dii.degree << " not covered nor incident to covered node\n"; - retval = false; - } - } - } - - bool operator()(G& g, const Matching& matching) { - for (typename Matching::const_iterator ii = matching.begin(), - ei = matching.end(); ii != ei; ++ii) { - g.getData(ii->first).degree++; - g.getData(ii->second).degree++; - } - - bool retval = true; - size_t count = 0; - check(g, g.A.begin(), g.A.end(), count, retval); - check(g, g.B.begin(), g.B.end(), count, retval); - - if (count != matching.size()) { - std::cerr << "Error: matching is different than node cover " << matching.size() << " vs " << count << "\n"; - retval = false; - } - - return retval; - } -}; - - -static double nextRand() { - return rand() / (double) RAND_MAX; -} - -/** - * Generate a random bipartite graph as used in LEDA evaluation and - * refererenced in [CGM+97]. Nodes are divided into numGroups groups of size - * numA/numGroups each. Each node in A has degree d = numEdges/numA and the - * edges out of a node in group i of A go to random nodes in groups i+1 and - * i-1 of B. If numGroups == 0, just randomly assign nodes of A to nodes of - * B. 
- */ -template -void generateInput(int numA, int numB, int numEdges, int numGroups, G* g) { - typedef typename G::node_type node_type; - - assert(numA > 0 && numB > 0); - - size_t id = 0; - - for (int i = 0; i < numA; ++i) - g->addNode(g->createNode(node_type(id++)), true); - for (int i = 0; i < numB; ++i) - g->addNode(g->createNode(node_type(id++)), false); - - int d = numEdges/numA; - if (numGroups > numA) - numGroups = numA; - if (numGroups > numB) - numGroups = numB; - - int count = 0; - if (numGroups > 0) { - int aSize = numA/numGroups; - int bSize = numB/numGroups; - - for (typename G::NodeList::iterator ii = g->A.begin(), ei = g->A.end(); - ii != ei; ++ii, ++count) { - int group = count/aSize; - if (group == numGroups) - break; - int base1 = group == 0 ? (numGroups-1)*bSize : (group-1)*bSize; - int base2 = group == numGroups-1 ? 0 : (group+1)*bSize; - for (int i = 0; i < d; ++i) { - int b = nextRand() < 0.5 ? base1 : base2; - int off = (int)(nextRand() * (bSize-1)); - g->addEdge(*ii, g->B[b+off]); - } - } - } - - int r = numEdges - count*d; - while (r--) { - int ind_a = (int)(nextRand()*(numA-1)); - int ind_b = (int)(nextRand()*(numB-1)); - g->addEdge(g->A[ind_a], g->B[ind_b]); - } -} - - -template class Algo, typename G, bool Concurrent> -void start(int N, int numEdges, int numGroups) { - typedef Algo A; - - G g; - generateInput(N, N, numEdges, numGroups, &g); - - A algo; - std::cout << "Starting " << algo.name() << "\n"; - - Galois::StatTimer t; - t.start(); - algo(g); - t.stop(); - - if (!skipVerify) { - typename GraphTypes::Matching matching; - PrepareForVerifier()(g, &matching); - if (!Verifier()(g, matching)) { - std::cerr << "Verification failed.\n"; - //assert(0 && "Verification failed"); - //abort(); - } else { - std::cout << "Verification succeeded.\n"; - } - std::cout << "Algorithm produced matching of cardinality: " << matching.size() << "\n"; - } -} - - -template -void start() { - switch (algo) { - case pfpAlgo: - start, Concurrent>(N, numEdges, numGroups); break; - case ffAlgo: - start, Concurrent>(N, numEdges, numGroups); break; - default: - case abmpAlgo: - start, Concurrent>(N, numEdges, numGroups); break; - } -} - -int main(int argc, char** argv) { - Galois::StatManager M; - LonestarStart(argc, argv, name, desc, url); - - std::cout << "N: " << N - << " numEdges: " << numEdges - << " numGroups: " << numGroups - << " seed: " << seed << "\n"; - - srand(seed); - - switch (executionType) { - case serial: start(); break; - default: - case parallel: start(); break; - } - - return 0; -} diff --git a/maxflow/galois/apps/pagerank/CMakeLists.txt b/maxflow/galois/apps/pagerank/CMakeLists.txt deleted file mode 100644 index 4bc9cfd..0000000 --- a/maxflow/galois/apps/pagerank/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -if(USE_EXP) - include_directories(../../exp/apps/pagerank .) -endif() -app(pagerank PageRank.cpp) diff --git a/maxflow/galois/apps/pagerank/PageRank.cpp b/maxflow/galois/apps/pagerank/PageRank.cpp deleted file mode 100644 index 991e145..0000000 --- a/maxflow/galois/apps/pagerank/PageRank.cpp +++ /dev/null @@ -1,430 +0,0 @@ -/** Page rank application -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. 
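// Editorial sketch (illustrative, not from the deleted file): generateInput above builds the
// LEDA-style instance by giving each A-node d = numEdges/numA edges into the B-groups adjacent
// (with wrap-around) to its own group. A compact stand-alone version of just that edge-placement
// rule, using <random> instead of rand(); all names and sizes here are made up.
#include <cstdio>
#include <random>
#include <utility>
#include <vector>

int main() {
  const int numA = 8, numB = 8, numEdges = 16, numGroups = 4;
  const int d = numEdges / numA, aSize = numA / numGroups, bSize = numB / numGroups;

  std::mt19937 rng(42);
  std::uniform_real_distribution<double> uni(0.0, 1.0);
  std::vector<std::pair<int,int>> edges; // (a, b)

  for (int a = 0; a < numA; ++a) {
    int group = a / aSize;
    int base1 = (group == 0 ? numGroups - 1 : group - 1) * bSize; // group i-1 (wrapping)
    int base2 = (group == numGroups - 1 ? 0 : group + 1) * bSize; // group i+1 (wrapping)
    for (int i = 0; i < d; ++i) {
      int base = uni(rng) < 0.5 ? base1 : base2;
      int off = (int)(uni(rng) * (bSize - 1));
      edges.emplace_back(a, base + off);
    }
  }
  std::printf("placed %zu edges\n", edges.size()); // numA * d = 16
  return 0;
}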
- * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author Donald Nguyen - */ -#include "Galois/config.h" -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" -#include "Galois/Bag.h" -#include "Galois/Statistic.h" -#include "Galois/Graph/LCGraph.h" -#include "Galois/Graph/TypeTraits.h" -#include "Lonestar/BoilerPlate.h" - -#include GALOIS_CXX11_STD_HEADER(atomic) -#include -#include -#include -#include -#include - -#include "PageRank.h" -#ifdef GALOIS_USE_EXP -#include "GraphLabAlgo.h" -#include "LigraAlgo.h" -#endif - -namespace cll = llvm::cl; - -static const char* name = "Page Rank"; -static const char* desc = "Computes page ranks a la Page and Brin"; -static const char* url = 0; - -enum Algo { - graphlab, - graphlabAsync, - ligra, - ligraChi, - pull, - serial -}; - -cll::opt filename(cll::Positional, cll::desc(""), cll::Required); -static cll::opt transposeGraphName("graphTranspose", cll::desc("Transpose of input graph")); -static cll::opt symmetricGraph("symmetricGraph", cll::desc("Input graph is symmetric")); -static cll::opt outputPullFilename("outputPull", cll::desc("Precompute data for Pull algorithm to file")); -cll::opt maxIterations("maxIterations", cll::desc("Maximum iterations"), cll::init(100)); -cll::opt memoryLimit("memoryLimit", - cll::desc("Memory limit for out-of-core algorithms (in MB)"), cll::init(~0U)); -static cll::opt algo("algo", cll::desc("Choose an algorithm:"), - cll::values( - clEnumValN(Algo::pull, "pull", "Use precomputed data perform pull-based algorithm"), - clEnumValN(Algo::serial, "serial", "Compute PageRank in serial"), -#ifdef GALOIS_USE_EXP - clEnumValN(Algo::graphlab, "graphlab", "Use GraphLab programming model"), - clEnumValN(Algo::graphlabAsync, "graphlabAsync", "Use GraphLab-Asynchronous programming model"), - clEnumValN(Algo::ligra, "ligra", "Use Ligra programming model"), - clEnumValN(Algo::ligraChi, "ligraChi", "Use Ligra and GraphChi programming model"), -#endif - clEnumValEnd), cll::init(Algo::pull)); - -struct SerialAlgo { - typedef Galois::Graph::LC_CSR_Graph - ::with_no_lockable::type Graph; - typedef Graph::GraphNode GNode; - - std::string name() const { return "Serial"; } - - void readGraph(Graph& graph) { Galois::Graph::readGraph(graph, filename); } - - struct Initialize { - Graph& g; - Initialize(Graph& g): g(g) { } - void operator()(Graph::GraphNode n) { - g.getData(n).value = 1.0; - g.getData(n).accum.write(0.0); - } - }; - - void operator()(Graph& graph) { - unsigned int iteration = 0; - unsigned int numNodes = graph.size(); - - while (true) { - float max_delta = std::numeric_limits::min(); - unsigned int small_delta = 0; - - for (auto ii = graph.begin(), ei = graph.end(); ii != ei; ++ii) { - GNode src = *ii; - PNode& sdata = graph.getData(src); - int 
neighbors = std::distance(graph.edge_begin(src), graph.edge_end(src)); - for (auto jj = graph.edge_begin(src), ej = graph.edge_end(src); jj != ej; ++jj) { - GNode dst = graph.getEdgeDst(jj); - PNode& ddata = graph.getData(dst); - float delta = sdata.value / neighbors; - ddata.accum.write(ddata.accum.read() + delta); - } - } - - for (auto ii = graph.begin(), ei = graph.end(); ii != ei; ++ii) { - GNode src = *ii; - PNode& sdata = graph.getData(src, Galois::MethodFlag::NONE); - float value = (1.0 - alpha) * sdata.accum.read() + alpha; - float diff = std::fabs(value - sdata.value); - if (diff <= tolerance) - ++small_delta; - if (diff > max_delta) - max_delta = diff; - sdata.value = value; - sdata.accum.write(0); - } - - iteration += 1; - - std::cout << "iteration: " << iteration - << " max delta: " << max_delta - << " small delta: " << small_delta - << " (" << small_delta / (float) numNodes << ")" - << "\n"; - - if (max_delta <= tolerance || iteration >= maxIterations) - break; - } - - if (iteration >= maxIterations) { - std::cout << "Failed to converge\n"; - } - } -}; - -struct PullAlgo { - struct LNode { - float value[2]; - - float getPageRank() { return value[1]; } - float getPageRank(unsigned int it) { return value[it & 1]; } - void setPageRank(unsigned it, float v) { value[(it+1) & 1] = v; } - }; - typedef Galois::Graph::LC_InlineEdge_Graph - ::with_compressed_node_ptr::type - ::with_no_lockable::type - ::with_numa_alloc::type - Graph; - typedef Graph::GraphNode GNode; - - std::string name() const { return "Pull"; } - - Galois::GReduceMax max_delta; - Galois::GAccumulator small_delta; - - void readGraph(Graph& graph) { - if (transposeGraphName.size()) { - Galois::Graph::readGraph(graph, transposeGraphName); - } else { - std::cerr << "Need to pass precomputed graph through -graphTranspose option\n"; - abort(); - } - } - - struct Initialize { - Graph& g; - Initialize(Graph& g): g(g) { } - void operator()(Graph::GraphNode n) { - LNode& data = g.getData(n, Galois::MethodFlag::NONE); - data.value[0] = 1.0; - data.value[1] = 1.0; - } - }; - - struct Copy { - Graph& g; - Copy(Graph& g): g(g) { } - void operator()(Graph::GraphNode n) { - LNode& data = g.getData(n, Galois::MethodFlag::NONE); - data.value[1] = data.value[0]; - } - }; - - struct Process { - PullAlgo* self; - Graph& graph; - unsigned int iteration; - - Process(PullAlgo* s, Graph& g, unsigned int i): self(s), graph(g), iteration(i) { } - - void operator()(const GNode& src, Galois::UserContext& ctx) { - (*this)(src); - } - - void operator()(const GNode& src) { - LNode& sdata = graph.getData(src, Galois::MethodFlag::NONE); - double sum = 0; - - for (auto jj = graph.edge_begin(src, Galois::MethodFlag::NONE), ej = graph.edge_end(src, Galois::MethodFlag::NONE); jj != ej; ++jj) { - GNode dst = graph.getEdgeDst(jj); - float w = graph.getEdgeData(jj); - - LNode& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - sum += ddata.getPageRank(iteration) * w; - } - - float value = sum * (1.0 - alpha) + alpha; - float diff = std::fabs(value - sdata.getPageRank(iteration)); - - if (diff <= tolerance) - self->small_delta += 1; - self->max_delta.update(diff); - sdata.setPageRank(iteration, value); - } - }; - - void operator()(Graph& graph) { - unsigned int iteration = 0; - - while (true) { - Galois::for_each_local(graph, Process(this, graph, iteration)); - iteration += 1; - - float delta = max_delta.reduce(); - size_t sdelta = small_delta.reduce(); - - std::cout << "iteration: " << iteration - << " max delta: " << delta - << " small delta: " << 
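// Editorial sketch (illustrative, not from the deleted file): both the serial and the pull
// variant above iterate
//   rank(v) = alpha + (1 - alpha) * sum over in-neighbors u of rank(u) / outdeg(u)
// with alpha = 1 - 0.85, stopping once the largest per-node change is <= tolerance or
// maxIterations is reached. A tiny self-contained version of that loop; the 3-node graph is
// made up for illustration.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include <vector>

int main() {
  const float alpha = 1.0f - 0.85f, tolerance = 0.01f;
  const int maxIterations = 100;
  // out[u] = nodes u links to (0 -> 1, 1 -> 2, 2 -> 0 and 2 -> 1).
  const std::vector<std::vector<int>> out = {{1}, {2}, {0, 1}};
  std::vector<float> rank(out.size(), 1.0f), next(out.size());

  for (int it = 1; it <= maxIterations; ++it) {
    std::fill(next.begin(), next.end(), 0.0f);
    for (size_t u = 0; u < out.size(); ++u)
      for (int v : out[u]) next[v] += rank[u] / out[u].size(); // push rank/outdeg to v
    float maxDelta = 0.0f;
    for (size_t v = 0; v < rank.size(); ++v) {
      float value = alpha + (1.0f - alpha) * next[v];
      maxDelta = std::max(maxDelta, std::fabs(value - rank[v]));
      rank[v] = value;
    }
    if (maxDelta <= tolerance) { std::printf("converged after %d iterations\n", it); break; }
  }
  std::printf("ranks: %.3f %.3f %.3f\n", rank[0], rank[1], rank[2]);
  return 0;
}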
sdelta - << " (" << sdelta / (float) graph.size() << ")" - << "\n"; - - if (delta <= tolerance || iteration >= maxIterations) - break; - max_delta.reset(); - small_delta.reset(); - } - - if (iteration >= maxIterations) { - std::cout << "Failed to converge\n"; - } - - if (iteration & 1) { - // Result already in right place - } else { - Galois::do_all_local(graph, Copy(graph)); - } - } -}; - -//! Transpose in-edges to out-edges -static void precomputePullData() { - typedef Galois::Graph::LC_CSR_Graph - ::with_no_lockable::type InputGraph; - typedef InputGraph::GraphNode InputNode; - typedef Galois::Graph::FileGraphWriter OutputGraph; - //typedef OutputGraph::GraphNode OutputNode; - - InputGraph input; - OutputGraph output; - Galois::Graph::readGraph(input, filename); - - size_t node_id = 0; - for (auto ii = input.begin(), ei = input.end(); ii != ei; ++ii) { - InputNode src = *ii; - input.getData(src) = node_id++; - } - - output.setNumNodes(input.size()); - output.setNumEdges(input.sizeEdges()); - output.setSizeofEdgeData(sizeof(float)); - output.phase1(); - - for (auto ii = input.begin(), ei = input.end(); ii != ei; ++ii) { - InputNode src = *ii; - size_t sid = input.getData(src); - assert(sid < input.size()); - - //size_t num_neighbors = std::distance(input.edge_begin(src), input.edge_end(src)); - - for (auto jj = input.edge_begin(src), ej = input.edge_end(src); jj != ej; ++jj) { - InputNode dst = input.getEdgeDst(jj); - size_t did = input.getData(dst); - assert(did < input.size()); - - output.incrementDegree(did); - } - } - - output.phase2(); - std::vector edgeData; - edgeData.resize(input.sizeEdges()); - - for (auto ii = input.begin(), ei = input.end(); ii != ei; ++ii) { - InputNode src = *ii; - size_t sid = input.getData(src); - assert(sid < input.size()); - - size_t num_neighbors = std::distance(input.edge_begin(src), input.edge_end(src)); - - float w = 1.0/num_neighbors; - for (auto jj = input.edge_begin(src), ej = input.edge_end(src); jj != ej; ++jj) { - InputNode dst = input.getEdgeDst(jj); - size_t did = input.getData(dst); - assert(did < input.size()); - - size_t idx = output.addNeighbor(did, sid); - edgeData[idx] = w; - } - } - - float* t = output.finish(); - memcpy(t, &edgeData[0], sizeof(edgeData[0]) * edgeData.size()); - - output.structureToFile(outputPullFilename); - std::cout << "Wrote " << outputPullFilename << "\n"; -} - -//! 
Make values unique -template -struct TopPair { - float value; - GNode id; - - TopPair(float v, GNode i): value(v), id(i) { } - - bool operator<(const TopPair& b) const { - if (value == b.value) - return id > b.id; - return value < b.value; - } -}; - -template -static void printTop(Graph& graph, int topn) { - typedef typename Graph::GraphNode GNode; - typedef typename Graph::node_data_reference node_data_reference; - typedef TopPair Pair; - typedef std::map Top; - - Top top; - - for (auto ii = graph.begin(), ei = graph.end(); ii != ei; ++ii) { - GNode src = *ii; - node_data_reference n = graph.getData(src); - float value = n.getPageRank(); - Pair key(value, src); - - if ((int) top.size() < topn) { - top.insert(std::make_pair(key, src)); - continue; - } - - if (top.begin()->first < key) { - top.erase(top.begin()); - top.insert(std::make_pair(key, src)); - } - } - - int rank = 1; - std::cout << "Rank PageRank Id\n"; - for (typename Top::reverse_iterator ii = top.rbegin(), ei = top.rend(); ii != ei; ++ii, ++rank) { - std::cout << rank << ": " << ii->first.value << " " << ii->first.id << "\n"; - } -} - -template -void run() { - typedef typename Algo::Graph Graph; - - Algo algo; - Graph graph; - - algo.readGraph(graph); - - Galois::preAlloc(numThreads + (graph.size() * sizeof(typename Graph::node_data_type)) / Galois::Runtime::MM::pageSize); - Galois::reportPageAlloc("MeminfoPre"); - - Galois::StatTimer T; - std::cout << "Running " << algo.name() << " version\n"; - std::cout << "Target max delta: " << tolerance << "\n"; - T.start(); - Galois::do_all_local(graph, typename Algo::Initialize(graph)); - algo(graph); - T.stop(); - - Galois::reportPageAlloc("MeminfoPost"); - - if (!skipVerify) - printTop(graph, 10); -} - -int main(int argc, char **argv) { - LonestarStart(argc, argv, name, desc, url); - Galois::StatManager statManager; - - if (outputPullFilename.size()) { - precomputePullData(); - return 0; - } - - Galois::StatTimer T("TotalTime"); - T.start(); - switch (algo) { - case Algo::pull: run(); break; -#ifdef GALOIS_USE_EXP - case Algo::ligra: run >(); break; - case Algo::ligraChi: run >(); break; - case Algo::graphlab: run >(); break; - case Algo::graphlabAsync: run >(); break; -#endif - case Algo::serial: run(); break; - default: std::cerr << "Unknown algorithm\n"; abort(); - } - T.stop(); - - return 0; -} diff --git a/maxflow/galois/apps/pagerank/PageRank.h b/maxflow/galois/apps/pagerank/PageRank.h deleted file mode 100644 index 6a9f49f..0000000 --- a/maxflow/galois/apps/pagerank/PageRank.h +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef APPS_PAGERANK_PAGERANK_H -#define APPS_PAGERANK_PAGERANK_H - -#include "llvm/Support/CommandLine.h" - -//! d is the damping factor. Alpha is the prob that user will do a random jump, i.e., 1 - d -static const float alpha = 1.0 - 0.85; - -//! 
maximum relative change until we deem convergence -static const float tolerance = 0.01; - -//ICC v13.1 doesn't yet support std::atomic completely, emmulate its -//behavor with std::atomic -struct atomic_float : public std::atomic { - static_assert(sizeof(int) == sizeof(float), "int and float must be the same size"); - - float atomicIncrement(float value) { - while (true) { - union { float as_float; int as_int; } oldValue = { read() }; - union { float as_float; int as_int; } newValue = { oldValue.as_float + value }; - if (this->compare_exchange_strong(oldValue.as_int, newValue.as_int)) - return newValue.as_float; - } - } - - float read() { - union { int as_int; float as_float; } caster = { this->load(std::memory_order_relaxed) }; - return caster.as_float; - } - - void write(float v) { - union { float as_float; int as_int; } caster = { v }; - this->store(caster.as_int, std::memory_order_relaxed); - } -}; - -struct PNode { - float value; - atomic_float accum; - PNode() { } - - float getPageRank() { return value; } -}; - -extern llvm::cl::opt memoryLimit; -extern llvm::cl::opt filename; -extern llvm::cl::opt maxIterations; - -#endif diff --git a/maxflow/galois/apps/preflowpush/CMakeLists.txt b/maxflow/galois/apps/preflowpush/CMakeLists.txt deleted file mode 100644 index 98c9f4a..0000000 --- a/maxflow/galois/apps/preflowpush/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -app(preflowpush Preflowpush.cpp) diff --git a/maxflow/galois/apps/preflowpush/Preflowpush.cpp b/maxflow/galois/apps/preflowpush/Preflowpush.cpp deleted file mode 100644 index f966898..0000000 --- a/maxflow/galois/apps/preflowpush/Preflowpush.cpp +++ /dev/null @@ -1,790 +0,0 @@ -/** Preflow-push application -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. 
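// Editorial sketch (illustrative, not from the deleted file): atomic_float above emulates
// fetch-add on a float by CAS-ing the raw 32-bit pattern held in an atomic int, to work around
// incomplete std::atomic<float> support in older compilers. On a conforming C++11 compiler,
// std::atomic<float> plus a compare_exchange loop expresses the same idea directly.
#include <atomic>
#include <cstdio>

static float atomicAdd(std::atomic<float>& a, float v) {
  float oldVal = a.load(std::memory_order_relaxed);
  // Retry until no other thread changed the value between the load and the exchange;
  // on failure, oldVal is refreshed with the current contents automatically.
  while (!a.compare_exchange_weak(oldVal, oldVal + v)) { }
  return oldVal + v;
}

int main() {
  std::atomic<float> accum(0.0f);
  atomicAdd(accum, 0.25f);
  atomicAdd(accum, 0.50f);
  std::printf("accum = %g\n", accum.load()); // 0.75
  return 0;
}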
- * - * @author Donald Nguyen - */ -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" -#include "Galois/Statistic.h" -#include "Galois/Bag.h" -#include "Galois/Graph/LCGraph.h" -#include "llvm/Support/CommandLine.h" - -#ifdef GALOIS_USE_EXP -#include "Galois/PriorityScheduling.h" -#endif - -#include "Lonestar/BoilerPlate.h" - -#include -#include - -namespace cll = llvm::cl; - -const char* name = "Preflow Push"; -const char* desc = "Finds the maximum flow in a network using the preflow push technique"; -const char* url = "preflow_push"; - -enum DetAlgo { - nondet, - detBase, - detDisjoint -}; - -static cll::opt filename(cll::Positional, cll::desc(""), cll::Required); -static cll::opt sourceId(cll::Positional, cll::desc("sourceID"), cll::Required); -static cll::opt sinkId(cll::Positional, cll::desc("sinkID"), cll::Required); -static cll::opt useHLOrder("useHLOrder", cll::desc("Use HL ordering heuristic"), cll::init(false)); -static cll::opt useUnitCapacity("useUnitCapacity", cll::desc("Assume all capacities are unit"), cll::init(false)); -static cll::opt useSymmetricDirectly("useSymmetricDirectly", - cll::desc("Assume input graph is symmetric and has unit capacities"), cll::init(false)); -static cll::opt relabelInt("relabel", - cll::desc("relabel interval: < 0 no relabeling, 0 use default interval, > 0 relabel every X iterations"), cll::init(0)); -static cll::opt detAlgo(cll::desc("Deterministic algorithm:"), - cll::values( - clEnumVal(nondet, "Non-deterministic"), - clEnumVal(detBase, "Base execution"), - clEnumVal(detDisjoint, "Disjoint execution"), - clEnumValEnd), cll::init(nondet)); - -/** - * Alpha parameter the original Goldberg algorithm to control when global - * relabeling occurs. For comparison purposes, we keep them the same as - * before, but it is possible to achieve much better performance by adjusting - * the global relabel frequency. - */ -const int ALPHA = 6; - -/** - * Beta parameter the original Goldberg algorithm to control when global - * relabeling occurs. For comparison purposes, we keep them the same as - * before, but it is possible to achieve much better performance by adjusting - * the global relabel frequency. 
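// Editorial sketch (illustrative, not from the deleted file): both the max-flow matcher above
// and the preflow-push app below gate global relabeling on a work counter: each discharge adds
// 1, plus BETA more if it caused a relabel, and once the counter passes an interval on the order
// of ALPHA*|V| + |E| a global relabel (reverse BFS from the sink) is triggered. The driver loop
// and the "every third discharge relabels" pattern below are made up; only the constants and the
// trigger rule mirror the surrounding code.
#include <cstdio>

int main() {
  const int ALPHA = 6, BETA = 12;
  const long numNodes = 1000, numEdges = 10000;
  const long interval = ALPHA * numNodes + numEdges; // ~16000 work units per global relabel

  long counter = 0, globalRelabels = 0;
  for (long discharge = 0; discharge < 100000; ++discharge) {
    bool relabeled = (discharge % 3 == 0);   // stand-in for "discharge() relabeled the node"
    counter += 1 + (relabeled ? BETA : 0);
    if (counter >= interval) {               // same trigger as shouldGlobalRelabel above
      ++globalRelabels;
      counter = 0;
    }
  }
  std::printf("global relabels: %ld\n", globalRelabels);
  return 0;
}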
- */ -const int BETA = 12; - -struct Node { - uint32_t id; - size_t excess; - int height; - int current; - - Node() : excess(0), height(1), current(0) { } -}; - -std::ostream& operator<<(std::ostream& os, const Node& n) { - os << "(" - << "id: " << n.id - << ", excess: " << n.excess - << ", height: " << n.height - << ", current: " << n.current - << ")"; - return os; -} - -typedef Galois::Graph::LC_Linear_Graph::with_numa_alloc::type Graph; -typedef Graph::GraphNode GNode; - -struct Config { - Graph graph; - GNode sink; - GNode source; - int global_relabel_interval; - bool should_global_relabel; - Config() : should_global_relabel(false) {} -}; - -Config app; - -struct Indexer :std::unary_function { - int operator()(const GNode& n) const { - return -app.graph.getData(n, Galois::MethodFlag::NONE).height; - } -}; - -struct GLess :std::binary_function { - bool operator()(const GNode& lhs, const GNode& rhs) const { - int lv = -app.graph.getData(lhs, Galois::MethodFlag::NONE).height; - int rv = -app.graph.getData(rhs, Galois::MethodFlag::NONE).height; - return lv < rv; - } -}; -struct GGreater :std::binary_function { - bool operator()(const GNode& lhs, const GNode& rhs) const { - int lv = -app.graph.getData(lhs, Galois::MethodFlag::NONE).height; - int rv = -app.graph.getData(rhs, Galois::MethodFlag::NONE).height; - return lv > rv; - } -}; - -void checkAugmentingPath() { - // Use id field as visited flag - for (Graph::iterator ii = app.graph.begin(), - ee = app.graph.end(); ii != ee; ++ii) { - GNode src = *ii; - app.graph.getData(src).id = 0; - } - - std::deque queue; - - app.graph.getData(app.source).id = 1; - queue.push_back(app.source); - - while (!queue.empty()) { - GNode& src = queue.front(); - queue.pop_front(); - for (Graph::edge_iterator ii = app.graph.edge_begin(src), - ee = app.graph.edge_end(src); ii != ee; ++ii) { - GNode dst = app.graph.getEdgeDst(ii); - if (app.graph.getData(dst).id == 0 - && app.graph.getEdgeData(ii) > 0) { - app.graph.getData(dst).id = 1; - queue.push_back(dst); - } - } - } - - if (app.graph.getData(app.sink).id != 0) { - assert(false && "Augmenting path exisits"); - abort(); - } -} - -void checkHeights() { - for (Graph::iterator ii = app.graph.begin(), - ei = app.graph.end(); ii != ei; ++ii) { - GNode src = *ii; - int sh = app.graph.getData(src).height; - for (Graph::edge_iterator jj = app.graph.edge_begin(src), - ej = app.graph.edge_end(src); jj != ej; ++jj) { - GNode dst = app.graph.getEdgeDst(jj); - int cap = app.graph.getEdgeData(jj); - int dh = app.graph.getData(dst).height; - if (cap > 0 && sh > dh + 1) { - std::cerr << "height violated at " << app.graph.getData(src) << "\n"; - abort(); - } - } - } -} - -Graph::edge_iterator findEdge(Graph& g, GNode src, GNode dst) { - Graph::edge_iterator ii = g.edge_begin(src, Galois::MethodFlag::NONE), ei = g.edge_end(src, Galois::MethodFlag::NONE); - for (; ii != ei; ++ii) { - if (g.getEdgeDst(ii) == dst) - break; - } - return ii; -} - -void checkConservation(Config& orig) { - std::vector map; - map.resize(app.graph.size()); - - // Setup ids assuming same iteration order in both graphs - uint32_t id = 0; - for (Graph::iterator ii = app.graph.begin(), - ei = app.graph.end(); ii != ei; ++ii, ++id) { - app.graph.getData(*ii).id = id; - } - id = 0; - for (Graph::iterator ii = orig.graph.begin(), - ei = orig.graph.end(); ii != ei; ++ii, ++id) { - orig.graph.getData(*ii).id = id; - map[id] = *ii; - } - - // Now do some checking - for (Graph::iterator ii = app.graph.begin(), ei = app.graph.end(); ii != ei; ++ii) { - GNode src = 
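// Editorial sketch (illustrative, not from the deleted file): checkAugmentingPath above verifies
// maximality with a BFS from the source that only follows edges with leftover capacity; if the
// sink is still reachable, the computed flow cannot be maximum. The same check on a plain
// capacity matrix; the 4-node residual network below is made up.
#include <cstdio>
#include <deque>
#include <vector>

int main() {
  // residual[u][v] = remaining capacity after running max-flow (made-up numbers).
  const std::vector<std::vector<int>> residual = {
      {0, 0, 2, 0},   // source 0: edge to 1 saturated, 2 units left toward 2
      {0, 0, 0, 0},   // node 1: edge to sink saturated
      {0, 1, 0, 0},   // node 2: can still reach 1, but 1 cannot reach the sink
      {0, 0, 0, 0}};  // sink 3
  const int source = 0, sink = 3;

  std::vector<bool> seen(residual.size(), false);
  std::deque<int> queue{source};
  seen[source] = true;
  while (!queue.empty()) {
    int u = queue.front(); queue.pop_front();
    for (size_t v = 0; v < residual[u].size(); ++v)
      if (!seen[v] && residual[u][v] > 0) { seen[v] = true; queue.push_back((int)v); }
  }
  std::puts(seen[sink] ? "augmenting path exists: not a max flow"
                       : "no augmenting path: flow is maximum");
  return 0;
}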
*ii; - const Node& node = app.graph.getData(src); - uint32_t srcId = node.id; - - if (src == app.source || src == app.sink) - continue; - - if (node.excess != 0 && node.height != (int) app.graph.size()) { - std::cerr << "Non-zero excess at " << node << "\n"; - abort(); - } - - size_t sum = 0; - for (Graph::edge_iterator jj = app.graph.edge_begin(src), - ej = app.graph.edge_end(src); jj != ej; ++jj) { - GNode dst = app.graph.getEdgeDst(jj); - uint32_t dstId = app.graph.getData(dst).id; - int ocap = orig.graph.getEdgeData(findEdge(orig.graph, map[srcId], map[dstId])); - int delta = 0; - if (ocap > 0) - delta -= ocap - app.graph.getEdgeData(jj); - else - delta += app.graph.getEdgeData(jj); - sum += delta; - } - - if (node.excess != sum) { - std::cerr << "Not pseudoflow: " << node.excess << " != " << sum << " at " << node << "\n"; - abort(); - } - } -} - -void verify(Config& orig) { - // FIXME: doesn't fully check result - checkHeights(); - checkConservation(orig); - checkAugmentingPath(); -} - -void reduceCapacity(const Graph::edge_iterator& ii, const GNode& src, const GNode& dst, int amount) { - Graph::edge_data_type& cap1 = app.graph.getEdgeData(ii); - Graph::edge_data_type& cap2 = app.graph.getEdgeData(findEdge(app.graph, dst, src)); - cap1 -= amount; - cap2 += amount; -} - -template -struct UpdateHeights { - //typedef int tt_does_not_need_aborts; - typedef int tt_needs_per_iter_alloc; // For LocalState - - struct LocalState { - LocalState(UpdateHeights& self, Galois::PerIterAllocTy& alloc) { } - }; - typedef LocalState GaloisDeterministicLocalState; - static_assert(Galois::has_deterministic_local_state::value, "Oops"); - - //struct IdFn { - // unsigned long operator()(const GNode& item) const { - // return app.graph.getData(item, Galois::MethodFlag::NONE).id; - // } - //}; - - /** - * Do reverse BFS on residual graph. - */ - void operator()(const GNode& src, Galois::UserContext& ctx) { - if (version != nondet) { - bool used = false; - if (version == detDisjoint) { - ctx.getLocalState(used); - } - - if (!used) { - for (Graph::edge_iterator - ii = app.graph.edge_begin(src, Galois::MethodFlag::CHECK_CONFLICT), - ee = app.graph.edge_end(src, Galois::MethodFlag::CHECK_CONFLICT); - ii != ee; ++ii) { - GNode dst = app.graph.getEdgeDst(ii); - int rdata = app.graph.getEdgeData(findEdge(app.graph, dst, src)); - if (rdata > 0) { - app.graph.getData(dst, Galois::MethodFlag::CHECK_CONFLICT); - } - } - } - - if (version == detDisjoint) { - if (!used) - return; - } else { - app.graph.getData(src, Galois::MethodFlag::WRITE); - } - } - - for (Graph::edge_iterator - ii = app.graph.edge_begin(src, useCAS ? Galois::MethodFlag::NONE : Galois::MethodFlag::CHECK_CONFLICT), - ee = app.graph.edge_end(src, useCAS ? 
Galois::MethodFlag::NONE : Galois::MethodFlag::CHECK_CONFLICT); - ii != ee; ++ii) { - GNode dst = app.graph.getEdgeDst(ii); - int rdata = app.graph.getEdgeData(findEdge(app.graph, dst, src)); - if (rdata > 0) { - Node& node = app.graph.getData(dst, Galois::MethodFlag::NONE); - int newHeight = app.graph.getData(src, Galois::MethodFlag::NONE).height + 1; - if (useCAS) { - int oldHeight; - while (newHeight < (oldHeight = node.height)) { - if (__sync_bool_compare_and_swap(&node.height, oldHeight, newHeight)) { - ctx.push(dst); - break; - } - } - } else { - if (newHeight < node.height) { - node.height = newHeight; - ctx.push(dst); - } - } - } - } - } -}; - -struct ResetHeights { - void operator()(const GNode& src) { - Node& node = app.graph.getData(src, Galois::MethodFlag::NONE); - node.height = app.graph.size(); - node.current = 0; - if (src == app.sink) - node.height = 0; - } -}; - -template -struct FindWork { - WLTy& wl; - FindWork(WLTy& w) : wl(w) {} - - void operator()(const GNode& src) { - Node& node = app.graph.getData(src, Galois::MethodFlag::NONE); - if (src == app.sink || src == app.source || node.height >= (int) app.graph.size()) - return; - if (node.excess > 0) - wl.push_back(src); - } -}; - -template -void globalRelabel(IncomingWL& incoming) { - Galois::StatTimer T1("ResetHeightsTime"); - T1.start(); - Galois::do_all_local(app.graph, ResetHeights(), Galois::loopname("ResetHeights")); - T1.stop(); - - Galois::StatTimer T("UpdateHeightsTime"); - T.start(); - - switch (detAlgo) { - case nondet: -#ifdef GALOIS_USE_EXP - Galois::for_each(app.sink, UpdateHeights(), Galois::loopname("UpdateHeights"), Galois::wl>()); -#else - Galois::for_each(app.sink, UpdateHeights(), Galois::loopname("UpdateHeights")); -#endif - break; - case detBase: - Galois::for_each_det(app.sink, UpdateHeights(), "UpdateHeights"); - break; - case detDisjoint: - Galois::for_each_det(app.sink, UpdateHeights(), "UpdateHeights"); - break; - default: std::cerr << "Unknown algorithm" << detAlgo << "\n"; abort(); - } - T.stop(); - - Galois::StatTimer T2("FindWorkTime"); - T2.start(); - Galois::do_all_local(app.graph, FindWork(incoming), Galois::loopname("FindWork")); - T2.stop(); -} - -void acquire(const GNode& src) { - // LC Graphs have a different idea of locking - for (Graph::edge_iterator - ii = app.graph.edge_begin(src, Galois::MethodFlag::CHECK_CONFLICT), - ee = app.graph.edge_end(src, Galois::MethodFlag::CHECK_CONFLICT); - ii != ee; ++ii) { - GNode dst = app.graph.getEdgeDst(ii); - app.graph.getData(dst, Galois::MethodFlag::CHECK_CONFLICT); - } -} - -void relabel(const GNode& src) { - int minHeight = std::numeric_limits::max(); - int minEdge; - - int current = 0; - for (Graph::edge_iterator - ii = app.graph.edge_begin(src, Galois::MethodFlag::NONE), - ee = app.graph.edge_end(src, Galois::MethodFlag::NONE); - ii != ee; ++ii, ++current) { - GNode dst = app.graph.getEdgeDst(ii); - int cap = app.graph.getEdgeData(ii); - if (cap > 0) { - const Node& dnode = app.graph.getData(dst, Galois::MethodFlag::NONE); - if (dnode.height < minHeight) { - minHeight = dnode.height; - minEdge = current; - } - } - } - - assert(minHeight != std::numeric_limits::max()); - ++minHeight; - - Node& node = app.graph.getData(src, Galois::MethodFlag::NONE); - if (minHeight < (int) app.graph.size()) { - node.height = minHeight; - node.current = minEdge; - } else { - node.height = app.graph.size(); - } -} - -bool discharge(const GNode& src, Galois::UserContext& ctx) { - //Node& node = app.graph.getData(src, Galois::MethodFlag::CHECK_CONFLICT); - 
Node& node = app.graph.getData(src, Galois::MethodFlag::NONE); - //int prevHeight = node.height; - bool relabeled = false; - - if (node.excess == 0 || node.height >= (int) app.graph.size()) { - return false; - } - - while (true) { - //Galois::MethodFlag flag = relabeled ? Galois::MethodFlag::NONE : Galois::MethodFlag::CHECK_CONFLICT; - Galois::MethodFlag flag = Galois::MethodFlag::NONE; - bool finished = false; - int current = node.current; - Graph::edge_iterator - ii = app.graph.edge_begin(src, flag), - ee = app.graph.edge_end(src, flag); - std::advance(ii, node.current); - for (; ii != ee; ++ii, ++current) { - GNode dst = app.graph.getEdgeDst(ii); - int cap = app.graph.getEdgeData(ii); - if (cap == 0)// || current < node.current) - continue; - - Node& dnode = app.graph.getData(dst, Galois::MethodFlag::NONE); - if (node.height - 1 != dnode.height) - continue; - - // Push flow - int amount = std::min(static_cast(node.excess), cap); - reduceCapacity(ii, src, dst, amount); - - // Only add once - if (dst != app.sink && dst != app.source && dnode.excess == 0) - ctx.push(dst); - - node.excess -= amount; - dnode.excess += amount; - - if (node.excess == 0) { - finished = true; - node.current = current; - break; - } - } - - if (finished) - break; - - relabel(src); - relabeled = true; - - if (node.height == (int) app.graph.size()) - break; - - //prevHeight = node.height; - } - - return relabeled; -} - -struct Counter { - Galois::GAccumulator accum; - Galois::Runtime::PerThreadStorage local; -}; - -template -struct Process { - typedef int tt_needs_parallel_break; - typedef int tt_needs_per_iter_alloc; // For LocalState - - struct LocalState { - LocalState(Process& self, Galois::PerIterAllocTy& alloc) { } - }; - typedef LocalState GaloisDeterministicLocalState; - static_assert(Galois::has_deterministic_local_state::value, "Oops"); - - uintptr_t galoisDeterministicId(const GNode& item) const { - return app.graph.getData(item, Galois::MethodFlag::NONE).id; - } - static_assert(Galois::has_deterministic_id::value, "Oops"); - - bool galoisDeterministicParallelBreak() { - if (app.global_relabel_interval > 0 && counter.accum.reduce() >= app.global_relabel_interval) { - app.should_global_relabel = true; - return true; - } - return false; - } - static_assert(Galois::has_deterministic_parallel_break::value, "Oops"); - - Counter& counter; - - Process(Counter& c): counter(c) { } - - void operator()(GNode& src, Galois::UserContext& ctx) { - if (version != nondet) { - bool used = false; - if (version == detDisjoint) { - ctx.getLocalState(used); - } - if (!used) { - acquire(src); - } - if (version == detDisjoint) { - if (!used) - return; - } else { - app.graph.getData(src, Galois::MethodFlag::WRITE); - } - } - - int increment = 1; - if (discharge(src, ctx)) { - increment += BETA; - } - - counter.accum += increment; - } -}; - -template<> -struct Process { - typedef int tt_needs_parallel_break; - - Counter& counter; - int limit; - Process(Counter& c): counter(c) { - limit = app.global_relabel_interval / numThreads; - } - - void operator()(GNode& src, Galois::UserContext& ctx) { - int increment = 1; - acquire(src); - if (discharge(src, ctx)) { - increment += BETA; - } - - int v = *counter.local.getLocal() += increment; - if (app.global_relabel_interval > 0 && v >= limit) { - app.should_global_relabel = true; - ctx.breakLoop(); - return; - } - } -}; - -template -void writePfpGraph(const std::string& inputFile, const std::string& outputFile) { - typedef Galois::Graph::FileGraph ReaderGraph; - typedef 
ReaderGraph::GraphNode ReaderGNode; - - ReaderGraph reader; - reader.structureFromFile(inputFile); - - typedef Galois::Graph::FileGraphWriter Writer; - typedef Galois::LargeArray EdgeData; - typedef typename EdgeData::value_type edge_value_type; - - Writer p; - EdgeData edgeData; - - // Count edges - size_t numEdges = 0; - for (ReaderGraph::iterator ii = reader.begin(), ei = reader.end(); ii != ei; ++ii) { - ReaderGNode rsrc = *ii; - for (ReaderGraph::edge_iterator jj = reader.edge_begin(rsrc), - ej = reader.edge_end(rsrc); jj != ej; ++jj) { - ReaderGNode rdst = reader.getEdgeDst(jj); - if (rsrc == rdst) continue; - if (!reader.hasNeighbor(rdst, rsrc)) - ++numEdges; - ++numEdges; - } - } - - p.setNumNodes(reader.size()); - p.setNumEdges(numEdges); - p.setSizeofEdgeData(sizeof(edge_value_type)); - - p.phase1(); - for (ReaderGraph::iterator ii = reader.begin(), ei = reader.end(); ii != ei; ++ii) { - ReaderGNode rsrc = *ii; - for (ReaderGraph::edge_iterator jj = reader.edge_begin(rsrc), - ej = reader.edge_end(rsrc); jj != ej; ++jj) { - ReaderGNode rdst = reader.getEdgeDst(jj); - if (rsrc == rdst) continue; - if (!reader.hasNeighbor(rdst, rsrc)) - p.incrementDegree(rdst); - p.incrementDegree(rsrc); - } - } - - EdgeTy one = 1; - static_assert(sizeof(one) == sizeof(uint32_t), "Unexpected edge data size"); - one = Galois::convert_le32(one); - - p.phase2(); - edgeData.create(numEdges); - for (ReaderGraph::iterator ii = reader.begin(), ei = reader.end(); ii != ei; ++ii) { - ReaderGNode rsrc = *ii; - for (ReaderGraph::edge_iterator jj = reader.edge_begin(rsrc), - ej = reader.edge_end(rsrc); jj != ej; ++jj) { - ReaderGNode rdst = reader.getEdgeDst(jj); - if (rsrc == rdst) continue; - if (!reader.hasNeighbor(rdst, rsrc)) - edgeData.set(p.addNeighbor(rdst, rsrc), 0); - EdgeTy cap = useUnitCapacity ? 
one : reader.getEdgeData(jj); - edgeData.set(p.addNeighbor(rsrc, rdst), cap); - } - } - - edge_value_type* rawEdgeData = p.finish(); - std::copy(edgeData.begin(), edgeData.end(), rawEdgeData); - - p.structureToFile(outputFile); -} - -void initializeGraph(std::string inputFile, uint32_t sourceId, uint32_t sinkId, Config *newApp) { - if (useSymmetricDirectly) { - Galois::Graph::readGraph(newApp->graph, inputFile); - for (Graph::iterator ss = newApp->graph.begin(), es = newApp->graph.end(); ss != es; ++ss) { - for (Graph::edge_iterator ii = newApp->graph.edge_begin(*ss), ei = newApp->graph.edge_end(*ss); ii != ei; ++ii) - newApp->graph.getEdgeData(ii) = 1; - } - } else { - if (inputFile.find(".gr.pfp") != inputFile.size() - strlen(".gr.pfp")) { - std::string pfpName = inputFile + ".pfp"; - std::ifstream pfpFile(pfpName.c_str()); - if (!pfpFile.good()) { - writePfpGraph(inputFile, pfpName); - } - inputFile = pfpName; - } - Galois::Graph::readGraph(newApp->graph, inputFile); - -#ifdef HAVE_BIG_ENDIAN - // Convert edge data to host ordering - for (Graph::iterator ss = newApp->graph.begin(), es = newApp->graph.end(); ss != es; ++ss) { - for (Graph::edge_iterator ii = newApp->graph.edge_begin(*ss), ei = newApp->graph.edge_end(*ss); ii != ei; ++ii) { - Graph::edge_data_type& cap = newApp->graph.getEdgeData(ii); - static_assert(sizeof(cap) == sizeof(uint32_t), "Unexpected edge data size"); - cap = Galois::convert_le32(cap); - } - } -#endif - } - - Graph& g = newApp->graph; - - if (sourceId == sinkId || sourceId >= g.size() || sinkId >= g.size()) { - std::cerr << "invalid source or sink id\n"; - abort(); - } - - uint32_t id = 0; - for (Graph::iterator ii = g.begin(), ei = g.end(); ii != ei; ++ii, ++id) { - if (id == sourceId) { - newApp->source = *ii; - g.getData(newApp->source).height = g.size(); - } else if (id == sinkId) { - newApp->sink = *ii; - } - g.getData(*ii).id = id; - } -} - -template -void initializePreflow(C& initial) { - for (Graph::edge_iterator ii = app.graph.edge_begin(app.source), - ee = app.graph.edge_end(app.source); ii != ee; ++ii) { - GNode dst = app.graph.getEdgeDst(ii); - int cap = app.graph.getEdgeData(ii); - reduceCapacity(ii, app.source, dst, cap); - Node& node = app.graph.getData(dst); - node.excess += cap; - if (cap > 0) - initial.push_back(dst); - } -} - -void run() { - typedef Galois::WorkList::dChunkedFIFO<16> Chunk; - typedef Galois::WorkList::OrderedByIntegerMetric OBIM; - - Galois::InsertBag initial; - initializePreflow(initial); - - while (initial.begin() != initial.end()) { - Galois::StatTimer T_discharge("DischargeTime"); - T_discharge.start(); - Counter counter; - switch (detAlgo) { - case nondet: - if (useHLOrder) { - Galois::for_each_local(initial, Process(counter), Galois::loopname("Discharge"), Galois::wl()); - } else { - Galois::for_each_local(initial, Process(counter), Galois::loopname("Discharge")); - } - break; - case detBase: - Galois::for_each_det(initial.begin(), initial.end(), Process(counter), "Discharge"); - break; - case detDisjoint: - Galois::for_each_det(initial.begin(), initial.end(), Process(counter), "Discharge"); - break; - default: std::cerr << "Unknown algorithm" << detAlgo << "\n"; abort(); - } - T_discharge.stop(); - - if (app.should_global_relabel) { - Galois::StatTimer T_global_relabel("GlobalRelabelTime"); - T_global_relabel.start(); - initial.clear(); - globalRelabel(initial); - app.should_global_relabel = false; - T_global_relabel.stop(); - } else { - break; - } - } -} - - -int main(int argc, char** argv) { - Galois::StatManager M; 
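// Editorial sketch (illustrative, not from the deleted file): initializePreflow above saturates
// every edge leaving the source, credits the pushed amount as excess at the receiving node, and
// seeds the worklist with those nodes. The same bookkeeping on plain arrays; capacities below
// are made up. (The real code also adds the pushed amount to the matching reverse edge so the
// residual graph stays consistent.)
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  // Edges leaving the source: (dst, capacity).
  std::vector<std::pair<int,int>> sourceEdges = {{1, 5}, {2, 3}, {3, 0}};
  std::vector<long> excess(4, 0);
  std::vector<int> worklist;

  for (auto& e : sourceEdges) {
    int dst = e.first, cap = e.second;
    e.second = 0;             // the forward edge becomes saturated...
    excess[dst] += cap;       // ...and the pushed flow shows up as excess at dst
    if (cap > 0) worklist.push_back(dst);   // zero-capacity edges contribute nothing
  }
  std::printf("seeded %zu active nodes, excess at node 1 = %ld\n",
              worklist.size(), excess[1]); // 2 active nodes, excess 5
  return 0;
}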
- bool serial = false; - LonestarStart(argc, argv, name, desc, url); - - initializeGraph(filename, sourceId, sinkId, &app); - if (relabelInt == 0) { - app.global_relabel_interval = app.graph.size() * ALPHA + app.graph.sizeEdges() / 3; - } else { - app.global_relabel_interval = relabelInt; - } - Galois::StatTimer T; - T.start(); - run(); - T.stop(); - - std::cout << "max flow = " << app.graph.getData(app.sink).excess << "\n"; - std::cout << "time: " << ((double)T.get()/1000) << " s\n"; - if (!skipVerify) { - Config orig; - initializeGraph(filename, sourceId, sinkId, &orig); - verify(orig); - } - - return 0; -} diff --git a/maxflow/galois/apps/pta/CMakeLists.txt b/maxflow/galois/apps/pta/CMakeLists.txt deleted file mode 100644 index 3c998a5..0000000 --- a/maxflow/galois/apps/pta/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -app(pta) diff --git a/maxflow/galois/apps/pta/PointsTo.cpp b/maxflow/galois/apps/pta/PointsTo.cpp deleted file mode 100644 index 7957b1c..0000000 --- a/maxflow/galois/apps/pta/PointsTo.cpp +++ /dev/null @@ -1,581 +0,0 @@ -/** Points-to Analysis application -*- C++ -*- - * @file - * - * An inclusion-based points-to analysis algorithm to demostrate the Galois system. - * - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author rupesh nasre. - */ -#include "Galois/Galois.h" -#include "Galois/Bag.h" -#include "Galois/SparseBitVector.h" -#include "Galois/Statistic.h" -#include "Galois/Graph/Graph.h" -#include "Galois/Graph/FileGraph.h" -#include "Galois/WorkList/WorkList.h" -#include "llvm/Support/CommandLine.h" - -#include "Lonestar/BoilerPlate.h" - -#include -#include -#include -#include -#include -#include - -namespace cll = llvm::cl; - -namespace { - -const char* name = "Points-to Analysis"; -const char* desc = "Performs inclusion-based points-to analysis over the input constraints."; -const char* url = NULL; - -static cll::opt input(cll::Positional, cll::desc(""), cll::Required); - -const unsigned THRESHOLD_LOADSTORE = 500; // no of nodes to be processed before adding load/store edges. -const unsigned THRESHOLD_OCD = 500; - -struct Node { - unsigned id; - unsigned priority; - - Node() { - id = 0; - priority = 0; - } - Node(unsigned lid): id(lid) { - priority = 0; - } -}; - -typedef Galois::Graph::FirstGraph Graph; -typedef Graph::GraphNode GNode; - -/* copied from Andrew's SSSP. 
*/ - -struct UpdateRequest { - GNode n; - unsigned int w; - - UpdateRequest(GNode& N, unsigned int W) - :n(N), w(W) - {} -}; - -struct UpdateRequestIndexer - : std::binary_function { - unsigned int operator() (const UpdateRequest& val) const { - unsigned int t = val.w; - return t; - } -}; - - -//typedef std::pair Edge; -typedef std::vector WorkList; -typedef Galois::WorkList::OrderedByIntegerMetric > OBIM; - -class PtsToCons { -public: - typedef enum {AddressOf = 0, Copy, Load, Store} ConstraintType; - - PtsToCons(ConstraintType tt, unsigned ss, unsigned dd) { - src = ss; - dst = dd; - type = tt; - } - void getSrcDst(unsigned &ss, unsigned &dd) { - ss = src; - dd = dst; - } - ConstraintType getType() { - return type; - } - void print() { - if (type == Store) { - std::cerr << "*"; - } - std::cerr << "v" << dst; - std::cerr << " = "; - if (type == Load) { - std::cerr << "*"; - } else if (type == AddressOf) { - std::cerr << "&"; - } - std::cerr << "v" << src; - std::cerr << std::endl; - } -private: - unsigned src, dst; - ConstraintType type; -}; - -typedef std::vector PointsToConstraints; -typedef std::vector PointsToInfo; - -Graph graph; -std::vector nodes; -PointsToInfo result; -PointsToConstraints addrcopyconstraints, loadstoreconstraints; -unsigned numNodes = 0; - -class OCD { // Online Cycle Detection and elimination. -public: - OCD() { } - - void init() { - NoRepresentative = numNodes; - representative.resize(numNodes); - visited.resize(numNodes); - for (unsigned ii = 0; ii < numNodes; ++ii) { - representative[ii] = NoRepresentative; - } - } - void process(WorkList &worklist) { // worklist of nodes that are sources of new edges. - - for (unsigned ii = 0; ii < numNodes; ++ii) { - visited[ii] = false; - } - unsigned cyclenode = numNodes; // set to invalid id. - for (WorkList::iterator ii = worklist.begin(); ii != worklist.end(); ++ii) { - Node &nn = graph.getData(ii->n, Galois::MethodFlag::NONE); - unsigned nodeid = nn.id; - //std::cout << "debug: cycle process " << nodeid << std::endl; - if (cycleDetect(nodeid, cyclenode)) { - cycleCollapse(cyclenode); - } - } - } - - bool cycleDetect(unsigned nodeid, unsigned &cyclenode) { // it is okay not to detect all cycles as it is only an efficiency concern. - nodeid = getFinalRepresentative(nodeid); - if (isAncestor(nodeid)) { - cyclenode = nodeid; - return true; - } - if (visited[nodeid]) { - return false; - } - visited[nodeid] = true; - ancestors.push_back(nodeid); - - GNode nn = nodes[nodeid]; - for (Graph::edge_iterator ii = graph.edge_begin(nn, Galois::MethodFlag::NONE), ei = graph.edge_end(nn, Galois::MethodFlag::NONE); ii != ei; ++ii) { - Node &nn = graph.getData(graph.getEdgeDst(ii), Galois::MethodFlag::NONE); - unsigned iiid = nn.id; - if (cycleDetect(iiid, cyclenode)) { - //return true; // don't pop from ancestors. - cycleCollapse(cyclenode); - } - } - ancestors.pop_back(); - return false; - } - void cycleCollapse(unsigned repr) { - // assert(repr is present in ancestors). - //static unsigned ncycles = 0; - unsigned reprrepr = getFinalRepresentative(repr); - for (std::vector::iterator ii = ancestors.begin(); ii != ancestors.end(); ++ii) { - if (*ii == repr) { - //std::cout << "debug: collapsing cycle for " << repr << std::endl; - // cycle exists between nodes ancestors[*ii..end]. - for (std::vector::iterator jj = ii; jj != ancestors.end(); ++jj) { - unsigned jjrepr = getFinalRepresentative(*jj); // jjrepr has no representative. 
- makeRepr(jjrepr, reprrepr); - } - //std::cout << "debug: cycles collapsed = " << ++ncycles << std::endl; - break; - } - } - //ancestors.clear(); // since collapse is called from top level process(), the ancestors need to be cleared for the next element in the worklist. - } - void makeRepr(unsigned nodeid, unsigned repr) { - // make repr the representative of nodeid. - if (repr != nodeid) { - //std::cout << "debug: repr[" << nodeid << "] = " << repr << std::endl; - representative[nodeid] = repr; - if (!result[repr].isSubsetEq(result[nodeid])) { - //graph.getData(nodes[repr]); // lock it. - result[repr].unify(result[nodeid]); - } - } - } - unsigned getFinalRepresentative(unsigned nodeid) { - unsigned lnnid = nodeid; - while (representative[lnnid] != NoRepresentative) { - lnnid = representative[lnnid]; - } - // path compression. - unsigned repr = representative[nodeid]; - while (repr != NoRepresentative) { - representative[nodeid] = lnnid; - nodeid = repr; - repr = representative[nodeid]; - } - return lnnid; - } - -private: - bool isAncestor(unsigned nodeid) { - for (std::vector::iterator ii = ancestors.begin(); ii != ancestors.end(); ++ii) { - if (*ii == nodeid) { - return true; - } - } - return false; - } - std::vector ancestors; - std::vector visited; - std::vector representative; - unsigned NoRepresentative; -}; - -OCD ocd; - -void checkReprPointsTo() { - for (unsigned ii = 0; ii < result.size(); ++ii) { - unsigned repr = ocd.getFinalRepresentative(ii); - if (repr != ii && !result[ii].isSubsetEq(result[repr])) { - std::cout << "ERROR: pointsto(" << ii << ") is not less than its representative pointsto(" << repr << ").\n"; - } - } -} - -unsigned countPointsToFacts() { - unsigned count = 0; - for (PointsToInfo::iterator ii = result.begin(); ii != result.end(); ++ii) { - unsigned repr = ocd.getFinalRepresentative(ii - result.begin()); - count += result[repr].count(); - } - return count; -} -void printPointsToInfo(PointsToInfo &result) { - std::string prefix = "v"; - for (PointsToInfo::iterator ii = result.begin(); ii != result.end(); ++ii) { - std::cout << prefix << ii - result.begin() << ": "; - unsigned repr = ocd.getFinalRepresentative(ii - result.begin()); - result[repr].print(std::cout, prefix); - } -} -void processLoadStoreSerial(PointsToConstraints &constraints, WorkList &worklist, Galois::MethodFlag flag = Galois::MethodFlag::NONE) { - // add edges to the graph based on points-to information of the nodes - // and then add the source of each edge to the worklist. - for (PointsToConstraints::iterator ii = constraints.begin(); ii != constraints.end(); ++ii) { - unsigned src, dst; - //std::cout << "debug: Processing constraint: "; ii->print(); - ii->getSrcDst(src, dst); - unsigned srcrepr = ocd.getFinalRepresentative(src); - unsigned dstrepr = ocd.getFinalRepresentative(dst); - if (ii->getType() == PtsToCons::Load) { - GNode &nndstrepr = nodes[dstrepr]; - std::vector ptstoOfSrc; - result[srcrepr].getAllSetBits(ptstoOfSrc); - for (std::vector::iterator pointee = ptstoOfSrc.begin(); pointee != ptstoOfSrc.end(); ++pointee) { - unsigned pointeerepr = ocd.getFinalRepresentative(*pointee); - if (pointeerepr != dstrepr && graph.findEdge(nodes[pointeerepr], nodes[dstrepr]) == graph.edge_begin(nodes[pointeerepr])) { - GNode &nn = nodes[pointeerepr]; - graph.addEdge(nn, nndstrepr, flag); - //std::cout << "debug: adding edge from " << *pointee << " to " << dst << std::endl; - worklist.push_back(UpdateRequest(nn, graph.getData(nn, Galois::MethodFlag::NONE).priority)); - } - } - } else { // store. 
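// Illustrative sketch of the same representative chasing plus path
// compression that OCD::getFinalRepresentative performs above, written as a
// plain find() over a parent array. NO_REP marks a root here (the original
// uses numNodes as the sentinel); names are this sketch's own.
#include <vector>

static const unsigned NO_REP = ~0u;

unsigned findRepresentative(std::vector<unsigned>& rep, unsigned x) {
  unsigned root = x;
  while (rep[root] != NO_REP)           // walk up to the final representative
    root = rep[root];
  while (rep[x] != NO_REP) {            // second pass: point the whole chain at the root
    unsigned next = rep[x];
    rep[x] = root;
    x = next;
  }
  return root;
}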
- std::vector ptstoOfDst; - bool newedgeadded = false; - GNode &nnsrcrepr = nodes[srcrepr]; - result[dstrepr].getAllSetBits(ptstoOfDst); - for (std::vector::iterator pointee = ptstoOfDst.begin(); pointee != ptstoOfDst.end(); ++pointee) { - unsigned pointeerepr = ocd.getFinalRepresentative(*pointee); - if (srcrepr != pointeerepr && graph.findEdge(nodes[srcrepr],nodes[pointeerepr]) == graph.edge_end(nodes[srcrepr])) { - graph.addEdge(nnsrcrepr, nodes[pointeerepr], flag); - //std::cout << "debug: adding edge from " << src << " to " << *pointee << std::endl; - newedgeadded = true; - } - } - if (newedgeadded) { - worklist.push_back(UpdateRequest(nnsrcrepr, graph.getData(nnsrcrepr, Galois::MethodFlag::NONE).priority)); - } - } - } -} -void processAddressOfCopy(PointsToConstraints &constraints, WorkList &worklist) { - for (PointsToConstraints::iterator ii = constraints.begin(); ii != constraints.end(); ++ii) { - unsigned src, dst; - //std::cout << "debug: Processing constraint: "; ii->print(); - ii->getSrcDst(src, dst); - if (ii->getType() == PtsToCons::AddressOf) { - if (result[dst].set(src)) { - //std::cout << "debug: saving v" << dst << "->v" << src << std::endl; - } - } else if (src != dst) { // copy. - GNode &nn = nodes[src]; - graph.addEdge(nn, nodes[dst], Galois::MethodFlag::NONE); - //std::cout << "debug: adding edge from " << src << " to " << dst << std::endl; - worklist.push_back(UpdateRequest(nn, graph.getData(nn, Galois::MethodFlag::NONE).priority)); - } - } -} -unsigned propagate(GNode &src, GNode &dst, Galois::MethodFlag flag = Galois::MethodFlag::ALL) { - unsigned srcid = graph.getData(src, Galois::MethodFlag::NONE).id; - unsigned dstid = graph.getData(dst, Galois::MethodFlag::NONE).id; - unsigned newptsto = 0; - - if (srcid != dstid) { - unsigned srcreprid = ocd.getFinalRepresentative(srcid); - unsigned dstreprid = ocd.getFinalRepresentative(dstid); - if (srcreprid != dstreprid && !result[srcreprid].isSubsetEq(result[dstreprid])) { - //std::cout << "debug: unifying " << dstreprid << " by " << srcreprid << std::endl; - graph.getData(nodes[dstreprid], flag); - newptsto = result[dstreprid].unify(result[srcreprid]); - //newptsto = 0; - } - } - return newptsto; -} - -void processLoadStore(PointsToConstraints &constraints, WorkList &worklist, Galois::MethodFlag flag = Galois::MethodFlag::ALL) { - // add edges to the graph based on points-to information of the nodes - // and then add the source of each edge to the worklist. - for (PointsToConstraints::iterator ii = constraints.begin(); ii != constraints.end(); ++ii) { - unsigned src, dst; - //std::cout << "debug: Processing constraint: "; ii->print(); - ii->getSrcDst(src, dst); - unsigned srcrepr = ocd.getFinalRepresentative(src); - unsigned dstrepr = ocd.getFinalRepresentative(dst); - if (ii->getType() == PtsToCons::Load) { - GNode &nndstrepr = nodes[dstrepr]; - std::vector ptstoOfSrc; - result[srcrepr].getAllSetBits(ptstoOfSrc); - for (std::vector::iterator pointee = ptstoOfSrc.begin(); pointee != ptstoOfSrc.end(); ++pointee) { - unsigned pointeerepr = ocd.getFinalRepresentative(*pointee); - if (pointeerepr != dstrepr && graph.findEdge(nodes[pointeerepr], nodes[dstrepr]) == graph.edge_end(nodes[pointeerepr])) { - GNode &nn = nodes[pointeerepr]; - graph.addEdge(nn, nndstrepr, flag); - //std::cout << "debug: adding edge from " << *pointee << " to " << dst << std::endl; - worklist.push_back(UpdateRequest(nn, graph.getData(nn, Galois::MethodFlag::ALL).priority)); - } - } - } else { // store. 
- std::vector ptstoOfDst; - bool newedgeadded = false; - GNode &nnsrcrepr = nodes[srcrepr]; - result[dstrepr].getAllSetBits(ptstoOfDst); - for (std::vector::iterator pointee = ptstoOfDst.begin(); pointee != ptstoOfDst.end(); ++pointee) { - unsigned pointeerepr = ocd.getFinalRepresentative(*pointee); - if (srcrepr != pointeerepr && graph.findEdge(nodes[srcrepr],nodes[pointeerepr]) == graph.edge_end(nodes[srcrepr])) { - graph.addEdge(nnsrcrepr, nodes[pointeerepr], flag); - //std::cout << "debug: adding edge from " << src << " to " << *pointee << std::endl; - newedgeadded = true; - } - } - if (newedgeadded) { - worklist.push_back(UpdateRequest(nnsrcrepr, graph.getData(nnsrcrepr, Galois::MethodFlag::ALL).priority)); - } - } - } -} - -unsigned nfired; -//unsigned niter; -struct Process { - Process() { } - - template - void operator()(UpdateRequest &req, Context& ctx) { - if (++nfired < THRESHOLD_LOADSTORE) { - GNode &src = req.n; - for (Graph::edge_iterator ii = graph.edge_begin(src, Galois::MethodFlag::NONE), - ei = graph.edge_end(src, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - unsigned newptsto = propagate(src, dst, Galois::MethodFlag::ALL); - if (newptsto) { - ctx.push(UpdateRequest(dst, newptsto)); - } - } - } else { - nfired = 0; - WorkList wl; - processLoadStore(loadstoreconstraints, wl, Galois::MethodFlag::ALL); - /*if (wl.size() > THRESHOLD_OCD) { - ocd.process(wl); - }*/ - for (WorkList::iterator ii = wl.begin(); ii != wl.end(); ++ii) { - ctx.push(*ii); - } - } - /*if (nfired % 500 == 0) { - std::cout << ++niter << std::endl; - }*/ - - } -}; - -void runSerial(PointsToConstraints &addrcopyconstraints, PointsToConstraints &loadstoreconstraints) { - WorkList worklist; - //unsigned niteration = 0; - //bool changed = false; - unsigned nnodesprocessed = 0; - - processAddressOfCopy(addrcopyconstraints, worklist); - processLoadStoreSerial(loadstoreconstraints, worklist, Galois::MethodFlag::NONE); // required when there are zero copy constraints which keeps worklist empty. - - //std::cout << "debug: no of addr+copy constraints = " << addrcopyconstraints.size() << ", no of load+store constraints = " << loadstoreconstraints.size() << std::endl; - //std::cout << "debug: no of nodes = " << nodes.size() << std::endl; - - while (!worklist.empty()) { - //std::cout << "debug: Iteration " << ++niteration << ", worklist.size=" << worklist.size() << "\n"; - GNode src = worklist.back().n; - worklist.pop_back(); - - //std::cout << "debug: processing worklist element " << graph.getData(src, Galois::MethodFlag::NONE).id << std::endl; - for (Graph::edge_iterator ii = graph.edge_begin(src, Galois::MethodFlag::NONE), ei = graph.edge_end(src, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - unsigned newptsto = propagate(src, dst, Galois::MethodFlag::NONE); - if (newptsto) { - worklist.push_back(UpdateRequest(dst, newptsto)); - } - } - if (++nnodesprocessed > THRESHOLD_LOADSTORE || worklist.empty()) { - nnodesprocessed = 0; - processLoadStoreSerial(loadstoreconstraints, worklist); // add edges to graph, add their sources to worklist. 
- if (worklist.size() > THRESHOLD_OCD) { - ocd.process(worklist); - } - } - } -} - -void runParallel(PointsToConstraints &addrcopyconstraints, PointsToConstraints &loadstoreconstraints) { - WorkList worklist; - //unsigned niteration = 0; - - processAddressOfCopy(addrcopyconstraints, worklist); - processLoadStore(loadstoreconstraints, worklist, Galois::MethodFlag::NONE); - - //using namespace Galois::Runtime::WorkList; - //Galois::for_each > >(worklist.begin(), worklist.end(), Process()); - //Galois::for_each >(worklist.begin(), worklist.end(), Process()); - //Galois::for_each >(worklist.begin(), worklist.end(), Process()); - //Galois::for_each >(worklist.begin(), worklist.end(), Process()); - //Galois::for_each >(worklist.begin(), worklist.end(), Process()); - //Galois::for_each > >(worklist.begin(), worklist.end(), Process()); - //Galois::for_each > >(worklist.begin(), worklist.end(), Process()); - //Galois::for_each > >(worklist.begin(), worklist.end(), Process()); - //Galois::for_each, FIFO<> > >(worklist.begin(), worklist.end(), Process()); - //Galois::for_each, FIFO<> > >(worklist.begin(), worklist.end(), Process()); - //Galois::for_each, FIFO<> > >(worklist.begin(), worklist.end(), Process()); - Galois::for_each(worklist.begin(), worklist.end(), Process()); -} - -unsigned readConstraints(const char *file, PointsToConstraints &addrcopyconstraints, PointsToConstraints &loadstoreconstraints) { - unsigned numNodes = 0; - unsigned nconstraints = 0; - - std::ifstream cfile(file); - std::string cstr; - - getline(cfile, cstr); // no of vars. - sscanf(cstr.c_str(), "%d", &numNodes); - - getline(cfile, cstr); // no of constraints. - sscanf(cstr.c_str(), "%d", &nconstraints); - - addrcopyconstraints.clear(); - loadstoreconstraints.clear(); - - unsigned consno, src, dst, offset; - PtsToCons::ConstraintType type; - - for (unsigned ii = 0; ii < nconstraints; ++ii) { - getline(cfile, cstr); - union { int as_int; PtsToCons::ConstraintType as_ctype; } type_converter; - sscanf(cstr.c_str(), "%d,%d,%d,%d,%d", &consno, &src, &dst, &type_converter.as_int, &offset); - type = type_converter.as_ctype; - PtsToCons cc(type, src, dst); - if (type == PtsToCons::AddressOf || type == PtsToCons::Copy) { - addrcopyconstraints.push_back(cc); - } else if (type == PtsToCons::Load || PtsToCons::Store) { - loadstoreconstraints.push_back(cc); - } - } - cfile.close(); - - return numNodes; -} - - -void printConstraints(PointsToConstraints &constraints) { - for (PointsToConstraints::iterator ii = constraints.begin(); ii != constraints.end(); ++ii) { - ii->print(); - } -} - -} - -int main(int argc, char** argv) { - LonestarStart(argc, argv, name, desc, url); - - numNodes = readConstraints(input.c_str(), addrcopyconstraints, loadstoreconstraints); - //printConstraints(addrcopyconstraints); - //printConstraints(loadstoreconstraints); - - result.resize(numNodes); - nodes.resize(numNodes); - ocd.init(); - - unsigned nodeid = 0; - - - for (PointsToInfo::iterator ii = result.begin(); ii != result.end(); ++ii, ++nodeid) { - ii->init(numNodes); - - GNode src = graph.createNode(Node(nodeid)); - graph.addNode(src); - nodes[nodeid] = src; - } - - Galois::StatTimer T; - T.start(); - - //numThreads = 0; - if (numThreads) { - std::cout << "-------- Parallel version: " << numThreads << " threads.\n"; - runParallel(addrcopyconstraints, loadstoreconstraints); - } else { - std::cout << "-------- Sequential version.\n"; - runSerial(addrcopyconstraints, loadstoreconstraints); - } - T.stop(); - - - //std::cout << "No of points-to facts computed 
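// Illustrative sketch, not the deleted solver: a tiny sequential
// Andersen-style inclusion-based points-to pass over the same four
// constraint kinds (AddressOf, Copy, Load, Store), using std::set points-to
// sets and a plain fixed-point loop instead of the constraint graph and
// worklist machinery above. Kind/Cons/solve are names invented for this sketch.
#include <set>
#include <vector>
#include <cstddef>

enum Kind { AddressOf, Copy, Load, Store };
struct Cons { Kind kind; unsigned src, dst; };   // dst = &src, dst = src, dst = *src, *dst = src

std::vector<std::set<unsigned>> solve(unsigned numVars, const std::vector<Cons>& cs) {
  std::vector<std::set<unsigned>> pts(numVars);
  for (const Cons& c : cs)
    if (c.kind == AddressOf) pts[c.dst].insert(c.src);     // seed: dst points to src
  bool changed = true;
  while (changed) {                                        // iterate to a fixed point
    changed = false;
    for (const Cons& c : cs) {
      std::set<unsigned> add;
      switch (c.kind) {
      case Copy:                                           // pts(dst) grows by pts(src)
        add = pts[c.src];
        break;
      case Load:                                           // pts(dst) grows by pts(p), p in pts(src)
        for (unsigned p : pts[c.src]) add.insert(pts[p].begin(), pts[p].end());
        break;
      case Store:                                          // pts(p) grows by pts(src), p in pts(dst)
        for (unsigned p : pts[c.dst]) {
          std::size_t before = pts[p].size();
          pts[p].insert(pts[c.src].begin(), pts[c.src].end());
          if (pts[p].size() != before) changed = true;
        }
        continue;
      default:                                             // AddressOf already seeded
        continue;
      }
      std::size_t before = pts[c.dst].size();
      pts[c.dst].insert(add.begin(), add.end());
      if (pts[c.dst].size() != before) changed = true;
    }
  }
  return pts;
}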
= " << countPointsToFacts() << std::endl; - //checkReprPointsTo(); - //printPointsToInfo(result); - /*if (!skipVerify && !verify(result)) { - std::cerr << "If graph was connected, verification failed\n"; - assert(0 && "If graph was connected, verification failed"); - abort(); - }*/ - - return 0; -} - diff --git a/maxflow/galois/apps/spanningtree/CMakeLists.txt b/maxflow/galois/apps/spanningtree/CMakeLists.txt deleted file mode 100644 index b5b2963..0000000 --- a/maxflow/galois/apps/spanningtree/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -app(spanningtree) diff --git a/maxflow/galois/apps/spanningtree/SpanningTree.cpp b/maxflow/galois/apps/spanningtree/SpanningTree.cpp deleted file mode 100644 index bb92248..0000000 --- a/maxflow/galois/apps/spanningtree/SpanningTree.cpp +++ /dev/null @@ -1,322 +0,0 @@ -/** Spanning-tree application -*- C++ -*- - * @file - * - * A simple spanning tree algorithm to demonstrate the Galois system. - * - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author Donald Nguyen - */ -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" -#include "Galois/Bag.h" -#include "Galois/Statistic.h" -#include "Galois/UnionFind.h" -#include "Galois/Graph/LCGraph.h" -#include "Galois/ParallelSTL/ParallelSTL.h" -#include "llvm/Support/CommandLine.h" - -#include "Lonestar/BoilerPlate.h" - -#include -#include -#include - -namespace cll = llvm::cl; - -const char* name = "Spanning Tree Algorithm"; -const char* desc = "Computes the spanning forest of a graph"; -const char* url = NULL; - -enum Algo { - demo, - asynchronous, - blockedasync -}; - -static cll::opt inputFilename(cll::Positional, cll::desc(""), cll::Required); -static cll::opt algo("algo", cll::desc("Choose an algorithm:"), - cll::values( - clEnumVal(demo, "Demonstration algorithm"), - clEnumVal(asynchronous, "Asynchronous"), - clEnumVal(blockedasync, "Blocked Asynchronous"), - clEnumValEnd), cll::init(blockedasync)); - -struct Node: public Galois::UnionFindNode { - Node*& component() { return m_component; } -}; - -typedef Galois::Graph::LC_Linear_Graph - ::with_numa_alloc::type Graph; - -typedef Graph::GraphNode GNode; - -Graph graph; - -std::ostream& operator<<(std::ostream& os, const Node& n) { - os << "[id: " << &n << "]"; - return os; -} - -typedef std::pair Edge; - -Galois::InsertBag mst; - -/** - * Construct a spanning forest via a modified BFS algorithm. Intended as a - * simple introduction to the Galois system and not intended to particularly - * fast. Restrictions: graph must be strongly connected. 
In this case, the - * spanning tree is over the undirected graph created by making the directed - * graph symmetric. - */ -struct DemoAlgo { - Node* root; - - void operator()(GNode src, Galois::UserContext& ctx) { - for (Graph::edge_iterator ii = graph.edge_begin(src, Galois::MethodFlag::ALL), - ei = graph.edge_end(src, Galois::MethodFlag::ALL); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Node& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - if (ddata.component() == root) - continue; - ddata.component() = root; - mst.push(std::make_pair(src, dst)); - ctx.push(dst); - } - } - - void operator()() { - Graph::iterator ii = graph.begin(), ei = graph.end(); - if (ii != ei) { - root = &graph.getData(*ii); - Galois::for_each(*ii, *this); - } - } -}; - -/** - * Like asynchronous connected components algorithm. - */ -struct AsyncAlgo { - struct Merge { - Galois::Statistic& emptyMerges; - Merge(Galois::Statistic& e): emptyMerges(e) { } - - void operator()(const GNode& src) const { - Node& sdata = graph.getData(src, Galois::MethodFlag::NONE); - for (Graph::edge_iterator ii = graph.edge_begin(src, Galois::MethodFlag::NONE), - ei = graph.edge_end(src, Galois::MethodFlag::NONE); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Node& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - if (sdata.merge(&ddata)) { - mst.push(std::make_pair(src, dst)); - } else { - emptyMerges += 1; - } - } - } - }; - - //! Normalize component by doing find with path compression - struct Normalize { - void operator()(const GNode& src) const { - Node& sdata = graph.getData(src, Galois::MethodFlag::NONE); - sdata.component() = sdata.findAndCompress(); - } - }; - - void operator()() { - Galois::Statistic emptyMerges("EmptyMerges"); - Galois::do_all_local(graph, Merge(emptyMerges), - Galois::loopname("Merge"), Galois::do_all_steal(true)); - Galois::do_all_local(graph, Normalize(), Galois::loopname("Normalize")); - } -}; - -/** - * Improve performance of async algorithm by following machine topology. - */ -struct BlockedAsyncAlgo { - struct WorkItem { - GNode src; - Graph::edge_iterator start; - }; - - struct Merge { - typedef int tt_does_not_need_aborts; - - Galois::InsertBag& items; - - //! Add the next edge between components to the worklist - template - void process(const GNode& src, const Graph::edge_iterator& start, Pusher& pusher) { - Node& sdata = graph.getData(src, Galois::MethodFlag::NONE); - int count = 1; - for (Graph::edge_iterator ii = start, ei = graph.edge_end(src, Galois::MethodFlag::NONE); - ii != ei; - ++ii, ++count) { - GNode dst = graph.getEdgeDst(ii); - Node& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - if (sdata.merge(&ddata)) { - mst.push(std::make_pair(src, dst)); - if (Limit == 0 || count != Limit) - continue; - } - - if (MakeContinuation || (Limit != 0 && count == Limit)) { - WorkItem item = { src, ii + 1 }; - pusher.push(item); - break; - } - } - } - - void operator()(const GNode& src) { - Graph::edge_iterator start = graph.edge_begin(src, Galois::MethodFlag::NONE); - if (Galois::Runtime::LL::getPackageForSelf(Galois::Runtime::LL::getTID()) == 0) { - process(src, start, items); - } else { - process(src, start, items); - } - } - - void operator()(const WorkItem& item, Galois::UserContext& ctx) { - process(item.src, item.start, ctx); - } - }; - - //! 
Normalize component by doing find with path compression - struct Normalize { - void operator()(const GNode& src) const { - Node& sdata = graph.getData(src, Galois::MethodFlag::NONE); - sdata.component() = sdata.findAndCompress(); - } - }; - - void operator()() { - Galois::InsertBag items; - Merge merge = { items }; - Galois::do_all_local(graph, merge, Galois::loopname("Initialize"), Galois::do_all_steal(false)); - Galois::for_each_local(items, merge, - Galois::loopname("Merge"), Galois::wl >()); - Galois::do_all_local(graph, Normalize(), Galois::loopname("Normalize")); - } -}; - -struct is_bad_graph { - bool operator()(const GNode& n) const { - Node& me = graph.getData(n); - for (Graph::edge_iterator ii = graph.edge_begin(n), ei = graph.edge_end(n); ii != ei; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Node& data = graph.getData(dst); - if (me.component() != data.component()) { - std::cerr << "not in same component: " << me << " and " << data << "\n"; - return true; - } - } - return false; - } -}; - -struct is_bad_mst { - bool operator()(const Edge& e) const { - return graph.getData(e.first).component() != graph.getData(e.second).component(); - } -}; - -struct CheckAcyclic { - struct Accum { - Galois::GAccumulator roots; - }; - - Accum* accum; - - void operator()(const GNode& n) { - Node& data = graph.getData(n); - if (data.component() == &data) - accum->roots += 1; - } - - bool operator()() { - Accum a; - accum = &a; - Galois::do_all_local(graph, *this); - unsigned numRoots = a.roots.reduce(); - unsigned numEdges = std::distance(mst.begin(), mst.end()); - if (graph.size() - numRoots != numEdges) { - std::cerr << "Generated graph is not a forest. " - << "Expected " << graph.size() - numRoots << " edges but " - << "found " << numEdges << "\n"; - return false; - } - - std::cout << "Num trees: " << numRoots << "\n"; - std::cout << "Tree edges: " << numEdges << "\n"; - return true; - } -}; - -bool verify() { - if (Galois::ParallelSTL::find_if(graph.begin(), graph.end(), is_bad_graph()) == graph.end()) { - if (Galois::ParallelSTL::find_if(mst.begin(), mst.end(), is_bad_mst()) == mst.end()) { - CheckAcyclic c; - return c(); - } - } - return false; -} - -template -void run() { - Algo algo; - - Galois::StatTimer T; - T.start(); - algo(); - T.stop(); -} - -int main(int argc, char** argv) { - Galois::StatManager statManager; - LonestarStart(argc, argv, name, desc, url); - - Galois::StatTimer Tinitial("InitializeTime"); - Tinitial.start(); - Galois::Graph::readGraph(graph, inputFilename); - std::cout << "Num nodes: " << graph.size() << "\n"; - Tinitial.stop(); - - //Galois::preAlloc(numThreads + graph.size() / Galois::Runtime::MM::pageSize * 60); - Galois::reportPageAlloc("MeminfoPre"); - switch (algo) { - case demo: run(); break; - case asynchronous: run(); break; - case blockedasync: run(); break; - default: std::cerr << "Unknown algo: " << algo << "\n"; - } - Galois::reportPageAlloc("MeminfoPost"); - - if (!skipVerify && !verify()) { - std::cerr << "verification failed\n"; - assert(0 && "verification failed"); - abort(); - } - - return 0; -} diff --git a/maxflow/galois/apps/sssp/CMakeLists.txt b/maxflow/galois/apps/sssp/CMakeLists.txt deleted file mode 100644 index 9e7578b..0000000 --- a/maxflow/galois/apps/sssp/CMakeLists.txt +++ /dev/null @@ -1,2 +0,0 @@ -app(sssp-all SSSPall.cpp) -app(sssp SSSP.cpp) diff --git a/maxflow/galois/apps/sssp/GraphLabAlgo.h b/maxflow/galois/apps/sssp/GraphLabAlgo.h deleted file mode 100644 index 60ea5a0..0000000 --- a/maxflow/galois/apps/sssp/GraphLabAlgo.h +++ 
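// Illustrative sketch, not the Galois code: the idea behind AsyncAlgo::Merge
// above, reduced to a sequential union-find over the vertices where every
// edge whose endpoints land in different components becomes a
// spanning-forest edge (a failed merge is the "empty merge" counted above).
// DSU/spanningForest are names invented for this sketch.
#include <vector>
#include <utility>
#include <numeric>

struct DSU {
  std::vector<int> parent;
  explicit DSU(int n) : parent(n) { std::iota(parent.begin(), parent.end(), 0); }
  int find(int x) { return parent[x] == x ? x : parent[x] = find(parent[x]); }
  bool merge(int a, int b) {                 // true iff a new component link was made
    a = find(a); b = find(b);
    if (a == b) return false;                // empty merge: already in one component
    parent[a] = b;
    return true;
  }
};

std::vector<std::pair<int,int>>
spanningForest(int n, const std::vector<std::pair<int,int>>& edges) {
  DSU dsu(n);
  std::vector<std::pair<int,int>> forest;
  for (const auto& e : edges)
    if (dsu.merge(e.first, e.second)) forest.push_back(e);   // analogous to mst.push(...)
  return forest;
}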
/dev/null @@ -1,86 +0,0 @@ -#ifndef APPS_SSSP_GRAPHLABALGO_H -#define APPS_SSSP_GRAPHLABALGO_H - -#include "Galois/DomainSpecificExecutors.h" -#include "Galois/Graph/OCGraph.h" -#include "Galois/Graph/LCGraph.h" -#include "Galois/Graph/GraphNodeBag.h" - -#include - -#include "SSSP.h" - -struct GraphLabAlgo { - typedef Galois::Graph::LC_CSR_Graph - ::with_no_lockable::type - ::with_numa_alloc::type InnerGraph; - typedef Galois::Graph::LC_InOut_Graph Graph; - typedef Graph::GraphNode GNode; - - std::string name() const { return "GraphLab"; } - - void readGraph(Graph& graph) { readInOutGraph(graph); } - - struct Initialize { - Graph& g; - Initialize(Graph& g): g(g) { } - void operator()(Graph::GraphNode n) { - g.getData(n).dist = DIST_INFINITY; - } - }; - - struct Program { - Dist min_dist; - bool changed; - - struct gather_type { }; - typedef int tt_needs_scatter_out_edges; - - struct message_type { - Dist dist; - message_type(Dist d = DIST_INFINITY): dist(d) { } - - message_type& operator+=(const message_type& other) { - dist = std::min(dist, other.dist); - return *this; - } - }; - - void init(Graph& graph, GNode node, const message_type& msg) { - min_dist = msg.dist; - } - - void apply(Graph& graph, GNode node, const gather_type&) { - changed = false; - SNode& data = graph.getData(node, Galois::MethodFlag::NONE); - if (data.dist > min_dist) { - changed = true; - data.dist = min_dist; - } - } - - bool needsScatter(Graph& graph, GNode node) { - return changed; - } - - void scatter(Graph& graph, GNode node, GNode src, GNode dst, - Galois::GraphLab::Context& ctx, Graph::edge_data_reference edgeValue) { - SNode& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - SNode& sdata = graph.getData(src, Galois::MethodFlag::NONE); - Dist newDist = sdata.dist + edgeValue; - if (ddata.dist > newDist) { - ctx.push(dst, message_type(newDist)); - } - } - - void gather(Graph& graph, GNode node, GNode src, GNode dst, gather_type&, Graph::edge_data_reference) { } - }; - - void operator()(Graph& graph, const GNode& source) { - Galois::GraphLab::SyncEngine engine(graph, Program()); - engine.signal(source, Program::message_type(0)); - engine.execute(); - } -}; - -#endif diff --git a/maxflow/galois/apps/sssp/LigraAlgo.h b/maxflow/galois/apps/sssp/LigraAlgo.h deleted file mode 100644 index d25230c..0000000 --- a/maxflow/galois/apps/sssp/LigraAlgo.h +++ /dev/null @@ -1,102 +0,0 @@ -#ifndef APPS_SSSP_LIGRAALGO_H -#define APPS_SSSP_LIGRAALGO_H - -#include "Galois/DomainSpecificExecutors.h" -#include "Galois/Graph/OCGraph.h" -#include "Galois/Graph/LCGraph.h" -#include "Galois/Graph/GraphNodeBag.h" - -#include - -#include "SSSP.h" - -template -struct LigraAlgo: public Galois::LigraGraphChi::ChooseExecutor { - struct LNode: public SNode { - bool visited; - }; - - typedef typename Galois::Graph::LC_InlineEdge_Graph - ::template with_compressed_node_ptr::type - ::template with_no_lockable::type - ::template with_numa_alloc::type InnerGraph; - typedef typename boost::mpl::if_c, - Galois::Graph::LC_InOut_Graph>::type - Graph; - typedef typename Graph::GraphNode GNode; - - std::string name() const { return UseGraphChi ? 
"LigraChi" : "Ligra"; } - - void readGraph(Graph& graph) { - readInOutGraph(graph); - this->checkIfInMemoryGraph(graph, memoryLimit); - } - - struct Initialize { - Graph& graph; - Initialize(Graph& g): graph(g) { } - void operator()(GNode n) { - LNode& data = graph.getData(n); - data.dist = DIST_INFINITY; - data.visited = false; - } - }; - - struct EdgeOperator { - template - bool cond(GTy& graph, typename GTy::GraphNode) { return true; } - - template - bool operator()(GTy& graph, typename GTy::GraphNode src, typename GTy::GraphNode dst, typename GTy::edge_data_reference weight) { - LNode& sdata = graph.getData(src, Galois::MethodFlag::NONE); - LNode& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - - while (true) { - Dist newDist = sdata.dist + weight; - Dist oldDist = ddata.dist; - if (oldDist <= newDist) - return false; - if (__sync_bool_compare_and_swap(&ddata.dist, oldDist, newDist)) { - return __sync_bool_compare_and_swap(&ddata.visited, false, true); - } - } - return false; - } - }; - - struct ResetVisited { - Graph& graph; - ResetVisited(Graph& g): graph(g) { } - void operator()(size_t n) { - graph.getData(graph.nodeFromId(n)).visited = false; - } - }; - - void operator()(Graph& graph, const GNode& source) { - Galois::Statistic roundStat("Rounds"); - - Galois::GraphNodeBagPair<> bags(graph.size()); - - graph.getData(source).dist = 0; - - this->outEdgeMap(memoryLimit, graph, EdgeOperator(), source, bags.next()); - Galois::do_all_local(bags.next(), ResetVisited(graph)); - - unsigned rounds = 0; - while (!bags.next().empty()) { - if (++rounds == graph.size()) { - std::cout << "Negative weight cycle\n"; - break; - } - - bags.swap(); - this->outEdgeMap(memoryLimit, graph, EdgeOperator(), bags.cur(), bags.next(), true); - Galois::do_all_local(bags.next(), ResetVisited(graph)); - } - - roundStat += rounds + 1; - } -}; - -#endif diff --git a/maxflow/galois/apps/sssp/SSSP.cpp b/maxflow/galois/apps/sssp/SSSP.cpp deleted file mode 100644 index 3018896..0000000 --- a/maxflow/galois/apps/sssp/SSSP.cpp +++ /dev/null @@ -1,558 +0,0 @@ -/** Single source shortest paths -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Single source shortest paths. 
- * - * @author Andrew Lenharth - */ -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" -#include "Galois/Bag.h" -#include "Galois/Statistic.h" -#include "Galois/Timer.h" -#include "Galois/Graph/LCGraph.h" -#include "Galois/Graph/TypeTraits.h" -#include "llvm/Support/CommandLine.h" -#include "Lonestar/BoilerPlate.h" - -#include -#include -#include - -#include "SSSP.h" -#include "GraphLabAlgo.h" -#include "LigraAlgo.h" - -namespace cll = llvm::cl; - -static const char* name = "Single Source Shortest Path"; -static const char* desc = - "Computes the shortest path from a source node to all nodes in a directed " - "graph using a modified chaotic iteration algorithm"; -static const char* url = "single_source_shortest_path"; - -enum Algo { - async, - asyncWithCas, - asyncPP, - graphlab, - ligra, - ligraChi, - serial -}; - -static cll::opt filename(cll::Positional, cll::desc(""), cll::Required); -static cll::opt transposeGraphName("graphTranspose", cll::desc("Transpose of input graph")); -static cll::opt symmetricGraph("symmetricGraph", cll::desc("Input graph is symmetric")); -static cll::opt startNode("startNode", cll::desc("Node to start search from"), cll::init(0)); -static cll::opt reportNode("reportNode", cll::desc("Node to report distance to"), cll::init(1)); -static cll::opt stepShift("delta", cll::desc("Shift value for the deltastep"), cll::init(10)); -cll::opt memoryLimit("memoryLimit", - cll::desc("Memory limit for out-of-core algorithms (in MB)"), cll::init(~0U)); -static cll::opt algo("algo", cll::desc("Choose an algorithm:"), - cll::values( - clEnumValN(Algo::async, "async", "Asynchronous"), - clEnumValN(Algo::asyncPP, "asyncPP", "Async, CAS, push-pull"), - clEnumValN(Algo::asyncWithCas, "asyncWithCas", "Use compare-and-swap to update nodes"), - clEnumValN(Algo::serial, "serial", "Serial"), - clEnumValN(Algo::graphlab, "graphlab", "Use GraphLab programming model"), - clEnumValN(Algo::ligraChi, "ligraChi", "Use Ligra and GraphChi programming model"), - clEnumValN(Algo::ligra, "ligra", "Use Ligra programming model"), - clEnumValEnd), cll::init(Algo::asyncWithCas)); - -static const bool trackWork = true; -static Galois::Statistic* BadWork; -static Galois::Statistic* WLEmptyWork; - -template -struct not_visited { - Graph& g; - - not_visited(Graph& g): g(g) { } - - bool operator()(typename Graph::GraphNode n) const { - return g.getData(n).dist >= DIST_INFINITY; - } -}; - -template -struct not_consistent { - not_consistent(Graph& g) { } - - bool operator()(typename Graph::GraphNode n) const { return false; } -}; - -template -struct not_consistent::value>::type> { - Graph& g; - not_consistent(Graph& g): g(g) { } - - bool operator()(typename Graph::GraphNode n) const { - Dist dist = g.getData(n).dist; - if (dist == DIST_INFINITY) - return false; - - for (typename Graph::edge_iterator ii = g.edge_begin(n), ee = g.edge_end(n); ii != ee; ++ii) { - Dist ddist = g.getData(g.getEdgeDst(ii)).dist; - Dist w = g.getEdgeData(ii); - if (ddist > dist + w) { - //std::cout << ddist << " " << dist + w << " " << n << " " << g.getEdgeDst(ii) << "\n"; // XXX - return true; - } - } - return false; - } -}; - -template -struct max_dist { - Graph& g; - Galois::GReduceMax& m; - - max_dist(Graph& g, Galois::GReduceMax& m): g(g), m(m) { } - - void operator()(typename Graph::GraphNode n) const { - Dist d = g.getData(n).dist; - if (d == DIST_INFINITY) - return; - m.update(d); - } -}; - -template -struct UpdateRequestIndexer: public std::unary_function { - unsigned int operator() (const UpdateRequest& val) 
const { - unsigned int t = val.w >> stepShift; - return t; - } -}; - - -template -bool verify(Graph& graph, typename Graph::GraphNode source) { - if (graph.getData(source).dist != 0) { - std::cerr << "source has non-zero dist value\n"; - return false; - } - namespace pstl = Galois::ParallelSTL; - - size_t notVisited = pstl::count_if(graph.begin(), graph.end(), not_visited(graph)); - if (notVisited) { - std::cerr << notVisited << " unvisited nodes; this is an error if the graph is strongly connected\n"; - } - - bool consistent = pstl::find_if(graph.begin(), graph.end(), not_consistent(graph)) == graph.end(); - if (!consistent) { - std::cerr << "node found with incorrect distance\n"; - return false; - } - - Galois::GReduceMax m; - Galois::do_all(graph.begin(), graph.end(), max_dist(graph, m)); - std::cout << "max dist: " << m.reduce() << "\n"; - - return true; -} - -template -void initialize(Algo& algo, - typename Algo::Graph& graph, - typename Algo::Graph::GraphNode& source, - typename Algo::Graph::GraphNode& report) { - - algo.readGraph(graph); - std::cout << "Read " << graph.size() << " nodes\n"; - - if (startNode >= graph.size() || reportNode >= graph.size()) { - std::cerr - << "failed to set report: " << reportNode - << " or failed to set source: " << startNode << "\n"; - assert(0); - abort(); - } - - typename Algo::Graph::iterator it = graph.begin(); - std::advance(it, startNode); - source = *it; - it = graph.begin(); - std::advance(it, reportNode); - report = *it; -} - -template -void readInOutGraph(Graph& graph) { - using namespace Galois::Graph; - if (symmetricGraph) { - Galois::Graph::readGraph(graph, filename); - } else if (transposeGraphName.size()) { - Galois::Graph::readGraph(graph, filename, transposeGraphName); - } else { - GALOIS_DIE("Graph type not supported"); - } -} - -struct SerialAlgo { - typedef Galois::Graph::LC_CSR_Graph - ::with_no_lockable::type Graph; - typedef Graph::GraphNode GNode; - typedef UpdateRequestCommon UpdateRequest; - - std::string name() const { return "Serial"; } - void readGraph(Graph& graph) { Galois::Graph::readGraph(graph, filename); } - - struct Initialize { - Graph& g; - Initialize(Graph& g): g(g) { } - - void operator()(Graph::GraphNode n) { - g.getData(n).dist = DIST_INFINITY; - } - }; - - void operator()(Graph& graph, const GNode src) const { - std::set > initial; - UpdateRequest init(src, 0); - initial.insert(init); - - Galois::Statistic counter("Iterations"); - - while (!initial.empty()) { - counter += 1; - UpdateRequest req = *initial.begin(); - initial.erase(initial.begin()); - SNode& data = graph.getData(req.n, Galois::MethodFlag::NONE); - if (req.w < data.dist) { - data.dist = req.w; - for (Graph::edge_iterator - ii = graph.edge_begin(req.n, Galois::MethodFlag::NONE), - ee = graph.edge_end(req.n, Galois::MethodFlag::NONE); - ii != ee; ++ii) { - GNode dst = graph.getEdgeDst(ii); - Dist d = graph.getEdgeData(ii); - Dist newDist = req.w + d; - if (newDist < graph.getData(dst, Galois::MethodFlag::NONE).dist) { - initial.insert(UpdateRequest(dst, newDist)); - } - } - } - } - } -}; - -template -struct AsyncAlgo { - typedef SNode Node; - - typedef Galois::Graph::LC_InlineEdge_Graph - ::template with_out_of_line_lockable::type - ::template with_compressed_node_ptr::type - ::template with_numa_alloc::type - Graph; - typedef typename Graph::GraphNode GNode; - typedef UpdateRequestCommon UpdateRequest; - - std::string name() const { - return UseCas ? 
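// Illustrative sketch, not the OBIM scheduler: the same bucketing rule as
// UpdateRequestIndexer above (a request with tentative distance w lands in
// bucket w >> stepShift), applied in a sequential delta-step-style loop over
// an adjacency list. Stale requests are skipped, mirroring the WLEmptyWork
// check. deltaStepSketch and Edge are names invented for this sketch.
#include <map>
#include <vector>
#include <utility>
#include <limits>

typedef std::pair<int, unsigned> Edge;               // (target, weight)

std::vector<unsigned> deltaStepSketch(const std::vector<std::vector<Edge>>& g,
                                      int source, unsigned stepShift) {
  const unsigned INF = std::numeric_limits<unsigned>::max() - 1;
  std::vector<unsigned> dist(g.size(), INF);
  std::map<unsigned, std::vector<std::pair<int, unsigned>>> buckets;  // index -> (node, dist)
  dist[source] = 0;
  buckets[0].push_back(std::make_pair(source, 0u));
  while (!buckets.empty()) {
    std::vector<std::pair<int, unsigned>> work = std::move(buckets.begin()->second);
    buckets.erase(buckets.begin());                  // drain the lowest bucket first
    for (const auto& req : work) {
      int u = req.first;
      if (req.second != dist[u]) continue;           // stale request, skip
      for (const Edge& e : g[u]) {
        unsigned nd = dist[u] + e.second;
        if (nd < dist[e.first]) {
          dist[e.first] = nd;
          buckets[nd >> stepShift].push_back(std::make_pair(e.first, nd));  // indexer rule
        }
      }
    }
  }
  return dist;
}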
"Asynchronous with CAS" : "Asynchronous"; - } - - void readGraph(Graph& graph) { Galois::Graph::readGraph(graph, filename); } - - struct Initialize { - Graph& g; - Initialize(Graph& g): g(g) { } - void operator()(typename Graph::GraphNode n) { - g.getData(n, Galois::MethodFlag::NONE).dist = DIST_INFINITY; - } - }; - - template - void relaxEdge(Graph& graph, Node& sdata, typename Graph::edge_iterator ii, Pusher& pusher) { - GNode dst = graph.getEdgeDst(ii); - Dist d = graph.getEdgeData(ii); - Node& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - Dist newDist = sdata.dist + d; - Dist oldDist; - while (newDist < (oldDist = ddata.dist)) { - if (!UseCas || __sync_bool_compare_and_swap(&ddata.dist, oldDist, newDist)) { - if (!UseCas) - ddata.dist = newDist; - if (trackWork && oldDist != DIST_INFINITY) - *BadWork += 1; - pusher.push(UpdateRequest(dst, newDist)); - break; - } - } - } - - template - void relaxNode(Graph& graph, UpdateRequest& req, Pusher& pusher) { - const Galois::MethodFlag flag = UseCas ? Galois::MethodFlag::NONE : Galois::MethodFlag::ALL; - Node& sdata = graph.getData(req.n, flag); - volatile Dist* sdist = &sdata.dist; - - if (req.w != *sdist) { - if (trackWork) - *WLEmptyWork += 1; - return; - } - - for (typename Graph::edge_iterator ii = graph.edge_begin(req.n, flag), ei = graph.edge_end(req.n, flag); ii != ei; ++ii) { - if (req.w != *sdist) { - if (trackWork) - *WLEmptyWork += 1; - break; - } - relaxEdge(graph, sdata, ii, pusher); - } - } - - struct Process { - AsyncAlgo* self; - Graph& graph; - Process(AsyncAlgo* s, Graph& g): self(s), graph(g) { } - void operator()(UpdateRequest& req, Galois::UserContext& ctx) { - self->relaxNode(graph, req, ctx); - } - }; - - typedef Galois::InsertBag Bag; - - struct InitialProcess { - AsyncAlgo* self; - Graph& graph; - Bag& bag; - Node& sdata; - InitialProcess(AsyncAlgo* s, Graph& g, Bag& b, Node& d): self(s), graph(g), bag(b), sdata(d) { } - void operator()(typename Graph::edge_iterator ii) { - self->relaxEdge(graph, sdata, ii, bag); - } - }; - - void operator()(Graph& graph, GNode source) { - using namespace Galois::WorkList; - typedef dChunkedFIFO<64> Chunk; - typedef OrderedByIntegerMetric, Chunk, 10> OBIM; - - std::cout << "INFO: Using delta-step of " << (1 << stepShift) << "\n"; - std::cout << "WARNING: Performance varies considerably due to delta parameter.\n"; - std::cout << "WARNING: Do not expect the default to be good for your graph.\n"; - - Bag initial; - graph.getData(source).dist = 0; - Galois::do_all( - graph.out_edges(source, Galois::MethodFlag::NONE).begin(), - graph.out_edges(source, Galois::MethodFlag::NONE).end(), - InitialProcess(this, graph, initial, graph.getData(source))); - Galois::for_each_local(initial, Process(this, graph), Galois::wl()); - } -}; - -struct AsyncAlgoPP { - typedef SNode Node; - - typedef Galois::Graph::LC_InlineEdge_Graph - ::with_out_of_line_lockable::type - ::with_compressed_node_ptr::type - ::with_numa_alloc::type - Graph; - typedef Graph::GraphNode GNode; - typedef UpdateRequestCommon UpdateRequest; - - std::string name() const { - return "Asynchronous with CAS and Push and pull"; - } - - void readGraph(Graph& graph) { Galois::Graph::readGraph(graph, filename); } - - struct Initialize { - Graph& g; - Initialize(Graph& g): g(g) { } - void operator()(Graph::GraphNode n) { - g.getData(n, Galois::MethodFlag::NONE).dist = DIST_INFINITY; - } - }; - - template - void relaxEdge(Graph& graph, Dist& sdata, typename Graph::edge_iterator ii, Pusher& pusher) { - GNode dst = graph.getEdgeDst(ii); 
- Dist d = graph.getEdgeData(ii); - Node& ddata = graph.getData(dst, Galois::MethodFlag::NONE); - Dist newDist = sdata + d; - Dist oldDist; - if (newDist < (oldDist = ddata.dist)) { - do { - if (__sync_bool_compare_and_swap(&ddata.dist, oldDist, newDist)) { - if (trackWork && oldDist != DIST_INFINITY) - *BadWork += 1; - pusher.push(UpdateRequest(dst, newDist)); - break; - } - } while (newDist < (oldDist = ddata.dist)); - } else { - sdata = std::min(oldDist + d, sdata); - } - } - - struct Process { - AsyncAlgoPP* self; - Graph& graph; - Process(AsyncAlgoPP* s, Graph& g): self(s), graph(g) { } - - void operator()(UpdateRequest& req, Galois::UserContext& ctx) { - const Galois::MethodFlag flag = Galois::MethodFlag::NONE; - Node& sdata = graph.getData(req.n, flag); - volatile Dist* psdist = &sdata.dist; - Dist sdist = *psdist; - - if (req.w != sdist) { - if (trackWork) - *WLEmptyWork += 1; - return; - } - - for (Graph::edge_iterator ii = graph.edge_begin(req.n, flag), ei = graph.edge_end(req.n, flag); ii != ei; ++ii) { - self->relaxEdge(graph, sdist, ii, ctx); - } - - // //try doing a pull - // Dist oldDist; - // while (sdist < (oldDist = *psdist)) { - // if (__sync_bool_compare_and_swap(psdist, oldDist, sdist)) { - // req.w = sdist; - // operator()(req, ctx); - // } - // } - } - }; - - typedef Galois::InsertBag Bag; - - struct InitialProcess { - AsyncAlgoPP* self; - Graph& graph; - Bag& bag; - InitialProcess(AsyncAlgoPP* s, Graph& g, Bag& b): self(s), graph(g), bag(b) { } - void operator()(Graph::edge_iterator ii) { - Dist d = 0; - self->relaxEdge(graph, d, ii, bag); - } - }; - - void operator()(Graph& graph, GNode source) { - using namespace Galois::WorkList; - typedef dChunkedFIFO<64> Chunk; - typedef OrderedByIntegerMetric, Chunk, 10> OBIM; - - std::cout << "INFO: Using delta-step of " << (1 << stepShift) << "\n"; - std::cout << "WARNING: Performance varies considerably due to delta parameter.\n"; - std::cout << "WARNING: Do not expect the default to be good for your graph.\n"; - - Bag initial; - graph.getData(source).dist = 0; - Galois::do_all( - graph.out_edges(source, Galois::MethodFlag::NONE).begin(), - graph.out_edges(source, Galois::MethodFlag::NONE).end(), - InitialProcess(this, graph, initial)); - Galois::for_each_local(initial, Process(this, graph), Galois::wl()); - } -}; - -namespace Galois { -template<> -struct does_not_need_aborts::Process> : public boost::true_type {}; -} - -static_assert(Galois::does_not_need_aborts::Process>::value, "Oops"); - -template -void run(bool prealloc = true) { - typedef typename Algo::Graph Graph; - typedef typename Graph::GraphNode GNode; - - Algo algo; - Graph graph; - GNode source, report; - - initialize(algo, graph, source, report); - - size_t approxNodeData = graph.size() * 64; - //size_t approxEdgeData = graph.sizeEdges() * sizeof(typename Graph::edge_data_type) * 2; - if (prealloc) - Galois::preAlloc(numThreads + approxNodeData / Galois::Runtime::MM::pageSize); - Galois::reportPageAlloc("MeminfoPre"); - - Galois::StatTimer T; - std::cout << "Running " << algo.name() << " version\n"; - T.start(); - Galois::do_all_local(graph, typename Algo::Initialize(graph)); - algo(graph, source); - T.stop(); - - Galois::reportPageAlloc("MeminfoPost"); - Galois::Runtime::reportNumaAlloc("NumaPost"); - - std::cout << "Node " << reportNode << " has distance " << graph.getData(report).dist << "\n"; - - if (!skipVerify) { - if (verify(graph, source)) { - std::cout << "Verification successful.\n"; - } else { - std::cerr << "Verification failed.\n"; - assert(0 && 
"Verification failed"); - abort(); - } - } -} - -int main(int argc, char **argv) { - Galois::StatManager statManager; - LonestarStart(argc, argv, name, desc, url); - - if (trackWork) { - BadWork = new Galois::Statistic("BadWork"); - WLEmptyWork = new Galois::Statistic("EmptyWork"); - } - - Galois::StatTimer T("TotalTime"); - T.start(); - switch (algo) { - case Algo::serial: run(); break; - case Algo::async: run >(); break; - case Algo::asyncWithCas: run >(); break; - case Algo::asyncPP: run(); break; -#if defined(__IBMCPP__) && __IBMCPP__ <= 1210 -#else - case Algo::ligra: run >(); break; - case Algo::ligraChi: run >(false); break; - case Algo::graphlab: run(); break; -#endif - default: std::cerr << "Unknown algorithm\n"; abort(); - } - T.stop(); - - if (trackWork) { - delete BadWork; - delete WLEmptyWork; - } - - return 0; -} diff --git a/maxflow/galois/apps/sssp/SSSP.h b/maxflow/galois/apps/sssp/SSSP.h deleted file mode 100644 index e56d5df..0000000 --- a/maxflow/galois/apps/sssp/SSSP.h +++ /dev/null @@ -1,81 +0,0 @@ -/** Single source shortest paths -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Single source shortest paths. 
- * - * @author Andrew Lenharth - */ -#ifndef APPS_SSSP_SSSP_H -#define APPS_SSSP_SSSP_H - -#include "llvm/Support/CommandLine.h" - -#include -#include -#include -#include - -typedef unsigned int Dist; -static const Dist DIST_INFINITY = std::numeric_limits::max() - 1; - -template -struct UpdateRequestCommon { - GrNode n; - Dist w; - - UpdateRequestCommon(const GrNode& N, Dist W): n(N), w(W) {} - - UpdateRequestCommon(): n(), w(0) {} - - bool operator>(const UpdateRequestCommon& rhs) const { - if (w > rhs.w) return true; - if (w < rhs.w) return false; - return n > rhs.n; - } - - bool operator<(const UpdateRequestCommon& rhs) const { - if (w < rhs.w) return true; - if (w > rhs.w) return false; - return n < rhs.n; - } - - bool operator!=(const UpdateRequestCommon& other) const { - if (w != other.w) return true; - return n != other.n; - } - - uintptr_t getID() const { - return reinterpret_cast(n); - } -}; - -struct SNode { - Dist dist; -}; - -template -void readInOutGraph(Graph& graph); - -extern llvm::cl::opt memoryLimit; - - -#endif diff --git a/maxflow/galois/apps/sssp/SSSPall.cpp b/maxflow/galois/apps/sssp/SSSPall.cpp deleted file mode 100644 index bc1c1a6..0000000 --- a/maxflow/galois/apps/sssp/SSSPall.cpp +++ /dev/null @@ -1,172 +0,0 @@ -/** Single source shortest paths -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Single source shortest paths. 
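// Illustrative sketch of why UpdateRequestCommon above orders by (w, n)
// rather than by w alone: a std::set treats elements that compare
// equivalent as duplicates, so the node handle is used as a tie-breaker to
// keep distinct equal-weight requests distinct. Req and the sample values
// are invented for this sketch.
#include <set>
#include <cstdio>

struct Req {
  int n;            // node id (stand-in for the GrNode handle)
  unsigned w;       // tentative distance
  bool operator<(const Req& o) const {
    if (w != o.w) return w < o.w;      // primary key: distance, smallest first
    return n < o.n;                    // tie-breaker: node, so equal weights coexist
  }
};

int main() {
  std::set<Req> q{{7, 3}, {9, 3}, {2, 1}};
  // the cheapest request is served first: prints "front: node 2 at dist 1"
  std::printf("front: node %d at dist %u\n", q.begin()->n, q.begin()->w);
  return 0;
}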
- * - * @author Andrew Lenharth - */ -#include "SSSPall.h" - -#include "Galois/Timer.h" -#include "Galois/Statistic.h" -#include "Galois/Galois.h" -#include "Galois/UserContext.h" -#include "Galois/Graph/LCGraph.h" -#include "llvm/Support/CommandLine.h" - -#include "Lonestar/BoilerPlate.h" - -#include -#include - -namespace cll = llvm::cl; - -static const char* name = "Single Source Shortest Path"; -static const char* desc = - "Computes the shortest path from a source node to all nodes in a directed " - "graph using a modified Bellman-Ford algorithm"; -static const char* url = "single_source_shortest_path"; - -static cll::opt filename(cll::Positional, cll::desc(""), cll::Required); -static cll::opt stepShift("delta", cll::desc("Shift value for the deltastep"), cll::init(10)); - -typedef Galois::Graph::LC_InlineEdge_Graph - ::with_out_of_line_lockable::type - ::with_compressed_node_ptr::type - ::with_numa_alloc::type - Graph; -typedef Graph::GraphNode GNode; - -typedef UpdateRequestCommon UpdateRequest; - -struct UpdateRequestIndexer - : std::unary_function { - unsigned int operator() (const UpdateRequest& val) const { - unsigned int t = val.w >> stepShift; - return t; - } -}; - -Graph graph; - -struct process { - typedef int tt_does_not_need_aborts; - - void operator()(UpdateRequest& req, Galois::UserContext& lwl) { - SNode& data = graph.getData(req.n,Galois::MethodFlag::NONE); - // if (req.w >= data.dist) - // *WLEmptyWork += 1; - unsigned int v; - while (req.w < (v = data.dist[req.c])) { - if (__sync_bool_compare_and_swap(&data.dist[req.c], v, req.w)) { - // if (v != DIST_INFINITY) - // *BadWork += 1; - for (Graph::edge_iterator ii = graph.edge_begin(req.n, Galois::MethodFlag::NONE), - ee = graph.edge_end(req.n, Galois::MethodFlag::NONE); ii != ee; ++ii) { - GNode dst = graph.getEdgeDst(ii); - int d = graph.getEdgeData(ii); - unsigned int newDist = req.w + d; - SNode& rdata = graph.getData(dst,Galois::MethodFlag::NONE); - if (newDist < rdata.dist[req.c]) - lwl.push(UpdateRequest(dst, newDist, req.c)); - } - break; - } - } - } -}; - -struct reset { - void operator()(GNode n) {//, Galois::UserContext& lwl) { - SNode& S = graph.getData(n, Galois::MethodFlag::NONE); - for (int i = 0; i < NUM; ++i) - S.dist[i] = DIST_INFINITY; - } - // void operator()(GNode n, Galois::UserContext& lwl) { - // operator()(n); - // } -}; - -void runBodyParallel(const GNode src[NUM], int n) { - using namespace Galois::WorkList; - typedef dChunkedLIFO<16> dChunk; - typedef ChunkedLIFO<16> Chunk; - typedef OrderedByIntegerMetric OBIM; - - Galois::StatTimer T; - - UpdateRequest one[NUM]; - for (int i = 0; i < n; ++i) - one[i] = UpdateRequest(src[i], 0, i); - T.start(); - Galois::for_each(&one[0], &one[n], process(), Galois::wl()); - T.stop(); -} - -void resetParallel() { - Galois::do_all(graph.begin(), graph.end(), reset()); -} - -int main(int argc, char **argv) { - LonestarStart(argc, argv, name, desc, url); - - // Galois::Statistic sBadWork("BadWork"); - // Galois::Statistic sWLEmptyWork("WLEmptyWork"); - // BadWork = &sBadWork; - // WLEmptyWork = &sWLEmptyWork; - - Galois::Graph::readGraph(graph, filename); - - std::cout << "Read " << graph.size() << " nodes\n"; - std::cout << "Using delta-step of " << (1 << stepShift) << "\n"; - std::cout << "Doing " << NUM << " at a time\n"; - std::cout << "WARNING: Performance varies considerably due to -delta. 
Do not expect the default to be good for your graph\n"; - - unsigned int id = 0; - for (Graph::iterator src = graph.begin(), ee = - graph.end(); src != ee; ++src) { - SNode& node = graph.getData(*src,Galois::MethodFlag::NONE); - node.id = id++; - } - - resetParallel(); - - Galois::StatTimer T("AllSourcesTimer"); - T.start(); - int at = 0; - GNode N[NUM]; - for (Graph::iterator src = graph.begin(), ee = - graph.end(); src != ee; ++src) { - N[at++] = *src; - if (at == NUM) { - runBodyParallel(N, NUM); - resetParallel(); - at = 0; - } - } - if (at != 0) - runBodyParallel(N, at); - - T.stop(); - - return 0; -} diff --git a/maxflow/galois/apps/sssp/SSSPall.h b/maxflow/galois/apps/sssp/SSSPall.h deleted file mode 100644 index c4be3a7..0000000 --- a/maxflow/galois/apps/sssp/SSSPall.h +++ /dev/null @@ -1,88 +0,0 @@ -/** Single source shortest paths -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Single source shortest paths. 
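The process() operator above lowers data.dist[req.c] inside a __sync_bool_compare_and_swap retry loop, so concurrent relaxations can only ever shrink a distance and a lost race simply retries against the newer value. A self-contained sketch of that pattern with std::atomic (the distances fed in below are made up):

#include <atomic>
#include <cstdint>
#include <iostream>
#include <thread>
#include <vector>

// Lower d to candidate if candidate is smaller; returns true if this call
// installed the new value. Same retry shape as the deleted CAS loop.
bool relax_min(std::atomic<uint32_t>& d, uint32_t candidate) {
  uint32_t cur = d.load(std::memory_order_relaxed);
  while (candidate < cur) {
    if (d.compare_exchange_weak(cur, candidate, std::memory_order_relaxed))
      return true;   // we won the race and shrank the distance
    // on failure, cur is refreshed with the value another thread wrote
  }
  return false;      // someone else already holds something at least as small
}

int main() {
  std::atomic<uint32_t> dist{1000};
  std::vector<std::thread> workers;
  for (uint32_t c : {700u, 300u, 900u, 50u})
    workers.emplace_back([&dist, c] { relax_min(dist, c); });
  for (auto& t : workers) t.join();
  std::cout << dist.load() << "\n";  // always prints 50: the minimum wins
  return 0;
}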
- * - * @author Andrew Lenharth - */ -#ifndef SSSP_H -#define SSSP_H - -#include -#include -#include -#include - -#define NUM 32 - -static const unsigned int DIST_INFINITY = - std::numeric_limits::max() - 1; - -template -struct UpdateRequestCommon { - GrNode n; - unsigned int w; - unsigned int c; - - UpdateRequestCommon(const GrNode& N, unsigned int W, unsigned int C) - :n(N), w(W), c(C) - {} - - UpdateRequestCommon() - :n(), w(0), c(0) - {} - - bool operator>(const UpdateRequestCommon& rhs) const { - if (w > rhs.w) return true; - if (w < rhs.w) return false; - if (n > rhs.n) return true; - if (n < rhs.n) return false; - return c > rhs.c; - } - - bool operator<(const UpdateRequestCommon& rhs) const { - if (w < rhs.w) return true; - if (w > rhs.w) return false; - if (n < rhs.n) return true; - if (n > rhs.n) return false; - return c < rhs.c; - } - - bool operator!=(const UpdateRequestCommon& other) const { - if (w != other.w) return true; - if (n != other.n) return true; - return c != other.c; - } - - uintptr_t getID() const { - //return static_cast(n); - return reinterpret_cast(n); - } -}; - -struct SNode { - unsigned int id; - unsigned int dist[NUM]; - - SNode(int _id = -1) : id(_id) { } -}; -#endif diff --git a/maxflow/galois/apps/surveypropagation/CMakeLists.txt b/maxflow/galois/apps/surveypropagation/CMakeLists.txt deleted file mode 100644 index 9ddc57a..0000000 --- a/maxflow/galois/apps/surveypropagation/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -app(surveypropagation) diff --git a/maxflow/galois/apps/surveypropagation/SurveyPropagation.cpp b/maxflow/galois/apps/surveypropagation/SurveyPropagation.cpp deleted file mode 100644 index b272486..0000000 --- a/maxflow/galois/apps/surveypropagation/SurveyPropagation.cpp +++ /dev/null @@ -1,475 +0,0 @@ -/** Survey propagation -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. 
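The deleted SSSPall.h batches NUM (32) sources per pass: every node keeps one tentative distance per lane, and each update request carries the lane index c of the source it belongs to, so a single traversal relaxes all 32 searches. A minimal sketch of that layout and of the per-lane seeding done in runBodyParallel (vertex ids below are hypothetical):

#include <array>
#include <cstdint>
#include <limits>
#include <vector>

constexpr int kLanes = 32;  // mirrors NUM in the deleted header
constexpr uint32_t kInf = std::numeric_limits<uint32_t>::max() - 1;

// One tentative distance per lane, so one graph pass serves kLanes sources.
struct NodeData {
  std::array<uint32_t, kLanes> dist;
  NodeData() { dist.fill(kInf); }
};

struct Request {
  uint32_t node;  // vertex to relax
  uint32_t w;     // tentative distance
  uint32_t lane;  // which source this distance belongs to
};

// Seed one zero-distance request per lane, as runBodyParallel does.
std::vector<Request> seed(const std::vector<uint32_t>& sources) {
  std::vector<Request> out;
  for (uint32_t lane = 0; lane < sources.size() && lane < (uint32_t)kLanes; ++lane)
    out.push_back({sources[lane], 0u, lane});
  return out;
}

int main() {
  std::vector<NodeData> nodes(8);      // data for a hypothetical 8-vertex graph
  auto initial = seed({0, 3, 5});      // three sources in this batch
  for (const Request& r : initial)
    nodes[r.node].dist[r.lane] = r.w;  // a source is at distance 0 in its own lane
  // A worklist algorithm would now pop requests, compare them against
  // nodes[r.node].dist[r.lane], and push relaxed neighbours per lane.
  return 0;
}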
- * - * @section Description - * - * Survey Propagation - * - * @author Andrew Lenharth - */ -#include "Galois/Statistic.h" -#include "Galois/Graph/Graph.h" -#include "Galois/Galois.h" -#include "Galois/Accumulator.h" - -#include "llvm/Support/CommandLine.h" - -#include "Lonestar/BoilerPlate.h" - -#ifdef GALOIS_USE_EXP -#include "Galois/PriorityScheduling.h" -#endif - -#include -#include - -#include - -using namespace std; -using namespace Galois::WorkList; - -namespace cll = llvm::cl; - -static const char* name = "Survey Propagation"; -static const char* desc = "Solves SAT problems using survey propagation"; -static const char* url = "survey_propagation"; - -static cll::opt seed(cll::Positional, cll::desc(""), cll::Required); -static cll::opt M(cll::Positional, cll::desc(""), cll::Required); -static cll::opt N(cll::Positional, cll::desc(""), cll::Required); -static cll::opt K(cll::Positional, cll::desc(""), cll::Required); - -//#define WLWL LocalQueues, FIFO<> > -#define WLWL dChunkedFIFO<1024> - -//SAT problem: -//variables Xi E {0,1}, i E {1 .. N), M constraints -//constraints are or clauses of variables or negation of variables -//clause a has variables i1...iK, J^a_ir E {-+1} -//zi1 = J^a_ir * xir - -//Graph form: -//N variables each get a variable node (circles in paper) (SET X, i,j,k...) -//M clauses get a function node (square in paper) (set A, a,b,c...) -// edge between xi and a if xi appears in a, weighted by J^a_i -//V(i) function nodes a... to which variable node i is connected -//n_i = |V(i)| = degree of variable node -//V+(i) positive edges, V-(i) negative edges (per J^a_i) (V(i) = V+(i) + V-(i)) -//V(i)\b set V(i) without b -//given connected Fnode a and Vnode j, V^u_a(j) and V^s_a(j) are neighbors which cause j sat or unsat a: -// if (J^a_j = 1): V^u_a(j) = V+(j); V^s_a(j) = V-(j)\a -// if (J^a_j = -1): V^u_a(j) = V-(j); V^s_a(j) = V+(j)\a - -//Graph+data: -//survey n_a->i E [0,1] - - - -//implementation -//As a graph -//nodes have: -// a name -// a eta product -//edges have: -// a double survey -// a bool for sign (inversion of variable) -// a pi product -//Graph is undirected (for now) - -struct SPEdge { - double eta; - bool isNegative; - - SPEdge() {} - SPEdge(bool isNeg) :isNegative(isNeg) { - eta = (double)rand() / (double)RAND_MAX; - } -}; - -struct SPNode { - bool isClause; - int name; - bool solved; - bool value; - int t; - - double Bias; - - SPNode(int n, bool b) :isClause(b), name(n), solved(false), value(false), t(0) {} -}; - -typedef Galois::Graph::FirstGraph Graph; -typedef Galois::Graph::FirstGraph::GraphNode GNode; - -static Graph graph; - -static std::vector literals; -static std::vector > clauses; - -static Galois::GAccumulator nontrivial; - -static Galois::GReduceMax maxBias; -static Galois::GAccumulator numBias; -static Galois::GAccumulator sumBias; - -//interesting parameters: -static const double epsilon = 0.000001; -static const int tmax = 100; -//static int tlimit = 0; - -void initialize_random_formula(int M, int N, int K) { - //M clauses - //N variables - //K vars per clause - - //build up clauses and literals - clauses.resize(M); - literals.resize(N); - - for (int m = 0; m < M; ++m) { - GNode node = graph.createNode(SPNode(m, true)); - graph.addNode(node, Galois::MethodFlag::NONE); - clauses[m] = std::make_pair(node, 0); - } - for (int n = 0; n < N; ++n) { - GNode node = graph.createNode(SPNode(n, false)); - graph.addNode(node, Galois::MethodFlag::NONE); - literals[n] = node; - } - - for (int m = 0; m < M; ++m) { - //Generate K unique values - 
std::vector touse; - while (touse.size() != (unsigned)K) { - //extra complex to generate uniform rand value - int newK = (int)(((double)rand()/((double)RAND_MAX + 1)) * (double)(N)); - if (std::find(touse.begin(), touse.end(), newK) == touse.end()) { - touse.push_back(newK); - graph.getEdgeData(graph.addEdge(clauses[m].first, literals[newK], Galois::MethodFlag::NONE)) = SPEdge((bool)(rand() % 2)); - } - } - } - - //std::random_shuffle(literals.begin(), literals.end()); - //std::random_shuffle(clauses.begin(), clauses.end()); -} - -void print_formula() { - for (unsigned m = 0; m < clauses.size(); ++m) { - if (m != 0) - std::cout << " & "; - std::cout << "c" << m << "( "; - GNode N = clauses[m].first; - for (Graph::edge_iterator ii = graph.edge_begin(N, Galois::MethodFlag::NONE), ee = graph.edge_end( N, Galois::MethodFlag::NONE); ii != ee; ++ii) { - if (ii != graph.edge_begin(N, Galois::MethodFlag::NONE)) - std::cout << " | "; - SPEdge& E = graph.getEdgeData(ii, Galois::MethodFlag::NONE); - if (E.isNegative) - std::cout << "-"; - - SPNode& V = graph.getData(graph.getEdgeDst(ii), Galois::MethodFlag::NONE); - std::cout << "v" << V.name; - if (V.solved) - std::cout << "[" << (V.value ? 1 : 0) << "]"; - std::cout << "{" << E.eta << "," << V.Bias << "," << (V.value ? 1 : 0) << "}"; - std::cout << " "; - } - std::cout << " )"; - } - std::cout << "\n"; -} - -void print_fixed() { - for (unsigned n = 0; n < literals.size(); ++n) { - GNode N = literals[n]; - SPNode& V = graph.getData(N, Galois::MethodFlag::NONE); - if (V.solved) - std::cout << V.name << "[" << (V.value ? 1 : 0) << "] "; - } - std::cout << "\n"; -} - -int count_fixed() { - int retval = 0; - for (unsigned n = 0; n < literals.size(); ++n) { - GNode N = literals[n]; - SPNode& V = graph.getData(N, Galois::MethodFlag::NONE); - if (V.solved) - ++retval; - } - return retval; -} - -struct update_eta { - - double eta_for_a_i(GNode a, GNode i) { - double etaNew = 1.0; - //for each j - for (Graph::edge_iterator jii = graph.edge_begin(a, Galois::MethodFlag::NONE), - jee = graph.edge_end(a, Galois::MethodFlag::NONE); jii != jee; ++jii) { - GNode j = graph.getEdgeDst(jii); - if (j != i) { - bool ajNegative = graph.getEdgeData(jii, Galois::MethodFlag::NONE).isNegative; - double prodP = 1.0; - double prodN = 1.0; - double prod0 = 1.0; - //for each b - for (Graph::edge_iterator bii = graph.edge_begin(j, Galois::MethodFlag::NONE), - bee = graph.edge_end(j, Galois::MethodFlag::NONE); - bii != bee; ++bii) { - GNode b = graph.getEdgeDst(bii); - SPEdge Ebj = graph.getEdgeData(bii, Galois::MethodFlag::NONE); - if (b != a) - prod0 *= (1.0 - Ebj.eta); - if (Ebj.isNegative) - prodN *= (1.0 - Ebj.eta); - else - prodP *= (1.0 - Ebj.eta); - } - double PIu, PIs; - if (ajNegative) { - PIu = (1.0 - prodN) * prodP; - PIs = (1.0 - prodP) * prodN; - } else { - PIs = (1.0 - prodN) * prodP; - PIu = (1.0 - prodP) * prodN; - } - double PI0 = prod0; - etaNew *= (PIu / (PIu + PIs + PI0)); - } - } - return etaNew; - } - - template - void operator()(std::pair a, Context& ctx) { - this->operator()(a.first, ctx); - } - - template - void operator()(GNode a, Context& ctx) { - //std::cerr << graph.getData(a).t << " "; - // if (graph.getData(a, Galois::MethodFlag::NONE).t >= tlimit) - // return; - - // for (Graph::neighbor_iterator iii = graph.neighbor_begin(a), - // iee = graph.neighbor_end(a); iii != iee; ++iii) - // for (Graph::neighbor_iterator bii = graph.neighbor_begin(*iii), - // bee = graph.neighbor_end(*iii); bii != bee; ++bii) - // // for (Graph::neighbor_iterator jii = 
graph.neighbor_begin(*bii), - // // jee = graph.neighbor_end(*bii); - // // jii != jee; ++jii) - //{} - - ++graph.getData(a).t; - - //for each i - for (Graph::edge_iterator iii = graph.edge_begin(a, Galois::MethodFlag::NONE), - iee = graph.edge_end(a, Galois::MethodFlag::NONE); iii != iee; ++iii) { - GNode i = graph.getEdgeDst(iii); - double e = eta_for_a_i(a, i); - double olde = graph.getEdgeData(iii, Galois::MethodFlag::NONE).eta; - graph.getEdgeData(iii).eta = e; - //std::cout << olde << ',' << e << " "; - if (fabs(olde - e) > epsilon) { - for (Graph::edge_iterator bii = graph.edge_begin(i, Galois::MethodFlag::NONE), - bee = graph.edge_end(i, Galois::MethodFlag::NONE); bii != bee; ++bii) { - GNode b = graph.getEdgeDst(bii); - if (a != b) // && graph.getData(b, Galois::MethodFlag::NONE).t < tlimit) - ctx.push(std::make_pair(b,100-(int)(100.0*(olde - e)))); - } - } - } - } -}; - -//compute biases on each node -struct update_biases { - void operator()(GNode i) { - SPNode& idata = graph.getData(i, Galois::MethodFlag::NONE); - if (idata.solved) return; - - double pp1 = 1.0; - double pp2 = 1.0; - double pn1 = 1.0; - double pn2 = 1.0; - double p0 = 1.0; - - //for each function a - for (Graph::edge_iterator aii = graph.edge_begin(i, Galois::MethodFlag::NONE), aee = graph.edge_end(i, Galois::MethodFlag::NONE); aii != aee; ++aii) { - SPEdge& aie = graph.getEdgeData(aii, Galois::MethodFlag::NONE); - - double etaai = aie.eta; - if (etaai > epsilon) - nontrivial += 1; - if (aie.isNegative) { - pp2 *= (1.0 - etaai); - pn1 *= (1.0 - etaai); - } else { - pp1 *= (1.0 - etaai); - pn2 *= (1.0 - etaai); - } - p0 *= (1.0 - etaai); - } - double pp = (1.0 - pp1) * pp2; - double pn = (1.0 - pn1) * pn2; - - double BiasP = pp / (pp + pn + p0); - double BiasN = pn / (pp + pn + p0); - // double Bias0 = 1.0 - BiasP - BiasN; - - double d = BiasP - BiasN; - if (d < 0.0) - d = BiasN - BiasP; - idata.Bias = d; - idata.value = (BiasP > BiasN); - - assert(!std::isnan(d) && !std::isnan(-d)); - maxBias.update(d); - numBias += 1; - sumBias += d; - } -}; - -struct EIndexer: public std::unary_function,int> { - int operator()(const std::pair& v) { - return v.second; - } -}; - -struct ELess { - bool operator()(const std::pair& lhs, - const std::pair& rhs) { - if (lhs.second < rhs.second) return true; - if (lhs.second > rhs.second) return false; - return lhs < rhs; - } -}; -struct EGreater { - bool operator()(const std::pair& lhs, - const std::pair& rhs) { - if (lhs.second > rhs.second) return true; - if (lhs.second < rhs.second) return false; - return lhs > rhs; - } -}; - -//return true if converged -void SP_algorithm() { - //0) at t = 0, for every edge a->i, randomly initialize the message sigma a->i(t=0) in [0,1] - //1) for t = 1 to tmax: - //1.1) sweep the set of edges in a random order, and update sequentially the warnings on all the edges of the graph, generating the values sigma a->i (t) using SP_update - //1.2) if (|sigma a->i(t) - sigma a->i (t-1) < E on all the edges, the iteration has converged and generated sigma* a->i = sigma a->i(t), goto 2 - //2) if t = tmax return un-converged. 
if (t < tmax) then return the set of fixed point warnings sigma* a->i = sigma a->i (t) - - // tlimit += tmax; -#ifdef GALOIS_USE_EXP - Exp::PriAuto<64, EIndexer, WLWL, ELess, EGreater >::for_each(clauses.begin(), clauses.end(), update_eta(), "update_eta"); -#else - Galois::for_each(clauses.begin(), clauses.end(), update_eta(), Galois::loopname("update_eta"), Galois::wl()); -#endif - - maxBias.reset(); - numBias.reset(); - sumBias.reset(); - nontrivial.reset(); - Galois::do_all(literals.begin(), literals.end(), update_biases(), Galois::loopname("update_biases")); -} - -struct fix_variables { - double limit; - fix_variables(double d) :limit(d) {} - void operator()(GNode i) {//, const Context& ctx) { - SPNode& idata = graph.getData(i); - if (idata.solved) return; - if (idata.Bias > limit) { - idata.solved = true; - //TODO: simplify graph - //for each b - for (Graph::edge_iterator bii = graph.edge_begin(i), - bee = graph.edge_end(i); - bii != bee; ++bii) { - graph.getData(graph.getEdgeDst(bii)).solved = true; - graph.getData(graph.getEdgeDst(bii)).value = true; - } - graph.removeNode(i); - } - } -}; - -void decimate() { - double m = maxBias.reduce(); - double n = nontrivial.reduce(); - int num = numBias.reduce(); - double average = num > 0 ? sumBias.reduce() / num : 0.0; - std::cout << "NonTrivial " << n << " MaxBias " << m << " Average Bias " << average << "\n"; - double d = ((m - average) * 0.25) + average; - Galois::do_all(literals.begin(), literals.end(), fix_variables(d), Galois::loopname("fix_variables")); -} - -bool survey_inspired_decimation() { - //0) Randomize initial conditions for the surveys - //1) run SP - // if (SP does not converge, return SP UNCONVEREGED and stop - // if SP convereges, use fixed-point surveys n*a->i to - //2) decimate - //2.1) if non-trivial surveys (n != 0) are found, then: - // a) compute biases (W+,W-,W0) from PI+,PI-,PI0 - // b) fix largest |W+ - W-| to x = W+ > W- - // c) clean the graph - //2.2) if all surveys are trivial(n = 0), output simplified subformula - //4) if solved, output SAT, if no contradiction, continue at 1, if contridiction, stop - do { - SP_algorithm(); - if (nontrivial.reduce()) { - std::cout << "DECIMATED\n"; - decimate(); - } else { - std::cout << "SIMPLIFIED\n"; - return false; - } - } while (true); // while (true); - return true; -} - - -int main(int argc, char** argv) { - Galois::StatManager MM; - LonestarStart(argc, argv, name, desc, url); - srand(seed); - initialize_random_formula(M,N,K); - //print_formula(); - //build_graph(); - //print_graph(); - - std::cout << "Starting...\n"; - - Galois::StatTimer T; - T.start(); - survey_inspired_decimation(); - T.stop(); - - //print_formula(); - //print_fixed(); - - std::cout << "Fixed " << count_fixed() << " variables\n"; - - return 0; -} diff --git a/maxflow/galois/apps/tutorial/CMakeLists.txt b/maxflow/galois/apps/tutorial/CMakeLists.txt deleted file mode 100644 index facf64d..0000000 --- a/maxflow/galois/apps/tutorial/CMakeLists.txt +++ /dev/null @@ -1,4 +0,0 @@ -app(hello-world HelloWorld.cpp) -app(torus Torus.cpp) -app(torus-improved TorusImproved.cpp) -app(sssp-simple SSSPsimple.cpp) diff --git a/maxflow/galois/apps/tutorial/HelloWorld.cpp b/maxflow/galois/apps/tutorial/HelloWorld.cpp deleted file mode 100644 index 9d595ed..0000000 --- a/maxflow/galois/apps/tutorial/HelloWorld.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/** My first Galois program -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. 
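The comment block at the top of the deleted SurveyPropagation.cpp spells out the factor-graph encoding: N variable nodes, M clause nodes, and one signed edge per literal, which initialize_random_formula builds by drawing K distinct variables per clause with random negations. A minimal Galois-free sketch of just that construction step (the struct names and parameter values are illustrative, not the app's defaults):

#include <cstdlib>
#include <iostream>
#include <unordered_set>
#include <vector>

// One clause = K distinct variable indices, each with a sign (true = negated).
struct Literal { int var; bool negated; };
using Clause = std::vector<Literal>;

std::vector<Clause> random_formula(int M, int N, int K, unsigned seed) {
  std::srand(seed);
  std::vector<Clause> clauses(M);
  for (int m = 0; m < M; ++m) {
    std::unordered_set<int> used;           // keep the K variables distinct
    while ((int)used.size() < K) {
      int v = std::rand() % N;
      if (used.insert(v).second)            // new variable for this clause
        clauses[m].push_back({v, std::rand() % 2 == 1});
    }
  }
  return clauses;
}

int main() {
  auto f = random_formula(/*M=*/4, /*N=*/6, /*K=*/3, /*seed=*/42);
  for (size_t m = 0; m < f.size(); ++m) {
    std::cout << "c" << m << ":";
    for (const Literal& lit : f[m])
      std::cout << ' ' << (lit.negated ? "-" : "") << 'v' << lit.var;
    std::cout << '\n';
  }
  return 0;
}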
- * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * My first Galois program. Prints "Hello World" in parallel. - * - * @author Donald Nguyen - */ -#include "Galois/Galois.h" -#include -#include - -struct HelloWorld { - void operator()(int i) { - std::cout << "Hello " << i << "\n"; - } -}; - -void helloWorld(int i) { - std::cout << "Hello " << i << "\n"; -} - -int main(int argc, char** argv) { - if (argc < 3) { - std::cerr << " \n"; - return 1; - } - unsigned int numThreads = atoi(argv[1]); - int n = atoi(argv[2]); - - numThreads = Galois::setActiveThreads(numThreads); - std::cout << "Using " << numThreads << " threads and " << n << " iterations\n"; - - std::cout << "Using a function object\n"; - Galois::do_all(boost::make_counting_iterator(0), boost::make_counting_iterator(n), HelloWorld()); - - std::cout << "Using a function pointer\n"; - Galois::do_all(boost::make_counting_iterator(0), boost::make_counting_iterator(n), helloWorld); - - return 0; -} diff --git a/maxflow/galois/apps/tutorial/SSSPsimple.cpp b/maxflow/galois/apps/tutorial/SSSPsimple.cpp deleted file mode 100644 index 22ad5cf..0000000 --- a/maxflow/galois/apps/tutorial/SSSPsimple.cpp +++ /dev/null @@ -1,99 +0,0 @@ -/** Single source shortest paths -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Single source shortest paths. 
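HelloWorld.cpp above runs Galois::do_all over a counting-iterator range, once with a function object and once with a plain function pointer. For comparison only, a rough standard-C++17 analogue with a parallel algorithm (this is not part of the original code and needs a toolchain with parallel-algorithm support, e.g. libstdc++ linked against TBB):

#include <algorithm>
#include <cstdlib>
#include <execution>
#include <iostream>
#include <numeric>
#include <vector>

int main(int argc, char** argv) {
  int n = argc > 1 ? std::atoi(argv[1]) : 8;
  std::vector<int> ids(n);
  std::iota(ids.begin(), ids.end(), 0);  // 0, 1, ..., n-1
  // Parallel loop over the index range; as with do_all, iteration order
  // (and therefore output order) is unspecified.
  std::for_each(std::execution::par, ids.begin(), ids.end(),
                [](int i) { std::cout << "Hello " << i << "\n"; });
  return 0;
}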
- * - * @author Andrew Lenharth - */ - -#include "Galois/Statistic.h" -#include "Galois/Galois.h" -#include "Galois/Graph/LCGraph.h" -#include "llvm/Support/CommandLine.h" -#include "Lonestar/BoilerPlate.h" - -typedef Galois::Graph::LC_Linear_Graph Graph; -typedef Graph::GraphNode GNode; -typedef std::pair UpdateRequest; - -static const unsigned int DIST_INFINITY = - std::numeric_limits::max(); - -unsigned stepShift = 11; -Graph graph; - -namespace cll = llvm::cl; -static cll::opt filename(cll::Positional, cll::desc(""), cll::Required); - -void relax_edge(unsigned src_data, Graph::edge_iterator ii, - Galois::UserContext& ctx) { - GNode dst = graph.getEdgeDst(ii); - unsigned int edge_data = graph.getEdgeData(ii); - unsigned& dst_data = graph.getData(dst); - unsigned int newDist = dst_data + edge_data; - if (newDist < dst_data) { - dst_data = newDist; - ctx.push(std::make_pair(newDist, dst)); - } -} - -struct SSSP { - void operator()(UpdateRequest& req, Galois::UserContext& ctx) const { - unsigned& data = graph.getData(req.second); - if (req.first >= data) return; - - for (Graph::edge_iterator ii = graph.edge_begin(req.second), - ee = graph.edge_end(req.second); ii != ee; ++ii) - relax_edge(data, ii, ctx); - } -}; - -struct Init { - void operator()(GNode& n, Galois::UserContext& ctx) const { - graph.getData(n) = DIST_INFINITY; - } -}; - -struct UpdateRequestIndexer: public std::unary_function { - unsigned int operator() (const UpdateRequest& val) const { - return val.first >> stepShift; - } -}; - -int main(int argc, char **argv) { - Galois::StatManager statManager; - LonestarStart(argc, argv, 0,0,0); - - Galois::Graph::readGraph(graph, filename); - Galois::for_each(graph.begin(), graph.end(), Init()); - - using namespace Galois::WorkList; - typedef dChunkedLIFO<16> dChunk; - typedef OrderedByIntegerMetric OBIM; - - Galois::StatTimer T; - T.start(); - graph.getData(*graph.begin()) = 0; - Galois::for_each(std::make_pair(0U, *graph.begin()), SSSP(), Galois::wl()); - T.stop(); - return 0; -} diff --git a/maxflow/galois/apps/tutorial/Torus.cpp b/maxflow/galois/apps/tutorial/Torus.cpp deleted file mode 100644 index a8d15be..0000000 --- a/maxflow/galois/apps/tutorial/Torus.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/** Tutorial torus application -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Simple tutorial application. Creates a torus graph and each node increments - * its neighbors data by one. 
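SSSPsimple.cpp above drives its OrderedByIntegerMetric worklist through UpdateRequestIndexer, which maps a request to the coarse bucket val.first >> stepShift (default 11), so distances that fall in the same 2^11-wide window share one priority level. (Its relax_edge also appears to compute the tentative distance from dst_data rather than the src_data parameter it receives; a conventional relaxation adds the edge weight to the source's distance, as the other SSSP variants above do.) A standalone sketch of just the bucket mapping, with made-up distances:

#include <cstdint>
#include <iostream>

constexpr unsigned kStepShift = 11;  // same default as the deleted code

// Requests in the same bucket may be processed in any order; only the
// bucket index is used as the scheduling priority.
unsigned bucket(uint32_t dist) { return dist >> kStepShift; }

int main() {
  for (uint32_t d : {0u, 2047u, 2048u, 5000u, 70000u})
    std::cout << "dist " << d << " -> bucket " << bucket(d) << "\n";
  // 0 and 2047 land in bucket 0; 2048 opens bucket 1, and so on.
  return 0;
}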
- * - * @author Donald Nguyen - */ -#include "Galois/Galois.h" -#include "Galois/Statistic.h" -#include "Galois/Graph/Graph.h" -#include - -//! Graph has int node data, void edge data and is directed -typedef Galois::Graph::FirstGraph Graph; -//! Opaque pointer to graph node -typedef Graph::GraphNode GNode; - -//! Increments node value of each neighbor by 1 -struct IncrementNeighbors { - Graph& g; - IncrementNeighbors(Graph& g): g(g) { } - - //! Operator. Context parameter is unused in this example. - void operator()(GNode n, Galois::UserContext& ctx) { - // For each outgoing edge (n, dst) - for (Graph::edge_iterator ii = g.edge_begin(n), ei = g.edge_end(n); ii != ei; ++ii) { - GNode dst = g.getEdgeDst(ii); - int& data = g.getData(dst); - // Increment node data by 1 - data += 1; - } - } -}; - -//! Returns true if node value equals v -struct ValueEqual { - Graph& g; - int v; - ValueEqual(Graph& g, int v): g(g), v(v) { } - bool operator()(GNode n) { - return g.getData(n) == v; - } -}; - -//! Construct a simple torus graph -void constructTorus(Graph& g, int height, int width) { - // Construct set of nodes - int numNodes = height * width; - std::vector nodes(numNodes); - for (int i = 0; i < numNodes; ++i) { - GNode n = g.createNode(0); - g.addNode(n); - nodes[i] = n; - } - - // Add edges - for (int x = 0; x < width; ++x) { - for (int y = 0; y < height; ++y) { - GNode c = nodes[x*height + y]; - GNode n = nodes[x*height + ((y+1) % height)]; - GNode s = nodes[x*height + ((y-1+height) % height)]; - GNode e = nodes[((x+1) % width)*height + y]; - GNode w = nodes[((x-1+width) % width)*height + y]; - g.addEdge(c, n); - g.addEdge(c, s); - g.addEdge(c, e); - g.addEdge(c, w); - } - } -} - -int main(int argc, char** argv) { - if (argc < 3) { - std::cerr << " \n"; - return 1; - } - unsigned int numThreads = atoi(argv[1]); - int n = atoi(argv[2]); - - numThreads = Galois::setActiveThreads(numThreads); - std::cout << "Using " << numThreads << " thread(s) and " << n << " x " << n << " torus\n"; - - Graph graph; - constructTorus(graph, n, n); - - Galois::StatTimer T; - T.start(); - Galois::for_each(graph.begin(), graph.end(), IncrementNeighbors(graph)); - T.stop(); - - std::cout << "Elapsed time: " << T.get() << " milliseconds\n"; - - // Verify - int count = std::count_if(graph.begin(), graph.end(), ValueEqual(graph, 4)); - if (count != n * n) { - std::cerr << "Expected " << n * n << " nodes with value = 4 but found " << count << " instead.\n"; - return 1; - } else { - std::cout << "Correct!\n"; - } - - return 0; -} diff --git a/maxflow/galois/apps/tutorial/TorusImproved.cpp b/maxflow/galois/apps/tutorial/TorusImproved.cpp deleted file mode 100644 index 85fa93f..0000000 --- a/maxflow/galois/apps/tutorial/TorusImproved.cpp +++ /dev/null @@ -1,184 +0,0 @@ -/** Tutorial torus application -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2013, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. 
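constructTorus in the deleted Torus.cpp wires every node to its four wrap-around neighbours with modular index arithmetic, and the verification step expects each node to end up with the value 4. A small sequential sketch of the same index arithmetic (grid sizes are made up), counting how many increments each node would receive:

#include <iostream>
#include <vector>

int main() {
  const int width = 3, height = 4;
  std::vector<int> count(width * height, 0);
  for (int x = 0; x < width; ++x) {
    for (int y = 0; y < height; ++y) {
      int n = x * height + (y + 1) % height;           // north, wraps at the top
      int s = x * height + (y - 1 + height) % height;  // south, wraps at the bottom
      int e = ((x + 1) % width) * height + y;          // east, wraps at the right edge
      int w = ((x - 1 + width) % width) * height + y;  // west, wraps at the left edge
      for (int nb : {n, s, e, w}) ++count[nb];         // one increment per neighbour
    }
  }
  bool ok = true;
  for (int c : count) ok = ok && (c == 4);  // every node has in-degree 4 on a torus
  std::cout << (ok ? "Correct!\n" : "Mismatch\n");
  return 0;
}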
Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @section Description - * - * Simple tutorial application. Creates a torus graph and each node increments - * its neighbors data by one. This version improves {@link Torus.cpp} by - * allocating the graph in parallel, which distributes the graph, and by - * more carefully assigning work to threads. - * - * @author Donald Nguyen - */ -#include "Galois/Galois.h" -#include "Galois/Timer.h" -#include "Galois/Graph/Graph.h" -#include - -//! Graph has int node data, void edge data and is directed -typedef Galois::Graph::FirstGraph Graph; -//! Opaque pointer to graph node -typedef Graph::GraphNode GNode; - -//! Increments node value of each neighbor by 1 -struct IncrementNeighbors { - Graph& g; - IncrementNeighbors(Graph& g): g(g) { } - - //! Operator. Context parameter is unused in this example. - void operator()(GNode n, Galois::UserContext& ctx) { - // For each outgoing edge (n, dst) - for (Graph::edge_iterator ii = g.edge_begin(n), ei = g.edge_end(n); ii != ei; ++ii) { - GNode dst = g.getEdgeDst(ii); - int& data = g.getData(dst); - // Increment node data by 1 - data += 1; - } - } -}; - -//! Returns true if node value equals v -struct ValueEqual { - Graph& g; - int v; - ValueEqual(Graph& g, int v): g(g), v(v) { } - bool operator()(GNode n) { - return g.getData(n) == v; - } -}; - -class Point2D { - int v[2]; -public: - Point2D() { } - Point2D(int x, int y) { v[0] = x; v[1] = y; } - - const int& at(int i) const { return v[i]; } - const int& x() const { return v[0]; } - const int& y() const { return v[1]; } - int dim() const { return 2; } -}; - -/** - * Sort pairs according to Morton Z-Order. - * - * From http://en.wikipedia.org/wiki/Z-order_%28curve%29 - */ -struct ZOrderCompare { - bool operator()(const Point2D& p1, const Point2D& p2) const { - int index = 0; - int x = 0; - for (int k = 0; k < p1.dim(); ++k) { - int y = p1.at(k) ^ p2.at(k); - if (lessMsb(x, y)) { - index = k; - x = y; - } - } - return p1.at(index) - p2.at(index) <= 0; - } - - bool lessMsb(int a, int b) const { - return a < b && a < (a ^ b); - } -}; - -struct CreateNodes { - Graph& g; - std::vector& nodes; - int height; - CreateNodes(Graph& g, std::vector& n, int h): g(g), nodes(n), height(h) { } - - void operator()(const Point2D& p) { - GNode n = g.createNode(0); - g.addNode(n); - nodes[p.x() * height + p.y()] = n; - } -}; - -//! 
Construct a simple torus graph -void constructTorus(Graph& g, int height, int width) { - // Construct set of nodes - int numNodes = height * width; - std::vector points(numNodes); - for (int x = 0; x < width; ++x) { - for (int y = 0; y < height; ++y) { - points[x*height + y] = Point2D(x, y); - } - } - // Sort in a space-filling way - std::sort(points.begin(), points.end(), ZOrderCompare()); - - // Using space-filling order, assign nodes and create (and allocate) them in parallel - std::vector nodes(numNodes); - Galois::do_all(points.begin(), points.end(), CreateNodes(g, nodes, height)); - - // Add edges - for (int x = 0; x < width; ++x) { - for (int y = 0; y < height; ++y) { - GNode c = nodes[x*height + y]; - GNode n = nodes[x*height + ((y+1) % height)]; - GNode s = nodes[x*height + ((y-1+height) % height)]; - GNode e = nodes[((x+1) % width)*height + y]; - GNode w = nodes[((x-1+width) % width)*height + y]; - g.addEdge(c, n); - g.addEdge(c, s); - g.addEdge(c, e); - g.addEdge(c, w); - } - } -} - -int main(int argc, char** argv) { - if (argc < 3) { - std::cerr << " \n"; - return 1; - } - unsigned int numThreads = atoi(argv[1]); - int n = atoi(argv[2]); - - numThreads = Galois::setActiveThreads(numThreads); - std::cout << "Using " << numThreads << " threads and " << n << " x " << n << " torus\n"; - - Graph graph; - constructTorus(graph, n, n); - - Galois::Timer T; - T.start(); - // Unlike Galois::for_each, Galois::for_each_local initially assigns work - // based on which thread created each node (Galois::for_each uses a simple - // blocking of the iterator range to initialize work, but the iterator order - // of a Graph is implementation-defined). - Galois::for_each_local(graph, IncrementNeighbors(graph)); - T.stop(); - - std::cout << "Elapsed time: " << T.get() << " milliseconds\n"; - - // Verify - int count = std::count_if(graph.begin(), graph.end(), ValueEqual(graph, 4)); - if (count != n * n) { - std::cerr << "Expected " << n * n << " nodes with value = 4 but found " << count << " instead.\n"; - return 1; - } else { - std::cout << "Correct!\n"; - } - - return 0; -} diff --git a/maxflow/galois/cmake/Modules/AddLLVMDefinitions.cmake b/maxflow/galois/cmake/Modules/AddLLVMDefinitions.cmake deleted file mode 100644 index 33ac973..0000000 --- a/maxflow/galois/cmake/Modules/AddLLVMDefinitions.cmake +++ /dev/null @@ -1,13 +0,0 @@ -# There is no clear way of keeping track of compiler command-line -# options chosen via `add_definitions', so we need our own method for -# using it on tools/llvm-config/CMakeLists.txt. - -# Beware that there is no implementation of remove_llvm_definitions. - -macro(add_llvm_definitions) - # We don't want no semicolons on LLVM_DEFINITIONS: - foreach(arg ${ARGN}) - set(LLVM_DEFINITIONS "${LLVM_DEFINITIONS} ${arg}") - endforeach(arg) - add_definitions( ${ARGN} ) -endmacro(add_llvm_definitions) diff --git a/maxflow/galois/cmake/Modules/CheckCXX11Features.cmake b/maxflow/galois/cmake/Modules/CheckCXX11Features.cmake deleted file mode 100644 index 71cf559..0000000 --- a/maxflow/galois/cmake/Modules/CheckCXX11Features.cmake +++ /dev/null @@ -1,57 +0,0 @@ -include(CheckCXXSourceCompiles) -include(CMakePushCheckState) - -set(CheckUniformIntDistribution -" -#include -int main(){ - std::mt19937 gen; - std::uniform_int_distribution r(0, 6); - return r(gen); -} -") - -set(CheckUniformRealDistribution -" -#include -int main(){ - std::mt19937 gen; - std::uniform_real_distribution r(0, 1); - return r(gen) < 0.5 ? 
0 : 1; -} -") - -set(CheckChrono -" -#include -int main(){ - typedef std::chrono::steady_clock Clock; - std::chrono::time_point start, stop; - start = Clock::now(); - stop = Clock::now(); - unsigned long res = - std::chrono::duration_cast(stop-start).count(); - return res < 1000 ? 0 : 1; -} -") - -set(CheckAlignof -" -int main(){ - return alignof(int) != 0; -} -") - -cmake_push_check_state() - -set(CMAKE_REQUIRED_FLAGS ${CXX11_FLAGS}) -CHECK_CXX_SOURCE_COMPILES("${CheckUniformIntDistribution}" - HAVE_CXX11_UNIFORM_INT_DISTRIBUTION) -CHECK_CXX_SOURCE_COMPILES("${CheckUniformRealDistribution}" - HAVE_CXX11_UNIFORM_REAL_DISTRIBUTION) -CHECK_CXX_SOURCE_COMPILES("${CheckChrono}" - HAVE_CXX11_CHRONO) -CHECK_CXX_SOURCE_COMPILES("${CheckAlignof}" - HAVE_CXX11_ALIGNOF) - -cmake_pop_check_state() diff --git a/maxflow/galois/cmake/Modules/CheckCilk.cmake b/maxflow/galois/cmake/Modules/CheckCilk.cmake deleted file mode 100644 index 03be90c..0000000 --- a/maxflow/galois/cmake/Modules/CheckCilk.cmake +++ /dev/null @@ -1,10 +0,0 @@ -include(CheckCXXSourceCompiles) -set(Cilk_CXX_TEST_SOURCE -" -#include -int main(){ cilk_for(int i=0;i<1; ++i); } -") -CHECK_CXX_SOURCE_COMPILES("${Cilk_CXX_TEST_SOURCE}" HAVE_CILK) -if(HAVE_CILK) - message(STATUS "A compiler with CILK support found") -endif() diff --git a/maxflow/galois/cmake/Modules/CheckEndian.cmake b/maxflow/galois/cmake/Modules/CheckEndian.cmake deleted file mode 100644 index df14b6d..0000000 --- a/maxflow/galois/cmake/Modules/CheckEndian.cmake +++ /dev/null @@ -1,7 +0,0 @@ -include(TestBigEndian) -TEST_BIG_ENDIAN(HAVE_BIG_ENDIAN) -include(CheckIncludeFiles) -CHECK_INCLUDE_FILES(endian.h HAVE_ENDIAN_H) -include(CheckSymbolExists) -CHECK_SYMBOL_EXISTS(le64toh "endian.h" HAVE_LE64TOH) -CHECK_SYMBOL_EXISTS(le32toh "endian.h" HAVE_LE32TOH) diff --git a/maxflow/galois/cmake/Modules/CheckHugePages.cmake b/maxflow/galois/cmake/Modules/CheckHugePages.cmake deleted file mode 100644 index 91b7e7b..0000000 --- a/maxflow/galois/cmake/Modules/CheckHugePages.cmake +++ /dev/null @@ -1,18 +0,0 @@ -include(CheckCSourceRuns) -set(HugePages_C_TEST_SOURCE -" -#ifdef __linux__ -#include -#endif -#include - -int main(int c, char** argv) { - void *ptr = mmap(0, 2*1024*1024, PROT_READ|PROT_WRITE, MAP_HUGETLB, -1, 0); - - return ptr != MAP_FAILED; -} -") -CHECK_C_SOURCE_RUNS("${HugePages_C_TEST_SOURCE}" HAVE_HUGEPAGES) -if(HAVE_HUGEPAGES) - message(STATUS "Huge pages found") -endif() diff --git a/maxflow/galois/cmake/Modules/FindCXX11.cmake b/maxflow/galois/cmake/Modules/FindCXX11.cmake deleted file mode 100644 index 73f124d..0000000 --- a/maxflow/galois/cmake/Modules/FindCXX11.cmake +++ /dev/null @@ -1,38 +0,0 @@ -# Find C++11 flags -# Once done this will define -# CXX11_FLAGS - Compiler flags to enable C++11 -include(CheckCXXCompilerFlag) - -# This covers gcc, icc, clang, xlc - -# Place xlc (-qlanglvl=extended0x) first because xlc parses -std but does not -# halt even with -qhalt=i -set(CXX11_FLAG_CANDIDATES -qlanglvl=extended0x -std=c++11 -std=c++0x) - -# some versions of cmake don't recognize clang's rejection of unknown flags -if(CMAKE_CXX_COMPILER_ID MATCHES "Clang") - set(CXX11_FLAG_CANDIDATES -std=c++11 -std=c++0x) -endif() - -# Don't do anything when already set -if(CXX11_FLAGS) - set(CXX11_FLAG_CANDIDATES) - set(CXX11_FOUND_INTERNAL "YES") -endif() - -foreach(FLAG ${CXX11_FLAG_CANDIDATES}) - unset(CXX11_FLAG_DETECTED CACHE) - message(STATUS "Try C++11 flag = [${FLAG}]") - check_cxx_compiler_flag("${FLAG}" CXX11_FLAG_DETECTED) - if(CXX11_FLAG_DETECTED) - 
set(CXX11_FOUND_INTERNAL "YES") - set(CXX11_FLAGS "${FLAG}" CACHE STRING "C++ compiler flags for C++11 features") - break() - endif() -endforeach() - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(CXX11 DEFAULT_MSG CXX11_FOUND_INTERNAL CXX11_FLAGS) -mark_as_advanced(CXX11_FLAGS) - -include(CheckCXX11Features) diff --git a/maxflow/galois/cmake/Modules/FindEigen.cmake b/maxflow/galois/cmake/Modules/FindEigen.cmake deleted file mode 100644 index 776d9e0..0000000 --- a/maxflow/galois/cmake/Modules/FindEigen.cmake +++ /dev/null @@ -1,23 +0,0 @@ -# Find Eigen library -# Once done this will define -# Eigen_FOUND - System has Eigen -# Eigen_INCLUDE_DIRS - The Eigen include directories -# Eigen_LIBRARIES - The libraries needed to use Eigen - -set(Eigen_LIBRARIES) # Include-only library - -if(Eigen_INCLUDE_DIR) - set(Eigen_FIND_QUIETLY TRUE) -endif() - -find_path(Eigen_INCLUDE_DIRS NAMES Eigen/Eigen PATHS ENV EIGEN_HOME) - -include(FindPackageHandleStandardArgs) -# handle the QUIETLY and REQUIRED arguments and set Eigen_FOUND to TRUE -# if all listed variables are TRUE -find_package_handle_standard_args(Eigen DEFAULT_MSG Eigen_INCLUDE_DIRS) -if(EIGEN_FOUND) - set(Eigen_FOUND TRUE) -endif() - -mark_as_advanced(Eigen_INCLUDE_DIRS) diff --git a/maxflow/galois/cmake/Modules/FindFortran.cmake b/maxflow/galois/cmake/Modules/FindFortran.cmake deleted file mode 100644 index e30d391..0000000 --- a/maxflow/galois/cmake/Modules/FindFortran.cmake +++ /dev/null @@ -1,14 +0,0 @@ -# Check if Fortran is possibly around before using enable_lanauge because -# enable_language(... OPTIONAL) does not fail gracefully if language is not -# found: -# http://public.kitware.com/Bug/view.php?id=9220 -set(Fortran_EXECUTABLE) -if(Fortran_EXECUTABLE) - set(Fortran_FIND_QUIETLY TRUE) -endif() -find_program(Fortran_EXECUTABLE NAMES gfortran ifort g77 f77 g90 f90) -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(Fortran DEFAULT_MSG Fortran_EXECUTABLE) -if(FORTRAN_FOUND) - set(Fortran_FOUND TRUE) -endif() diff --git a/maxflow/galois/cmake/Modules/FindGMP.cmake b/maxflow/galois/cmake/Modules/FindGMP.cmake deleted file mode 100644 index b145141..0000000 --- a/maxflow/galois/cmake/Modules/FindGMP.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# Find the GMP librairies -# GMP_FOUND - system has GMP lib -# GMP_INCLUDE_DIR - the GMP include directory -# GMP_LIBRARIES - Libraries needed to use GMP - -# Copyright (c) 2006, Laurent Montel, -# -# Redistribution and use is allowed according to the terms of the BSD license. -# For details see the accompanying COPYING-CMAKE-SCRIPTS file. 
- -if(GMP_INCLUDE_DIRS AND GMP_LIBRARIES AND GMPXX_LIBRARIES) - set(GMP_FIND_QUIETLY TRUE) -endif() - -find_path(GMP_INCLUDE_DIRS NAMES gmp.h) -find_library(GMP_LIBRARIES NAMES gmp libgmp) -find_library(GMPXX_LIBRARIES NAMES gmpxx libgmpxx) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(GMP DEFAULT_MSG GMP_INCLUDE_DIRS GMP_LIBRARIES) - -mark_as_advanced(GMP_INCLUDE_DIRS GMP_LIBRARIES GMPXX_LIBRARIES) diff --git a/maxflow/galois/cmake/Modules/FindHPCToolKit.cmake b/maxflow/galois/cmake/Modules/FindHPCToolKit.cmake deleted file mode 100644 index f5e5197..0000000 --- a/maxflow/galois/cmake/Modules/FindHPCToolKit.cmake +++ /dev/null @@ -1,20 +0,0 @@ -# Find HPCToolKit libraries -# Once done this will define -# HPCToolKit_FOUND - System has lib -# HPCToolKit_INCLUDE_DIRS - The include directories -# HPCToolKit_LIBRARIES - The libraries needed to use - -if(HPCToolKit_INCLUDE_DIRS AND HPCToolKit_LIBRARIES) - set(HPCToolKit_FIND_QUIETLY TRUE) -endif() - -find_path(HPCToolKit_INCLUDE_DIRS hpctoolkit.h PATHS ${HPCToolKit_ROOT} PATH_SUFFIXES include) -find_library(HPCToolKit_LIBRARY NAMES libhpctoolkit.a hpctoolkit PATHS ${HPCToolKit_ROOT} PATH_SUFFIXES lib lib64 lib32 lib/hpctoolkit) -set(HPCToolKit_LIBRARIES ${HPCToolKit_LIBRARY}) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(HPCToolKit DEFAULT_MSG HPCToolKit_LIBRARIES HPCToolKit_INCLUDE_DIRS) -if(HPCTOOLKIT_FOUND) - set(HPCToolKit_FOUND on) -endif() -mark_as_advanced(HPCToolKit_INCLUDE_DIRS HPCToolKit_LIBRARIES) diff --git a/maxflow/galois/cmake/Modules/FindNUMA.cmake b/maxflow/galois/cmake/Modules/FindNUMA.cmake deleted file mode 100644 index 1359bce..0000000 --- a/maxflow/galois/cmake/Modules/FindNUMA.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# Find numa library -# Once done this will define -# NUMA_FOUND - libnuma found -# NUMA_OLD - old libnuma API -if(NOT NUMA_FOUND) - find_library(NUMA_LIBRARIES NAMES numa PATH_SUFFIXES lib lib64) - if(NUMA_LIBRARIES) - include(CheckLibraryExists) - check_library_exists(${NUMA_LIBRARIES} numa_available "" NUMA_FOUND_INTERNAL) - if(NUMA_FOUND_INTERNAL) - check_library_exists(${NUMA_LIBRARIES} numa_allocate_nodemask "" NUMA_NEW_INTERNAL) - if(NOT NUMA_NEW_INTERNAL) - set(NUMA_OLD "yes" CACHE) - endif() - endif() - - include(FindPackageHandleStandardArgs) - find_package_handle_standard_args(NUMA DEFAULT_MSG NUMA_LIBRARIES) - mark_as_advanced(NUMA_FOUND) - endif() -endif() - diff --git a/maxflow/galois/cmake/Modules/FindPAPI.cmake b/maxflow/galois/cmake/Modules/FindPAPI.cmake deleted file mode 100644 index f88a250..0000000 --- a/maxflow/galois/cmake/Modules/FindPAPI.cmake +++ /dev/null @@ -1,21 +0,0 @@ -# Find PAPI libraries -# Once done this will define -# PAPI_FOUND - System has PAPI -# PAPI_INCLUDE_DIRS - The PAPI include directories -# PAPI_LIBRARIES - The libraries needed to use PAPI - -if(PAPI_INCLUDE_DIRS AND PAPI_LIBRARIES) - set(PAPI_FIND_QUIETLY TRUE) -endif() - -find_path(PAPI_INCLUDE_DIRS papi.h PATHS ${PAPI_ROOT} PATH_SUFFIXES include) -find_library(PAPI_LIBRARY NAMES papi PATHS ${PAPI_ROOT} PATH_SUFFIXES lib lib64) -find_library(PAPI_LIBRARIES NAMES rt PATH_SUFFIXES lib lib64) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(PAPI DEFAULT_MSG PAPI_LIBRARY PAPI_LIBRARIES PAPI_INCLUDE_DIRS) -if(PAPI_FOUND) - set(PAPI_LIBRARIES ${PAPI_LIBRARY} ${PAPI_LIBRARIES}) -endif() - -mark_as_advanced(PAPI_INCLUDE_DIRS PAPI_LIBRARIES) diff --git a/maxflow/galois/cmake/Modules/FindQGLViewer.cmake 
b/maxflow/galois/cmake/Modules/FindQGLViewer.cmake deleted file mode 100644 index 0d741b4..0000000 --- a/maxflow/galois/cmake/Modules/FindQGLViewer.cmake +++ /dev/null @@ -1,20 +0,0 @@ -# Find QGLViewer libraries -# Once done this will define -# QGLViewer_FOUND - System has QGLViewer -# QGLViewer_INCLUDE_DIRS - The QGLViewer include directories -# QGLViewer_LIBRARIES - The libraries needed to use QGLViewer - -if(QGLViewer_INCLUDE_DIRS AND QGLVIEWER_LIBRARIES) - set(QGLViewer_FIND_QUIETLY TRUE) -endif() - -find_path(QGLViewer_INCLUDE_DIRS NAMES QGLViewer/qglviewer.h) -find_library(QGLViewer_LIBRARIES NAMES QGLViewer PATH_SUFFIXES lib lib64) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(QGLViewer DEFAULT_MSG QGLViewer_INCLUDE_DIRS QGLViewer_LIBRARIES) -if(QGLVIEWER_FOUND) - set(QGLViewer_FOUND TRUE) -endif() - -mark_as_advanced(QGLViewer_INCLUDE_DIRS QGLViewer_LIBRARIES) diff --git a/maxflow/galois/cmake/Modules/FindSubversion.cmake b/maxflow/galois/cmake/Modules/FindSubversion.cmake deleted file mode 100644 index a6b0bb1..0000000 --- a/maxflow/galois/cmake/Modules/FindSubversion.cmake +++ /dev/null @@ -1,137 +0,0 @@ -# - Extract information from a subversion working copy -# The module defines the following variables: -# Subversion_SVN_EXECUTABLE - path to svn command line client -# Subversion_VERSION_SVN - version of svn command line client -# Subversion_FOUND - true if the command line client was found -# If the command line client executable is found the macro -# Subversion_WC_INFO( ) -# is defined to extract information of a subversion working copy at -# a given location. The macro defines the following variables: -# _WC_URL - url of the repository (at ) -# _WC_ROOT - root url of the repository -# _WC_REVISION - current revision -# _WC_LAST_CHANGED_AUTHOR - author of last commit -# _WC_LAST_CHANGED_DATE - date of last commit -# _WC_LAST_CHANGED_REV - revision of last commit -# _WC_LAST_CHANGED_LOG - last log of base revision -# _WC_INFO - output of command `svn info ' -# Example usage: -# FIND_PACKAGE(Subversion) -# IF(Subversion_FOUND) -# Subversion_WC_INFO(${PROJECT_SOURCE_DIR} Project) -# MESSAGE("Current revision is ${Project_WC_REVISION}") -# Subversion_WC_LOG(${PROJECT_SOURCE_DIR} Project) -# MESSAGE("Last changed log is ${Project_LAST_CHANGED_LOG}") -# ENDIF(Subversion_FOUND) - -# Copyright (c) 2006, Tristan Carel -# All rights reserved. -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of the University of California, Berkeley nor the -# names of its contributors may be used to endorse or promote products -# derived from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY -# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -# $Id: FindSubversion.cmake,v 1.2.2.3 2008-05-23 20:09:34 hoffman Exp $ - -SET(Subversion_FOUND FALSE) -SET(Subversion_SVN_FOUND FALSE) - -FIND_PROGRAM(Subversion_SVN_EXECUTABLE svn - DOC "subversion command line client") -MARK_AS_ADVANCED(Subversion_SVN_EXECUTABLE) - -IF(Subversion_SVN_EXECUTABLE) - SET(Subversion_SVN_FOUND TRUE) - SET(Subversion_FOUND TRUE) - - MACRO(Subversion_WC_INFO dir prefix) - # the subversion commands should be executed with the C locale, otherwise - # the message (which are parsed) may be translated, Alex - SET(_Subversion_SAVED_LC_ALL "$ENV{LC_ALL}") - SET(ENV{LC_ALL} C) - - EXECUTE_PROCESS(COMMAND ${Subversion_SVN_EXECUTABLE} --version - WORKING_DIRECTORY ${dir} - OUTPUT_VARIABLE Subversion_VERSION_SVN - OUTPUT_STRIP_TRAILING_WHITESPACE) - - EXECUTE_PROCESS(COMMAND ${Subversion_SVN_EXECUTABLE} info ${dir} - OUTPUT_VARIABLE ${prefix}_WC_INFO - ERROR_VARIABLE Subversion_svn_info_error - RESULT_VARIABLE Subversion_svn_info_result - OUTPUT_STRIP_TRAILING_WHITESPACE) - - IF(NOT ${Subversion_svn_info_result} EQUAL 0) - MESSAGE(STATUS "Command \"${Subversion_SVN_EXECUTABLE} info ${dir}\" failed with output:\n${Subversion_svn_info_error}") - set(Subversion_SVN_FOUND FALSE) - set(Subversion_FOUND FALSE) - ELSE(NOT ${Subversion_svn_info_result} EQUAL 0) - - STRING(REGEX REPLACE "^(.*\n)?svn, version ([.0-9]+).*" - "\\2" Subversion_VERSION_SVN "${Subversion_VERSION_SVN}") - STRING(REGEX REPLACE "^(.*\n)?URL: ([^\n]+).*" - "\\2" ${prefix}_WC_URL "${${prefix}_WC_INFO}") - STRING(REGEX REPLACE "^(.*\n)?Revision: ([^\n]+).*" - "\\2" ${prefix}_WC_REVISION "${${prefix}_WC_INFO}") - STRING(REGEX REPLACE "^(.*\n)?Last Changed Author: ([^\n]+).*" - "\\2" ${prefix}_WC_LAST_CHANGED_AUTHOR "${${prefix}_WC_INFO}") - STRING(REGEX REPLACE "^(.*\n)?Last Changed Rev: ([^\n]+).*" - "\\2" ${prefix}_WC_LAST_CHANGED_REV "${${prefix}_WC_INFO}") - STRING(REGEX REPLACE "^(.*\n)?Last Changed Date: ([^\n]+).*" - "\\2" ${prefix}_WC_LAST_CHANGED_DATE "${${prefix}_WC_INFO}") - - ENDIF(NOT ${Subversion_svn_info_result} EQUAL 0) - - # restore the previous LC_ALL - SET(ENV{LC_ALL} ${_Subversion_SAVED_LC_ALL}) - - ENDMACRO(Subversion_WC_INFO) - - MACRO(Subversion_WC_LOG dir prefix) - # This macro can block if the certificate is not signed: - # svn ask you to accept the certificate and wait for your answer - # This macro requires a svn server network access (Internet most of the time) - # and can also be slow since it access the svn server - EXECUTE_PROCESS(COMMAND - ${Subversion_SVN_EXECUTABLE} log -r BASE ${dir} - OUTPUT_VARIABLE ${prefix}_LAST_CHANGED_LOG - ERROR_VARIABLE Subversion_svn_log_error - RESULT_VARIABLE Subversion_svn_log_result - OUTPUT_STRIP_TRAILING_WHITESPACE) - - IF(NOT ${Subversion_svn_log_result} EQUAL 0) - MESSAGE(SEND_ERROR "Command \"${Subversion_SVN_EXECUTABLE} log -r BASE ${dir}\" failed with output:\n${Subversion_svn_log_error}") - ENDIF(NOT ${Subversion_svn_log_result} EQUAL 0) - ENDMACRO(Subversion_WC_LOG) - -ENDIF(Subversion_SVN_EXECUTABLE) - -IF(NOT 
Subversion_FOUND) - IF(NOT Subversion_FIND_QUIETLY) - MESSAGE(STATUS "Subversion was not found.") - ELSE(NOT Subversion_FIND_QUIETLY) - IF(Subversion_FIND_REQUIRED) - MESSAGE(FATAL_ERROR "Subversion was not found.") - ENDIF(Subversion_FIND_REQUIRED) - ENDIF(NOT Subversion_FIND_QUIETLY) -ENDIF(NOT Subversion_FOUND) - -# FindSubversion.cmake ends here. diff --git a/maxflow/galois/cmake/Modules/FindTBB.cmake b/maxflow/galois/cmake/Modules/FindTBB.cmake deleted file mode 100644 index d3f94d0..0000000 --- a/maxflow/galois/cmake/Modules/FindTBB.cmake +++ /dev/null @@ -1,286 +0,0 @@ -# Locate Intel Threading Building Blocks include paths and libraries -# FindTBB.cmake can be found at https://code.google.com/p/findtbb/ -# Written by Hannes Hofmann -# Improvements by Gino van den Bergen , -# Florian Uhlig , -# Jiri Marsik - -# The MIT License -# -# Copyright (c) 2011 Hannes Hofmann -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -# GvdB: This module uses the environment variable TBB_ARCH_PLATFORM which defines architecture and compiler. -# e.g. "ia32/vc8" or "em64t/cc4.1.0_libc2.4_kernel2.6.16.21" -# TBB_ARCH_PLATFORM is set by the build script tbbvars[.bat|.sh|.csh], which can be found -# in the TBB installation directory (TBB_INSTALL_DIR). -# -# GvdB: Mac OS X distribution places libraries directly in lib directory. -# -# For backwards compatibility, you may explicitely set the CMake variables TBB_ARCHITECTURE and TBB_COMPILER. -# TBB_ARCHITECTURE [ ia32 | em64t | itanium ] -# which architecture to use -# TBB_COMPILER e.g. vc9 or cc3.2.3_libc2.3.2_kernel2.4.21 or cc4.0.1_os10.4.9 -# which compiler to use (detected automatically on Windows) - -# This module respects -# TBB_INSTALL_DIR or $ENV{TBB21_INSTALL_DIR} or $ENV{TBB_INSTALL_DIR} - -# This module defines -# TBB_INCLUDE_DIRS, where to find task_scheduler_init.h, etc. -# TBB_LIBRARY_DIRS, where to find libtbb, libtbbmalloc -# TBB_DEBUG_LIBRARY_DIRS, where to find libtbb_debug, libtbbmalloc_debug -# TBB_INSTALL_DIR, the base TBB install directory -# TBB_LIBRARIES, the libraries to link against to use TBB. -# TBB_DEBUG_LIBRARIES, the libraries to link against to use TBB with debug symbols. -# TBB_FOUND, If false, don't try to use TBB. 
-# TBB_INTERFACE_VERSION, as defined in tbb/tbb_stddef.h - - -if (WIN32) - # has em64t/vc8 em64t/vc9 - # has ia32/vc7.1 ia32/vc8 ia32/vc9 - set(_TBB_DEFAULT_INSTALL_DIR "C:/Program Files/Intel/TBB" "C:/Program Files (x86)/Intel/TBB") - set(_TBB_LIB_NAME "tbb") - set(_TBB_LIB_MALLOC_NAME "${_TBB_LIB_NAME}malloc") - set(_TBB_LIB_DEBUG_NAME "${_TBB_LIB_NAME}_debug") - set(_TBB_LIB_MALLOC_DEBUG_NAME "${_TBB_LIB_MALLOC_NAME}_debug") - if (MSVC71) - set (_TBB_COMPILER "vc7.1") - endif(MSVC71) - if (MSVC80) - set(_TBB_COMPILER "vc8") - endif(MSVC80) - if (MSVC90) - set(_TBB_COMPILER "vc9") - endif(MSVC90) - if(MSVC10) - set(_TBB_COMPILER "vc10") - endif(MSVC10) - # Todo: add other Windows compilers such as ICL. - set(_TBB_ARCHITECTURE ${TBB_ARCHITECTURE}) -endif (WIN32) - -if (UNIX) - if (APPLE) - # MAC - set(_TBB_DEFAULT_INSTALL_DIR "/Library/Frameworks/Intel_TBB.framework/Versions") - # libs: libtbb.dylib, libtbbmalloc.dylib, *_debug - set(_TBB_LIB_NAME "tbb") - set(_TBB_LIB_MALLOC_NAME "${_TBB_LIB_NAME}malloc") - set(_TBB_LIB_DEBUG_NAME "${_TBB_LIB_NAME}_debug") - set(_TBB_LIB_MALLOC_DEBUG_NAME "${_TBB_LIB_MALLOC_NAME}_debug") - # default flavor on apple: ia32/cc4.0.1_os10.4.9 - # Jiri: There is no reason to presume there is only one flavor and - # that user's setting of variables should be ignored. - if(NOT TBB_COMPILER) - set(_TBB_COMPILER "cc4.0.1_os10.4.9") - elseif (NOT TBB_COMPILER) - set(_TBB_COMPILER ${TBB_COMPILER}) - endif(NOT TBB_COMPILER) - if(NOT TBB_ARCHITECTURE) - set(_TBB_ARCHITECTURE "ia32") - elseif(NOT TBB_ARCHITECTURE) - set(_TBB_ARCHITECTURE ${TBB_ARCHITECTURE}) - endif(NOT TBB_ARCHITECTURE) - else (APPLE) - # LINUX - set(_TBB_DEFAULT_INSTALL_DIR "/opt/intel/tbb" "/usr/local/include" "/usr/include") - set(_TBB_LIB_NAME "tbb") - set(_TBB_LIB_MALLOC_NAME "${_TBB_LIB_NAME}malloc") - set(_TBB_LIB_DEBUG_NAME "${_TBB_LIB_NAME}_debug") - set(_TBB_LIB_MALLOC_DEBUG_NAME "${_TBB_LIB_MALLOC_NAME}_debug") - # has em64t/cc3.2.3_libc2.3.2_kernel2.4.21 em64t/cc3.3.3_libc2.3.3_kernel2.6.5 em64t/cc3.4.3_libc2.3.4_kernel2.6.9 em64t/cc4.1.0_libc2.4_kernel2.6.16.21 - # has ia32/* - # has itanium/* - set(_TBB_COMPILER ${TBB_COMPILER}) - set(_TBB_ARCHITECTURE ${TBB_ARCHITECTURE}) - endif (APPLE) -endif (UNIX) - -if (CMAKE_SYSTEM MATCHES "SunOS.*") -# SUN -# not yet supported -# has em64t/cc3.4.3_kernel5.10 -# has ia32/* -endif (CMAKE_SYSTEM MATCHES "SunOS.*") - - -#-- Clear the public variables -set (TBB_FOUND "NO") - - -#-- Find TBB install dir and set ${_TBB_INSTALL_DIR} and cached ${TBB_INSTALL_DIR} -# first: use CMake variable TBB_INSTALL_DIR -if (TBB_INSTALL_DIR) - set (_TBB_INSTALL_DIR ${TBB_INSTALL_DIR}) -endif (TBB_INSTALL_DIR) -# second: use environment variable -if (NOT _TBB_INSTALL_DIR) - if (NOT "$ENV{TBBROOT}" STREQUAL "") - set (_TBB_INSTALL_DIR $ENV{TBBROOT}) - endif() - if (NOT "$ENV{TBB_INSTALL_DIR}" STREQUAL "") - set (_TBB_INSTALL_DIR $ENV{TBB_INSTALL_DIR}) - endif (NOT "$ENV{TBB_INSTALL_DIR}" STREQUAL "") - # Intel recommends setting TBB21_INSTALL_DIR - if (NOT "$ENV{TBB21_INSTALL_DIR}" STREQUAL "") - set (_TBB_INSTALL_DIR $ENV{TBB21_INSTALL_DIR}) - endif (NOT "$ENV{TBB21_INSTALL_DIR}" STREQUAL "") - if (NOT "$ENV{TBB22_INSTALL_DIR}" STREQUAL "") - set (_TBB_INSTALL_DIR $ENV{TBB22_INSTALL_DIR}) - endif (NOT "$ENV{TBB22_INSTALL_DIR}" STREQUAL "") - if (NOT "$ENV{TBB30_INSTALL_DIR}" STREQUAL "") - set (_TBB_INSTALL_DIR $ENV{TBB30_INSTALL_DIR}) - endif (NOT "$ENV{TBB30_INSTALL_DIR}" STREQUAL "") -endif (NOT _TBB_INSTALL_DIR) -# third: try to find path automatically -if (NOT 
_TBB_INSTALL_DIR) - if (_TBB_DEFAULT_INSTALL_DIR) - set (_TBB_INSTALL_DIR ${_TBB_DEFAULT_INSTALL_DIR}) - endif (_TBB_DEFAULT_INSTALL_DIR) -endif (NOT _TBB_INSTALL_DIR) -# sanity check -if (NOT _TBB_INSTALL_DIR) - message ("ERROR: Unable to find Intel TBB install directory. ${_TBB_INSTALL_DIR}") -else (NOT _TBB_INSTALL_DIR) -# finally: set the cached CMake variable TBB_INSTALL_DIR -if (NOT TBB_INSTALL_DIR) - set (TBB_INSTALL_DIR ${_TBB_INSTALL_DIR} CACHE PATH "Intel TBB install directory") - mark_as_advanced(TBB_INSTALL_DIR) -endif (NOT TBB_INSTALL_DIR) - - -#-- A macro to rewrite the paths of the library. This is necessary, because -# find_library() always found the em64t/vc9 version of the TBB libs -macro(TBB_CORRECT_LIB_DIR var_name) -# if (NOT "${_TBB_ARCHITECTURE}" STREQUAL "em64t") - string(REPLACE em64t "${_TBB_ARCHITECTURE}" ${var_name} ${${var_name}}) -# endif (NOT "${_TBB_ARCHITECTURE}" STREQUAL "em64t") - string(REPLACE ia32 "${_TBB_ARCHITECTURE}" ${var_name} ${${var_name}}) - string(REPLACE vc7.1 "${_TBB_COMPILER}" ${var_name} ${${var_name}}) - string(REPLACE vc8 "${_TBB_COMPILER}" ${var_name} ${${var_name}}) - string(REPLACE vc9 "${_TBB_COMPILER}" ${var_name} ${${var_name}}) - string(REPLACE vc10 "${_TBB_COMPILER}" ${var_name} ${${var_name}}) -endmacro(TBB_CORRECT_LIB_DIR var_content) - - -#-- Look for include directory and set ${TBB_INCLUDE_DIR} -set (TBB_INC_SEARCH_DIR ${_TBB_INSTALL_DIR}/include) -# Jiri: tbbvars now sets the CPATH environment variable to the directory -# containing the headers. -find_path(TBB_INCLUDE_DIR - tbb/task_scheduler_init.h - PATHS ${TBB_INC_SEARCH_DIR} ENV CPATH -) -mark_as_advanced(TBB_INCLUDE_DIR) - - -#-- Look for libraries -# GvdB: $ENV{TBB_ARCH_PLATFORM} is set by the build script tbbvars[.bat|.sh|.csh] -if (NOT $ENV{TBB_ARCH_PLATFORM} STREQUAL "") - set (_TBB_LIBRARY_DIR - ${_TBB_INSTALL_DIR}/lib/$ENV{TBB_ARCH_PLATFORM} - ${_TBB_INSTALL_DIR}/$ENV{TBB_ARCH_PLATFORM}/lib - ) -endif (NOT $ENV{TBB_ARCH_PLATFORM} STREQUAL "") -# Jiri: This block isn't mutually exclusive with the previous one -# (hence no else), instead I test if the user really specified -# the variables in question. -if ((NOT ${TBB_ARCHITECTURE} STREQUAL "") AND (NOT ${TBB_COMPILER} STREQUAL "")) - # HH: deprecated - message(STATUS "[Warning] FindTBB.cmake: The use of TBB_ARCHITECTURE and TBB_COMPILER is deprecated and may not be supported in future versions. Please set \$ENV{TBB_ARCH_PLATFORM} (using tbbvars.[bat|csh|sh]).") - # Jiri: It doesn't hurt to look in more places, so I store the hints from - # ENV{TBB_ARCH_PLATFORM} and the TBB_ARCHITECTURE and TBB_COMPILER - # variables and search them both. - set (_TBB_LIBRARY_DIR "${_TBB_INSTALL_DIR}/${_TBB_ARCHITECTURE}/${_TBB_COMPILER}/lib" ${_TBB_LIBRARY_DIR}) -endif ((NOT ${TBB_ARCHITECTURE} STREQUAL "") AND (NOT ${TBB_COMPILER} STREQUAL "")) - -# GvdB: Mac OS X distribution places libraries directly in lib directory. -list(APPEND _TBB_LIBRARY_DIR ${_TBB_INSTALL_DIR}/lib) - -# Jiri: No reason not to check the default paths. From recent versions, -# tbbvars has started exporting the LIBRARY_PATH and LD_LIBRARY_PATH -# variables, which now point to the directories of the lib files. -# It all makes more sense to use the ${_TBB_LIBRARY_DIR} as a HINTS -# argument instead of the implicit PATHS as it isn't hard-coded -# but computed by system introspection. 
Searching the LIBRARY_PATH -# and LD_LIBRARY_PATH environment variables is now even more important -# that tbbvars doesn't export TBB_ARCH_PLATFORM and it facilitates -# the use of TBB built from sources. -find_library(TBB_LIBRARY ${_TBB_LIB_NAME} HINTS ${_TBB_LIBRARY_DIR} - PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH) -find_library(TBB_MALLOC_LIBRARY ${_TBB_LIB_MALLOC_NAME} HINTS ${_TBB_LIBRARY_DIR} - PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH) - -#Extract path from TBB_LIBRARY name -get_filename_component(TBB_LIBRARY_DIR ${TBB_LIBRARY} PATH) - -#TBB_CORRECT_LIB_DIR(TBB_LIBRARY) -#TBB_CORRECT_LIB_DIR(TBB_MALLOC_LIBRARY) -mark_as_advanced(TBB_LIBRARY TBB_MALLOC_LIBRARY) - -#-- Look for debug libraries -# Jiri: Changed the same way as for the release libraries. -find_library(TBB_LIBRARY_DEBUG ${_TBB_LIB_DEBUG_NAME} HINTS ${_TBB_LIBRARY_DIR} - PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH) -find_library(TBB_MALLOC_LIBRARY_DEBUG ${_TBB_LIB_MALLOC_DEBUG_NAME} HINTS ${_TBB_LIBRARY_DIR} - PATHS ENV LIBRARY_PATH ENV LD_LIBRARY_PATH) - -# Jiri: Self-built TBB stores the debug libraries in a separate directory. -# Extract path from TBB_LIBRARY_DEBUG name -get_filename_component(TBB_LIBRARY_DEBUG_DIR ${TBB_LIBRARY_DEBUG} PATH) - -#TBB_CORRECT_LIB_DIR(TBB_LIBRARY_DEBUG) -#TBB_CORRECT_LIB_DIR(TBB_MALLOC_LIBRARY_DEBUG) -mark_as_advanced(TBB_LIBRARY_DEBUG TBB_MALLOC_LIBRARY_DEBUG) - - -if (TBB_INCLUDE_DIR) - if (TBB_LIBRARY) - set (TBB_FOUND "YES") - set (TBB_LIBRARIES ${TBB_LIBRARY} ${TBB_MALLOC_LIBRARY} ${TBB_LIBRARIES}) - set (TBB_DEBUG_LIBRARIES ${TBB_LIBRARY_DEBUG} ${TBB_MALLOC_LIBRARY_DEBUG} ${TBB_DEBUG_LIBRARIES}) - set (TBB_INCLUDE_DIRS ${TBB_INCLUDE_DIR} CACHE PATH "TBB include directory" FORCE) - set (TBB_LIBRARY_DIRS ${TBB_LIBRARY_DIR} CACHE PATH "TBB library directory" FORCE) - # Jiri: Self-built TBB stores the debug libraries in a separate directory. 
- set (TBB_DEBUG_LIBRARY_DIRS ${TBB_LIBRARY_DEBUG_DIR} CACHE PATH "TBB debug library directory" FORCE) - mark_as_advanced(TBB_INCLUDE_DIRS TBB_LIBRARY_DIRS TBB_DEBUG_LIBRARY_DIRS TBB_LIBRARIES TBB_DEBUG_LIBRARIES) - message(STATUS "Found Intel TBB") - endif (TBB_LIBRARY) -endif (TBB_INCLUDE_DIR) - -if (NOT TBB_FOUND) - message("ERROR: Intel TBB NOT found!") - message(STATUS "Looked for Threading Building Blocks in ${_TBB_INSTALL_DIR}") - # do only throw fatal, if this pkg is REQUIRED - if (TBB_FIND_REQUIRED) - message(FATAL_ERROR "Could NOT find TBB library.") - endif (TBB_FIND_REQUIRED) -endif (NOT TBB_FOUND) - -endif (NOT _TBB_INSTALL_DIR) - -if (TBB_FOUND) - set(TBB_INTERFACE_VERSION 0) - FILE(READ "${TBB_INCLUDE_DIRS}/tbb/tbb_stddef.h" _TBB_VERSION_CONTENTS) - STRING(REGEX REPLACE ".*#define TBB_INTERFACE_VERSION ([0-9]+).*" "\\1" TBB_INTERFACE_VERSION "${_TBB_VERSION_CONTENTS}") - set(TBB_INTERFACE_VERSION "${TBB_INTERFACE_VERSION}") -endif (TBB_FOUND) diff --git a/maxflow/galois/cmake/Modules/FindVTune.cmake b/maxflow/galois/cmake/Modules/FindVTune.cmake deleted file mode 100644 index 0009c96..0000000 --- a/maxflow/galois/cmake/Modules/FindVTune.cmake +++ /dev/null @@ -1,21 +0,0 @@ -# Find VTune libraries -# Once done this will define -# VTune_FOUND - System has VTune -# VTune_INCLUDE_DIRS - The VTune include directories -# VTune_LIBRARIES - The libraries needed to use VTune - -if(VTune_INCLUDE_DIRS AND VTune_LIBRARIES) - set(VTune_FIND_QUIETLY TRUE) -endif() - -find_path(VTune_INCLUDE_DIRS ittnotify.h PATHS ${VTune_ROOT} PATH_SUFFIXES include) -find_library(VTune_LIBRARY NAMES ittnotify PATHS ${VTune_ROOT} PATH_SUFFIXES lib lib64 lib32) -find_library(VTune_LIBRARIES NAMES dl PATH_SUFFIXES lib lib64 lib32) - -include(FindPackageHandleStandardArgs) -find_package_handle_standard_args(VTune DEFAULT_MSG VTune_LIBRARY VTune_LIBRARIES VTune_INCLUDE_DIRS) -if(VTUNE_FOUND) - set(VTune_FOUND on) - set(VTune_LIBRARIES ${VTune_LIBRARY} ${VTune_LIBRARIES}) -endif() -mark_as_advanced(VTune_INCLUDE_DIRS VTune_LIBRARIES) diff --git a/maxflow/galois/cmake/Modules/GaloisConfig.cmake.in b/maxflow/galois/cmake/Modules/GaloisConfig.cmake.in deleted file mode 100644 index 27b5f5b..0000000 --- a/maxflow/galois/cmake/Modules/GaloisConfig.cmake.in +++ /dev/null @@ -1,18 +0,0 @@ -# Config file for the Galois package -# It defines the following variables -# Galois_INCLUDE_DIRS -# Galois_LIBRARIES -# Galois_CXX_COMPILER -# Galois_CXX_FLAGS - -get_filename_component(GALOIS_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH) -set(Galois_INCLUDE_DIRS "@GALOIS_INCLUDE_DIR@") -set(Galois_INCLUDE_DIRS ${Galois_INCLUDE_DIRS} "@GALOIS_INCLUDE_DIRS@") - -if(NOT TARGET galois AND NOT Galois_BINARY_DIR) - include("${GALOIS_CMAKE_DIR}/GaloisTargets.cmake") -endif() - -set(Galois_LIBRARIES galois) -set(Galois_CXX_COMPILER "@GALOIS_CXX_COMPILER@") -set(Galois_CXX_FLAGS "@GALOIS_FLAGS@") diff --git a/maxflow/galois/cmake/Modules/GaloisConfigVersion.cmake.in b/maxflow/galois/cmake/Modules/GaloisConfigVersion.cmake.in deleted file mode 100644 index a915759..0000000 --- a/maxflow/galois/cmake/Modules/GaloisConfigVersion.cmake.in +++ /dev/null @@ -1,11 +0,0 @@ -set(PACKAGE_VERSION "@GALOIS_VERSION@") - -# Check whether the requested PACKAGE_FIND_VERSION is compatible -if("${PACKAGE_VERSION}" VERSION_LESS "${PACKAGE_FIND_VERSION}") - set(PACKAGE_VERSION_COMPATIBLE FALSE) -else() - set(PACKAGE_VERSION_COMPATIBLE TRUE) - if ("${PACKAGE_VERSION}" VERSION_EQUAL "${PACKAGE_FIND_VERSION}") - set(PACKAGE_VERSION_EXACT TRUE) - endif() -endif() 
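# Illustrative downstream usage sketch for the Galois package config above, relying
# only on the variables it defines (Galois_INCLUDE_DIRS, Galois_LIBRARIES,
# Galois_CXX_FLAGS); any version passed to find_package() is validated by
# GaloisConfigVersion.cmake.in. The target "my_tool" and "main.cpp" are hypothetical.
find_package(Galois REQUIRED)
include_directories(${Galois_INCLUDE_DIRS})
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${Galois_CXX_FLAGS}")
add_executable(my_tool main.cpp)
target_link_libraries(my_tool ${Galois_LIBRARIES})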
diff --git a/maxflow/galois/cmake/Modules/GetSVNVersion-write.cmake b/maxflow/galois/cmake/Modules/GetSVNVersion-write.cmake deleted file mode 100644 index 6a5ca2c..0000000 --- a/maxflow/galois/cmake/Modules/GetSVNVersion-write.cmake +++ /dev/null @@ -1,15 +0,0 @@ -### Don't include directly, for use by GetSVNVersion.cmake -find_package(Subversion) -# Extract svn info into MY_XXX variables -if(Subversion_FOUND) - Subversion_WC_INFO(${SOURCE_DIR} MY) - if (Subversion_FOUND) - file(WRITE include/Galois/svnversion.h.txt "#define GALOIS_SVNVERSION ${MY_WC_REVISION}\n") - else() - file(WRITE include/Galois/svnversion.h.txt "#define GALOIS_SVNVERSION 0\n") - endif() -else() - file(WRITE include/Galois/svnversion.h.txt "#define GALOIS_SVNVERSION 0\n") -endif() - -execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different include/Galois/svnversion.h.txt include/Galois/svnversion.h) diff --git a/maxflow/galois/cmake/Modules/GetSVNVersion.cmake b/maxflow/galois/cmake/Modules/GetSVNVersion.cmake deleted file mode 100644 index 57698f8..0000000 --- a/maxflow/galois/cmake/Modules/GetSVNVersion.cmake +++ /dev/null @@ -1,14 +0,0 @@ -# DUMMY is a non-existent file to force regeneration of svn header every build -add_custom_target(svnversion ALL DEPENDS DUMMY ${PROJECT_BINARY_DIR}/include/Galois/svnversion.h) - -find_file(_MODULE "GetSVNVersion-write.cmake" PATHS ${CMAKE_MODULE_PATH}) - -add_custom_command(OUTPUT DUMMY ${PROJECT_BINARY_DIR}/include/Galois/svnversion.h - COMMAND ${CMAKE_COMMAND} -DSOURCE_DIR=${CMAKE_SOURCE_DIR} - -DCMAKE_MODULE_PATH="${CMAKE_SOURCE_DIR}/cmake/Modules/" -P ${_MODULE}) - -set(_MODULE off) - -set_source_files_properties(${PROJECT_BINARY_DIR}/include/Galois/svnversion.h - PROPERTIES GENERATED TRUE - HEADER_FILE_ONLY TRUE) diff --git a/maxflow/galois/cmake/Modules/HandleLLVMOptions.cmake b/maxflow/galois/cmake/Modules/HandleLLVMOptions.cmake deleted file mode 100644 index f3ff373..0000000 --- a/maxflow/galois/cmake/Modules/HandleLLVMOptions.cmake +++ /dev/null @@ -1,195 +0,0 @@ -include(AddLLVMDefinitions) - -if( CMAKE_COMPILER_IS_GNUCXX ) - set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON) -elseif( "${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" ) - set(LLVM_COMPILER_IS_GCC_COMPATIBLE ON) -endif() - -# Run-time build mode; It is used for unittests. -if(MSVC_IDE) - # Expect "$(Configuration)", "$(OutDir)", etc. - # It is expanded by msbuild or similar. - set(RUNTIME_BUILD_MODE "${CMAKE_CFG_INTDIR}") -elseif(NOT CMAKE_BUILD_TYPE STREQUAL "") - # Expect "Release" "Debug", etc. - # Or unittests could not run. - set(RUNTIME_BUILD_MODE ${CMAKE_BUILD_TYPE}) -else() - # It might be "." - set(RUNTIME_BUILD_MODE "${CMAKE_CFG_INTDIR}") -endif() - -set(LIT_ARGS_DEFAULT "-sv") -if (MSVC OR XCODE) - set(LIT_ARGS_DEFAULT "${LIT_ARGS_DEFAULT} --no-progress-bar") -endif() -set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" - CACHE STRING "Default options for lit") - -#XXX(ddn): Don't pollute with extra definitions -if(0) -if( LLVM_ENABLE_ASSERTIONS ) - # MSVC doesn't like _DEBUG on release builds. See PR 4379. 
- if( NOT MSVC ) - add_definitions( -D_DEBUG ) - endif() - # On Release builds cmake automatically defines NDEBUG, so we - # explicitly undefine it: - if( uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" ) - add_definitions( -UNDEBUG ) - endif() -else() - if( NOT uppercase_CMAKE_BUILD_TYPE STREQUAL "RELEASE" ) - if( NOT MSVC_IDE AND NOT XCODE ) - add_definitions( -DNDEBUG ) - endif() - endif() -endif() -endif() - -if(WIN32) - if(CYGWIN) - set(LLVM_ON_WIN32 0) - set(LLVM_ON_UNIX 1) - else(CYGWIN) - set(LLVM_ON_WIN32 1) - set(LLVM_ON_UNIX 0) - - # This is effective only on Win32 hosts to use gnuwin32 tools. - set(LLVM_LIT_TOOLS_DIR "" CACHE PATH "Path to GnuWin32 tools") - endif(CYGWIN) - set(LTDL_SHLIB_EXT ".dll") - set(EXEEXT ".exe") - # Maximum path length is 160 for non-unicode paths - set(MAXPATHLEN 160) -else(WIN32) - if(UNIX) - set(LLVM_ON_WIN32 0) - set(LLVM_ON_UNIX 1) - if(APPLE) - set(LTDL_SHLIB_EXT ".dylib") - else(APPLE) - set(LTDL_SHLIB_EXT ".so") - endif(APPLE) - set(EXEEXT "") - # FIXME: Maximum path length is currently set to 'safe' fixed value - set(MAXPATHLEN 2024) - else(UNIX) - MESSAGE(SEND_ERROR "Unable to determine platform") - endif(UNIX) -endif(WIN32) - -if( LLVM_ENABLE_PIC ) - if( XCODE ) - # Xcode has -mdynamic-no-pic on by default, which overrides -fPIC. I don't - # know how to disable this, so just force ENABLE_PIC off for now. - message(WARNING "-fPIC not supported with Xcode.") - elseif( WIN32 ) - # On Windows all code is PIC. MinGW warns if -fPIC is used. - else() - include(CheckCXXCompilerFlag) - check_cxx_compiler_flag("-fPIC" SUPPORTS_FPIC_FLAG) - if( SUPPORTS_FPIC_FLAG ) - message(STATUS "Building with -fPIC") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fPIC") - else( SUPPORTS_FPIC_FLAG ) - message(WARNING "-fPIC not supported.") - endif() - endif() -endif() - -if( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) - # TODO: support other platforms and toolchains. - option(LLVM_BUILD_32_BITS "Build 32 bits executables and libraries." OFF) - if( LLVM_BUILD_32_BITS ) - message(STATUS "Building 32 bits executables and libraries.") - add_llvm_definitions( -m32 ) - list(APPEND CMAKE_EXE_LINKER_FLAGS -m32) - list(APPEND CMAKE_SHARED_LINKER_FLAGS -m32) - endif( LLVM_BUILD_32_BITS ) -endif( CMAKE_SIZEOF_VOID_P EQUAL 8 AND NOT WIN32 ) - -if( MSVC_IDE AND ( MSVC90 OR MSVC10 ) ) - # Only Visual Studio 2008 and 2010 officially supports /MP. - # Visual Studio 2005 do support it but it's experimental there. - set(LLVM_COMPILER_JOBS "0" CACHE STRING - "Number of parallel compiler jobs. 0 means use all processors. Default is 0.") - if( NOT LLVM_COMPILER_JOBS STREQUAL "1" ) - if( LLVM_COMPILER_JOBS STREQUAL "0" ) - add_llvm_definitions( /MP ) - else() - if (MSVC10) - message(FATAL_ERROR - "Due to a bug in CMake only 0 and 1 is supported for " - "LLVM_COMPILER_JOBS when generating for Visual Studio 2010") - else() - message(STATUS "Number of parallel compiler jobs set to " ${LLVM_COMPILER_JOBS}) - add_llvm_definitions( /MP${LLVM_COMPILER_JOBS} ) - endif() - endif() - else() - message(STATUS "Parallel compilation disabled") - endif() -endif() - -if( MSVC ) - include(ChooseMSVCCRT) - - # Add definitions that make MSVC much less annoying. - add_llvm_definitions( - # For some reason MS wants to deprecate a bunch of standard functions... 
- -D_CRT_SECURE_NO_DEPRECATE - -D_CRT_SECURE_NO_WARNINGS - -D_CRT_NONSTDC_NO_DEPRECATE - -D_CRT_NONSTDC_NO_WARNINGS - -D_SCL_SECURE_NO_DEPRECATE - -D_SCL_SECURE_NO_WARNINGS - - -wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned' - -wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored' - -wd4224 # Suppress 'nonstandard extension used : formal parameter 'identifier' was previously defined as a type' - -wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data' - -wd4267 # Suppress ''var' : conversion from 'size_t' to 'type', possible loss of data' - -wd4275 # Suppress 'An exported class was derived from a class that was not exported.' - -wd4291 # Suppress ''declaration' : no matching operator delete found; memory will not be freed if initialization throws an exception' - -wd4345 # Suppress 'behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized' - -wd4351 # Suppress 'new behavior: elements of array 'array' will be default initialized' - -wd4355 # Suppress ''this' : used in base member initializer list' - -wd4503 # Suppress ''identifier' : decorated name length exceeded, name was truncated' - -wd4551 # Suppress 'function call missing argument list' - -wd4624 # Suppress ''derived class' : destructor could not be generated because a base class destructor is inaccessible' - -wd4715 # Suppress ''function' : not all control paths return a value' - -wd4800 # Suppress ''type' : forcing value to bool 'true' or 'false' (performance warning)' - -wd4065 # Suppress 'switch statement contains 'default' but no 'case' labels' - -wd4181 # Suppress 'qualifier applied to reference type; ignored' - -w14062 # Promote "enumerator in switch of enum is not handled" to level 1 warning. - ) - - # Enable warnings - if (LLVM_ENABLE_WARNINGS) - add_llvm_definitions( /W4 /Wall ) - if (LLVM_ENABLE_PEDANTIC) - # No MSVC equivalent available - endif (LLVM_ENABLE_PEDANTIC) - endif (LLVM_ENABLE_WARNINGS) - if (LLVM_ENABLE_WERROR) - add_llvm_definitions( /WX ) - endif (LLVM_ENABLE_WERROR) -elseif( LLVM_COMPILER_IS_GCC_COMPATIBLE ) - if (LLVM_ENABLE_WARNINGS) - add_llvm_definitions( -Wall -W -Wno-unused-parameter -Wwrite-strings ) - if (LLVM_ENABLE_PEDANTIC) - add_llvm_definitions( -pedantic -Wno-long-long ) - endif (LLVM_ENABLE_PEDANTIC) - endif (LLVM_ENABLE_WARNINGS) - if (LLVM_ENABLE_WERROR) - add_llvm_definitions( -Werror ) - endif (LLVM_ENABLE_WERROR) -endif( MSVC ) - -add_llvm_definitions( -D__STDC_LIMIT_MACROS ) -add_llvm_definitions( -D__STDC_CONSTANT_MACROS ) - -option(LLVM_INCLUDE_TESTS "Generate build targets for the LLVM unit tests." 
ON) diff --git a/maxflow/galois/cmake/Modules/ParseArguments.cmake b/maxflow/galois/cmake/Modules/ParseArguments.cmake deleted file mode 100644 index e13f671..0000000 --- a/maxflow/galois/cmake/Modules/ParseArguments.cmake +++ /dev/null @@ -1,52 +0,0 @@ -# Parse arguments passed to a function into several lists separated by -# upper-case identifiers and options that do not have an associated list e.g.: -# -# SET(arguments -# hello OPTION3 world -# LIST3 foo bar -# OPTION2 -# LIST1 fuz baz -# ) -# PARSE_ARGUMENTS(ARG "LIST1;LIST2;LIST3" "OPTION1;OPTION2;OPTION3" ${arguments}) -# -# results in 7 distinct variables: -# * ARG_DEFAULT_ARGS: hello;world -# * ARG_LIST1: fuz;baz -# * ARG_LIST2: -# * ARG_LIST3: foo;bar -# * ARG_OPTION1: FALSE -# * ARG_OPTION2: TRUE -# * ARG_OPTION3: TRUE -# -# taken from http://www.cmake.org/Wiki/CMakeMacroParseArguments - -MACRO(PARSE_ARGUMENTS prefix arg_names option_names) - SET(DEFAULT_ARGS) - FOREACH(arg_name ${arg_names}) - SET(${prefix}_${arg_name}) - ENDFOREACH(arg_name) - FOREACH(option ${option_names}) - SET(${prefix}_${option} FALSE) - ENDFOREACH(option) - - SET(current_arg_name DEFAULT_ARGS) - SET(current_arg_list) - FOREACH(arg ${ARGN}) - SET(larg_names ${arg_names}) - LIST(FIND larg_names "${arg}" is_arg_name) - IF (is_arg_name GREATER -1) - SET(${prefix}_${current_arg_name} ${current_arg_list}) - SET(current_arg_name ${arg}) - SET(current_arg_list) - ELSE (is_arg_name GREATER -1) - SET(loption_names ${option_names}) - LIST(FIND loption_names "${arg}" is_option) - IF (is_option GREATER -1) - SET(${prefix}_${arg} TRUE) - ELSE (is_option GREATER -1) - SET(current_arg_list ${current_arg_list} ${arg}) - ENDIF (is_option GREATER -1) - ENDIF (is_arg_name GREATER -1) - ENDFOREACH(arg) - SET(${prefix}_${current_arg_name} ${current_arg_list}) -ENDMACRO(PARSE_ARGUMENTS) diff --git a/maxflow/galois/cmake/Modules/UseDoxygen.cmake b/maxflow/galois/cmake/Modules/UseDoxygen.cmake deleted file mode 100644 index bd4aef0..0000000 --- a/maxflow/galois/cmake/Modules/UseDoxygen.cmake +++ /dev/null @@ -1,122 +0,0 @@ -# - Run Doxygen -# -# Adds a doxygen target that runs doxygen to generate the html -# and optionally the LaTeX API documentation. -# The doxygen target is added to the doc target as a dependency. -# i.e.: the API documentation is built with: -# make doc -# -# USAGE: GLOBAL INSTALL -# -# Install it with: -# cmake ./ && sudo make install -# Add the following to the CMakeLists.txt of your project: -# include(UseDoxygen OPTIONAL) -# Optionally copy Doxyfile.in in the directory of CMakeLists.txt and edit it. -# -# USAGE: INCLUDE IN PROJECT -# -# set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}) -# include(UseDoxygen) -# Add the Doxyfile.in and UseDoxygen.cmake files to the projects source directory. -# -# -# Variables you may define are: -# DOXYFILE_SOURCE_DIR - Path where the Doxygen input files are. -# Defaults to the current source and binary directory. -# DOXYFILE_OUTPUT_DIR - Path where the Doxygen output is stored. Defaults to "doc". -# -# DOXYFILE_LATEX - Set to "NO" if you do not want the LaTeX documentation -# to be built. -# DOXYFILE_LATEX_DIR - Directory relative to DOXYFILE_OUTPUT_DIR where -# the Doxygen LaTeX output is stored. Defaults to "latex". -# -# DOXYFILE_HTML_DIR - Directory relative to DOXYFILE_OUTPUT_DIR where -# the Doxygen html output is stored. Defaults to "html". -# - -# -# Copyright (c) 2009, 2010 Tobias Rautenkranz -# -# Redistribution and use is allowed according to the terms of the New -# BSD license. 
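# Illustrative usage sketch based only on the options this module documents above
# (paths here are hypothetical); the module itself already attaches the "doxygen"
# target to the "doc" target.
set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules")
set(DOXYFILE_OUTPUT_DIR "${CMAKE_BINARY_DIR}/doc")
set(DOXYFILE_LATEX "NO")            # skip the LaTeX output
include(UseDoxygen OPTIONAL)
# afterwards the API documentation is built with: make doc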
-# For details see the accompanying COPYING-CMAKE-SCRIPTS file. -# - -macro(usedoxygen_set_default name value) - if(NOT DEFINED "${name}") - set("${name}" "${value}") - endif() -endmacro() - -find_package(Doxygen) - -if(DOXYGEN_FOUND) - find_file(DOXYFILE_IN "Doxyfile.in" - PATHS "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_ROOT}/Modules/" - NO_DEFAULT_PATH) - set(DOXYFILE "${CMAKE_CURRENT_BINARY_DIR}/Doxyfile") - include(FindPackageHandleStandardArgs) - find_package_handle_standard_args(DOXYFILE_IN DEFAULT_MSG "DOXYFILE_IN") -endif() - -if(DOXYGEN_FOUND AND DOXYFILE_IN_FOUND) - usedoxygen_set_default(DOXYFILE_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/doc") - usedoxygen_set_default(DOXYFILE_HTML_DIR "html") - usedoxygen_set_default(DOXYFILE_SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}\" \"${CMAKE_CURRENT_BINARY_DIR}") - - set_property(DIRECTORY APPEND PROPERTY - ADDITIONAL_MAKE_CLEAN_FILES - "${DOXYFILE_OUTPUT_DIR}/${DOXYFILE_HTML_DIR}") - - add_custom_target(doxygen - COMMAND ${DOXYGEN_EXECUTABLE} - ${DOXYFILE} - COMMENT "Writing documentation to ${DOXYFILE_OUTPUT_DIR}..." - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}) - - ## LaTeX - set(DOXYFILE_PDFLATEX "NO") - set(DOXYFILE_DOT "NO") - - find_package(LATEX) - find_program(MAKE_PROGRAM make) - - if(NOT DEFINED DOXYFILE_LATEX) - set(DOXFILE_LATEX, "YES") - endif() - - if(LATEX_COMPILER AND MAKEINDEX_COMPILER AND MAKE_PROGRAM AND DOXYFILE_LATEX STREQUAL "YES") - set(DOXYFILE_LATEX "YES") - usedoxygen_set_default(DOXYFILE_LATEX_DIR "latex") - - set_property(DIRECTORY APPEND PROPERTY - ADDITIONAL_MAKE_CLEAN_FILES - "${DOXYFILE_OUTPUT_DIR}/${DOXYFILE_LATEX_DIR}") - - if(PDFLATEX_COMPILER) - set(DOXYFILE_PDFLATEX "YES") - endif() - if(DOXYGEN_DOT_EXECUTABLE) - set(DOXYFILE_DOT "YES") - endif() - - add_custom_command(TARGET doxygen - POST_BUILD - COMMAND ${MAKE_PROGRAM} - COMMENT "Running LaTeX for Doxygen documentation in ${DOXYFILE_OUTPUT_DIR}/${DOXYFILE_LATEX_DIR}..." 
- WORKING_DIRECTORY "${DOXYFILE_OUTPUT_DIR}/${DOXYFILE_LATEX_DIR}") - else() - set(DOXYGEN_LATEX "NO") - endif() - - - configure_file(${DOXYFILE_IN} Doxyfile IMMEDIATE @ONLY) - - get_target_property(DOC_TARGET doc TYPE) - if(NOT DOC_TARGET) - add_custom_target(doc) - endif() - - add_dependencies(doc doxygen) -endif() diff --git a/maxflow/galois/cmake/Modules/UseStdMacro.cmake b/maxflow/galois/cmake/Modules/UseStdMacro.cmake deleted file mode 100644 index 696b92d..0000000 --- a/maxflow/galois/cmake/Modules/UseStdMacro.cmake +++ /dev/null @@ -1,2 +0,0 @@ -add_definitions(-D__STDC_LIMIT_MACROS) -add_definitions(-D__STDC_CONSTANT_MACROS) diff --git a/maxflow/galois/cmake/Modules/llvm-extras.cmake b/maxflow/galois/cmake/Modules/llvm-extras.cmake deleted file mode 100644 index 99ad0fd..0000000 --- a/maxflow/galois/cmake/Modules/llvm-extras.cmake +++ /dev/null @@ -1,392 +0,0 @@ -include(HandleLLVMOptions) - -if( WIN32 AND NOT CYGWIN ) - # We consider Cygwin as another Unix - set(PURE_WINDOWS 1) -endif() - -include(CheckIncludeFile) -include(CheckLibraryExists) -include(CheckSymbolExists) -include(CheckFunctionExists) -include(CheckCXXSourceCompiles) -include(TestBigEndian) - -if( UNIX AND NOT BEOS ) - # Used by check_symbol_exists: - set(CMAKE_REQUIRED_LIBRARIES m) -endif() - -# Helper macros and functions -macro(add_cxx_include result files) - set(${result} "") - foreach (file_name ${files}) - set(${result} "${${result}}#include<${file_name}>\n") - endforeach() -endmacro(add_cxx_include files result) - -function(check_type_exists type files variable) - add_cxx_include(includes "${files}") - CHECK_CXX_SOURCE_COMPILES(" - ${includes} ${type} typeVar; - int main() { - return 0; - } - " ${variable}) -endfunction() - -# include checks -check_include_file(argz.h HAVE_ARGZ_H) -check_include_file(assert.h HAVE_ASSERT_H) -check_include_file(ctype.h HAVE_CTYPE_H) -check_include_file(dirent.h HAVE_DIRENT_H) -check_include_file(dl.h HAVE_DL_H) -check_include_file(dld.h HAVE_DLD_H) -check_include_file(dlfcn.h HAVE_DLFCN_H) -check_include_file(errno.h HAVE_ERRNO_H) -check_include_file(execinfo.h HAVE_EXECINFO_H) -check_include_file(fcntl.h HAVE_FCNTL_H) -check_include_file(inttypes.h HAVE_INTTYPES_H) -check_include_file(limits.h HAVE_LIMITS_H) -check_include_file(link.h HAVE_LINK_H) -check_include_file(malloc.h HAVE_MALLOC_H) -check_include_file(malloc/malloc.h HAVE_MALLOC_MALLOC_H) -check_include_file(memory.h HAVE_MEMORY_H) -check_include_file(ndir.h HAVE_NDIR_H) -if( NOT PURE_WINDOWS ) - check_include_file(pthread.h HAVE_PTHREAD_H) -endif() -check_include_file(setjmp.h HAVE_SETJMP_H) -check_include_file(signal.h HAVE_SIGNAL_H) -check_include_file(stdint.h HAVE_STDINT_H) -check_include_file(stdio.h HAVE_STDIO_H) -check_include_file(stdlib.h HAVE_STDLIB_H) -check_include_file(string.h HAVE_STRING_H) -check_include_file(strings.h HAVE_STRINGS_H) -check_include_file(sys/dir.h HAVE_SYS_DIR_H) -check_include_file(sys/dl.h HAVE_SYS_DL_H) -check_include_file(sys/ioctl.h HAVE_SYS_IOCTL_H) -check_include_file(sys/mman.h HAVE_SYS_MMAN_H) -check_include_file(sys/ndir.h HAVE_SYS_NDIR_H) -check_include_file(sys/param.h HAVE_SYS_PARAM_H) -check_include_file(sys/resource.h HAVE_SYS_RESOURCE_H) -check_include_file(sys/stat.h HAVE_SYS_STAT_H) -check_include_file(sys/time.h HAVE_SYS_TIME_H) -check_include_file(sys/types.h HAVE_SYS_TYPES_H) -check_include_file(sys/uio.h HAVE_SYS_UIO_H) -check_include_file(sys/wait.h HAVE_SYS_WAIT_H) -check_include_file(termios.h HAVE_TERMIOS_H) -check_include_file(unistd.h HAVE_UNISTD_H) 
-check_include_file(utime.h HAVE_UTIME_H) -check_include_file(valgrind/valgrind.h HAVE_VALGRIND_VALGRIND_H) -check_include_file(windows.h HAVE_WINDOWS_H) -check_include_file(fenv.h HAVE_FENV_H) -check_include_file(mach/mach.h HAVE_MACH_MACH_H) -check_include_file(mach-o/dyld.h HAVE_MACH_O_DYLD_H) - -# library checks -if( NOT PURE_WINDOWS ) - check_library_exists(pthread pthread_create "" HAVE_LIBPTHREAD) - check_library_exists(pthread pthread_getspecific "" HAVE_PTHREAD_GETSPECIFIC) - check_library_exists(pthread pthread_rwlock_init "" HAVE_PTHREAD_RWLOCK_INIT) - check_library_exists(dl dlopen "" HAVE_LIBDL) -endif() - -# function checks -check_symbol_exists(getpagesize unistd.h HAVE_GETPAGESIZE) -check_symbol_exists(getrusage sys/resource.h HAVE_GETRUSAGE) -check_symbol_exists(setrlimit sys/resource.h HAVE_SETRLIMIT) -check_function_exists(isatty HAVE_ISATTY) -check_symbol_exists(index strings.h HAVE_INDEX) -check_symbol_exists(isinf cmath HAVE_ISINF_IN_CMATH) -check_symbol_exists(isinf math.h HAVE_ISINF_IN_MATH_H) -check_symbol_exists(finite ieeefp.h HAVE_FINITE_IN_IEEEFP_H) -check_symbol_exists(isnan cmath HAVE_ISNAN_IN_CMATH) -check_symbol_exists(isnan math.h HAVE_ISNAN_IN_MATH_H) -check_symbol_exists(ceilf math.h HAVE_CEILF) -check_symbol_exists(floorf math.h HAVE_FLOORF) -check_symbol_exists(fmodf math.h HAVE_FMODF) -if( HAVE_SETJMP_H ) - check_symbol_exists(longjmp setjmp.h HAVE_LONGJMP) - check_symbol_exists(setjmp setjmp.h HAVE_SETJMP) - check_symbol_exists(siglongjmp setjmp.h HAVE_SIGLONGJMP) - check_symbol_exists(sigsetjmp setjmp.h HAVE_SIGSETJMP) -endif() -if( HAVE_SYS_UIO_H ) - check_symbol_exists(writev sys/uio.h HAVE_WRITEV) -endif() -check_symbol_exists(nearbyintf math.h HAVE_NEARBYINTF) -check_symbol_exists(mallinfo malloc.h HAVE_MALLINFO) -check_symbol_exists(malloc_zone_statistics malloc/malloc.h - HAVE_MALLOC_ZONE_STATISTICS) -check_symbol_exists(mkdtemp "stdlib.h;unistd.h" HAVE_MKDTEMP) -check_symbol_exists(mkstemp "stdlib.h;unistd.h" HAVE_MKSTEMP) -check_symbol_exists(mktemp "stdlib.h;unistd.h" HAVE_MKTEMP) -check_symbol_exists(closedir "sys/types.h;dirent.h" HAVE_CLOSEDIR) -check_symbol_exists(opendir "sys/types.h;dirent.h" HAVE_OPENDIR) -check_symbol_exists(readdir "sys/types.h;dirent.h" HAVE_READDIR) -check_symbol_exists(getcwd unistd.h HAVE_GETCWD) -check_symbol_exists(gettimeofday sys/time.h HAVE_GETTIMEOFDAY) -check_symbol_exists(getrlimit "sys/types.h;sys/time.h;sys/resource.h" HAVE_GETRLIMIT) -check_symbol_exists(rindex strings.h HAVE_RINDEX) -check_symbol_exists(strchr string.h HAVE_STRCHR) -check_symbol_exists(strcmp string.h HAVE_STRCMP) -check_symbol_exists(strdup string.h HAVE_STRDUP) -check_symbol_exists(strrchr string.h HAVE_STRRCHR) -if( NOT PURE_WINDOWS ) - check_symbol_exists(pthread_mutex_lock pthread.h HAVE_PTHREAD_MUTEX_LOCK) -endif() -check_symbol_exists(sbrk unistd.h HAVE_SBRK) -check_symbol_exists(srand48 stdlib.h HAVE_RAND48_SRAND48) -if( HAVE_RAND48_SRAND48 ) - check_symbol_exists(lrand48 stdlib.h HAVE_RAND48_LRAND48) - if( HAVE_RAND48_LRAND48 ) - check_symbol_exists(drand48 stdlib.h HAVE_RAND48_DRAND48) - if( HAVE_RAND48_DRAND48 ) - set(HAVE_RAND48 1 CACHE INTERNAL "are srand48/lrand48/drand48 available?") - endif() - endif() -endif() -check_symbol_exists(strtoll stdlib.h HAVE_STRTOLL) -check_symbol_exists(strtoq stdlib.h HAVE_STRTOQ) -check_symbol_exists(strerror string.h HAVE_STRERROR) -check_symbol_exists(strerror_r string.h HAVE_STRERROR_R) -check_symbol_exists(strerror_s string.h HAVE_DECL_STRERROR_S) -check_symbol_exists(memcpy 
string.h HAVE_MEMCPY) -check_symbol_exists(memmove string.h HAVE_MEMMOVE) -check_symbol_exists(setenv stdlib.h HAVE_SETENV) -if( PURE_WINDOWS ) - check_symbol_exists(_chsize_s io.h HAVE__CHSIZE_S) - - check_function_exists(_alloca HAVE__ALLOCA) - check_function_exists(__alloca HAVE___ALLOCA) - check_function_exists(__chkstk HAVE___CHKSTK) - check_function_exists(___chkstk HAVE____CHKSTK) - - check_function_exists(__ashldi3 HAVE___ASHLDI3) - check_function_exists(__ashrdi3 HAVE___ASHRDI3) - check_function_exists(__divdi3 HAVE___DIVDI3) - check_function_exists(__fixdfdi HAVE___FIXDFDI) - check_function_exists(__fixsfdi HAVE___FIXSFDI) - check_function_exists(__floatdidf HAVE___FLOATDIDF) - check_function_exists(__lshrdi3 HAVE___LSHRDI3) - check_function_exists(__moddi3 HAVE___MODDI3) - check_function_exists(__udivdi3 HAVE___UDIVDI3) - check_function_exists(__umoddi3 HAVE___UMODDI3) - - check_function_exists(__main HAVE___MAIN) - check_function_exists(__cmpdi2 HAVE___CMPDI2) -endif() -if( HAVE_ARGZ_H ) - check_symbol_exists(argz_append argz.h HAVE_ARGZ_APPEND) - check_symbol_exists(argz_create_sep argz.h HAVE_ARGZ_CREATE_SEP) - check_symbol_exists(argz_insert argz.h HAVE_ARGZ_INSERT) - check_symbol_exists(argz_next argz.h HAVE_ARGZ_NEXT) - check_symbol_exists(argz_stringify argz.h HAVE_ARGZ_STRINGIFY) -endif() -if( HAVE_DLFCN_H ) - if( HAVE_LIBDL ) - list(APPEND CMAKE_REQUIRED_LIBRARIES dl) - endif() - check_symbol_exists(dlerror dlfcn.h HAVE_DLERROR) - check_symbol_exists(dlopen dlfcn.h HAVE_DLOPEN) - if( HAVE_LIBDL ) - list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES dl) - endif() -endif() - -#check_symbol_exists(__GLIBC__ stdio.h LLVM_USING_GLIBC) -#if( LLVM_USING_GLIBC ) -# add_llvm_definitions( -D_GNU_SOURCE ) -#endif() - -set(headers "") -if (HAVE_SYS_TYPES_H) - set(headers ${headers} "sys/types.h") -endif() - -if (HAVE_INTTYPES_H) - set(headers ${headers} "inttypes.h") -endif() - -if (HAVE_STDINT_H) - set(headers ${headers} "stdint.h") -endif() - -check_type_exists(int64_t "${headers}" HAVE_INT64_T) -check_type_exists(uint64_t "${headers}" HAVE_UINT64_T) -check_type_exists(u_int64_t "${headers}" HAVE_U_INT64_T) -check_type_exists(error_t errno.h HAVE_ERROR_T) - -# available programs checks -#function(llvm_find_program name) -# string(TOUPPER ${name} NAME) -# string(REGEX REPLACE "\\." 
"_" NAME ${NAME}) -# find_program(LLVM_PATH_${NAME} ${name}) -# mark_as_advanced(LLVM_PATH_${NAME}) -# if(LLVM_PATH_${NAME}) -# set(HAVE_${NAME} 1 CACHE INTERNAL "Is ${name} available ?") -# mark_as_advanced(HAVE_${NAME}) -# else(LLVM_PATH_${NAME}) -# set(HAVE_${NAME} "" CACHE INTERNAL "Is ${name} available ?") -# endif(LLVM_PATH_${NAME}) -#endfunction() -# -#llvm_find_program(gv) -#llvm_find_program(circo) -#llvm_find_program(twopi) -#llvm_find_program(neato) -#llvm_find_program(fdp) -#llvm_find_program(dot) -#llvm_find_program(dotty) -#llvm_find_program(xdot.py) -# -#if( LLVM_ENABLE_FFI ) -# find_path(FFI_INCLUDE_PATH ffi.h PATHS ${FFI_INCLUDE_DIR}) -# if( FFI_INCLUDE_PATH ) -# set(FFI_HEADER ffi.h CACHE INTERNAL "") -# set(HAVE_FFI_H 1 CACHE INTERNAL "") -# else() -# find_path(FFI_INCLUDE_PATH ffi/ffi.h PATHS ${FFI_INCLUDE_DIR}) -# if( FFI_INCLUDE_PATH ) -# set(FFI_HEADER ffi/ffi.h CACHE INTERNAL "") -# set(HAVE_FFI_FFI_H 1 CACHE INTERNAL "") -# endif() -# endif() -# -# if( NOT FFI_HEADER ) -# message(FATAL_ERROR "libffi includes are not found.") -# endif() -# -# find_library(FFI_LIBRARY_PATH ffi PATHS ${FFI_LIBRARY_DIR}) -# if( NOT FFI_LIBRARY_PATH ) -# message(FATAL_ERROR "libffi is not found.") -# endif() -# -# list(APPEND CMAKE_REQUIRED_LIBRARIES ${FFI_LIBRARY_PATH}) -# list(APPEND CMAKE_REQUIRED_INCLUDES ${FFI_INCLUDE_PATH}) -# check_symbol_exists(ffi_call ${FFI_HEADER} HAVE_FFI_CALL) -# list(REMOVE_ITEM CMAKE_REQUIRED_INCLUDES ${FFI_INCLUDE_PATH}) -# list(REMOVE_ITEM CMAKE_REQUIRED_LIBRARIES ${FFI_LIBRARY_PATH}) -#else() -# unset(HAVE_FFI_FFI_H CACHE) -# unset(HAVE_FFI_H CACHE) -# unset(HAVE_FFI_CALL CACHE) -#endif( LLVM_ENABLE_FFI ) -# -# Define LLVM_HAS_ATOMICS if gcc or MSVC atomic builtins are supported. -#include(CheckAtomic) -# -#if( LLVM_ENABLE_PIC ) -# set(ENABLE_PIC 1) -#else() -# set(ENABLE_PIC 0) -#endif() -# -#include(CheckCXXCompilerFlag) -# -#check_cxx_compiler_flag("-Wno-variadic-macros" SUPPORTS_NO_VARIADIC_MACROS_FLAG) -# -#include(GetTargetTriple) -#get_target_triple(LLVM_HOSTTRIPLE) -# -## FIXME: We don't distinguish the target and the host. :( -#set(TARGET_TRIPLE "${LLVM_HOSTTRIPLE}") -# -## Determine the native architecture. 
-#string(TOLOWER "${LLVM_TARGET_ARCH}" LLVM_NATIVE_ARCH) -#if( LLVM_NATIVE_ARCH STREQUAL "host" ) -# string(REGEX MATCH "^[^-]*" LLVM_NATIVE_ARCH ${LLVM_HOSTTRIPLE}) -#endif () -# -#if (LLVM_NATIVE_ARCH MATCHES "i[2-6]86") -# set(LLVM_NATIVE_ARCH X86) -#elseif (LLVM_NATIVE_ARCH STREQUAL "x86") -# set(LLVM_NATIVE_ARCH X86) -#elseif (LLVM_NATIVE_ARCH STREQUAL "amd64") -# set(LLVM_NATIVE_ARCH X86) -#elseif (LLVM_NATIVE_ARCH STREQUAL "x86_64") -# set(LLVM_NATIVE_ARCH X86) -#elseif (LLVM_NATIVE_ARCH MATCHES "sparc") -# set(LLVM_NATIVE_ARCH Sparc) -#elseif (LLVM_NATIVE_ARCH MATCHES "powerpc") -# set(LLVM_NATIVE_ARCH PowerPC) -#elseif (LLVM_NATIVE_ARCH MATCHES "alpha") -# set(LLVM_NATIVE_ARCH Alpha) -#elseif (LLVM_NATIVE_ARCH MATCHES "arm") -# set(LLVM_NATIVE_ARCH ARM) -#elseif (LLVM_NATIVE_ARCH MATCHES "mips") -# set(LLVM_NATIVE_ARCH Mips) -#elseif (LLVM_NATIVE_ARCH MATCHES "xcore") -# set(LLVM_NATIVE_ARCH XCore) -#elseif (LLVM_NATIVE_ARCH MATCHES "msp430") -# set(LLVM_NATIVE_ARCH MSP430) -#else () -# message(FATAL_ERROR "Unknown architecture ${LLVM_NATIVE_ARCH}") -#endif () -# -#list(FIND LLVM_TARGETS_TO_BUILD ${LLVM_NATIVE_ARCH} NATIVE_ARCH_IDX) -#if (NATIVE_ARCH_IDX EQUAL -1) -# message(STATUS -# "Native target ${LLVM_NATIVE_ARCH} is not selected; lli will not JIT code") -#else () -# message(STATUS "Native target architecture is ${LLVM_NATIVE_ARCH}") -# set(LLVM_NATIVE_TARGET LLVMInitialize${LLVM_NATIVE_ARCH}Target) -# set(LLVM_NATIVE_TARGETINFO LLVMInitialize${LLVM_NATIVE_ARCH}TargetInfo) -# set(LLVM_NATIVE_TARGETMC LLVMInitialize${LLVM_NATIVE_ARCH}TargetMC) -# set(LLVM_NATIVE_ASMPRINTER LLVMInitialize${LLVM_NATIVE_ARCH}AsmPrinter) -#endif () -# -#if( MINGW ) -# set(HAVE_LIBIMAGEHLP 1) -# set(HAVE_LIBPSAPI 1) -# # TODO: Check existence of libraries. -# # include(CheckLibraryExists) -# # CHECK_LIBRARY_EXISTS(imagehlp ??? . 
HAVE_LIBIMAGEHLP) -#endif( MINGW ) -# -#if( MSVC ) -# set(error_t int) -# set(LTDL_SHLIBPATH_VAR "PATH") -# set(LTDL_SYSSEARCHPATH "") -# set(LTDL_DLOPEN_DEPLIBS 1) -# set(SHLIBEXT ".lib") -# set(LTDL_OBJDIR "_libs") -# set(HAVE_STRTOLL 1) -# set(strtoll "_strtoi64") -# set(strtoull "_strtoui64") -# set(stricmp "_stricmp") -# set(strdup "_strdup") -#else( MSVC ) -# set(LTDL_SHLIBPATH_VAR "LD_LIBRARY_PATH") -# set(LTDL_SYSSEARCHPATH "") # TODO -# set(LTDL_DLOPEN_DEPLIBS 0) # TODO -#endif( MSVC ) -# -#if( PURE_WINDOWS ) -# CHECK_CXX_SOURCE_COMPILES(" -# #include -# #include -# extern \"C\" void foo(PENUMLOADED_MODULES_CALLBACK); -# extern \"C\" void foo(BOOL(CALLBACK*)(PCSTR,ULONG_PTR,ULONG,PVOID)); -# int main(){return 0;}" -# HAVE_ELMCB_PCSTR) -# if( HAVE_ELMCB_PCSTR ) -# set(WIN32_ELMCB_PCSTR "PCSTR") -# else() -# set(WIN32_ELMCB_PCSTR "PSTR") -# endif() -#endif( PURE_WINDOWS ) -# -## FIXME: Signal handler return type, currently hardcoded to 'void' -#set(RETSIGTYPE void) - -configure_file( - ${CMAKE_CURRENT_SOURCE_DIR}/include/llvm/Support/DataTypes.h.cmake - ${CMAKE_CURRENT_BINARY_DIR}/include/llvm/Support/DataTypes.h) - -#configure_file( -# ${CMAKE_CURRENT_SOURCE_DIR}/include/llvm/Config/config.h.cmake -# ${CMAKE_CURRENT_BINARY_DIR}/include/llvm/Config/config.h) - -include_directories(${CMAKE_CURRENT_BINARY_DIR}/include) diff --git a/maxflow/galois/cmake/Toolchain/ppc64-bgq-linux-clang.cmake b/maxflow/galois/cmake/Toolchain/ppc64-bgq-linux-clang.cmake deleted file mode 100644 index d540b7b..0000000 --- a/maxflow/galois/cmake/Toolchain/ppc64-bgq-linux-clang.cmake +++ /dev/null @@ -1,9 +0,0 @@ -set(CMAKE_SYSTEM_NAME BlueGeneQ-static) -set(CMAKE_SYSTEM_VERSION 1) -set(CMAKE_C_COMPILER bgclang) -set(CMAKE_CXX_COMPILER bgclang++11) -set(CMAKE_FIND_ROOT_PATH) -set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) -set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) -set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) -set(USE_BGQ on) diff --git a/maxflow/galois/cmake/Toolchain/ppc64-bgq-linux-tryrunresults.cmake b/maxflow/galois/cmake/Toolchain/ppc64-bgq-linux-tryrunresults.cmake deleted file mode 100644 index 5a54548..0000000 --- a/maxflow/galois/cmake/Toolchain/ppc64-bgq-linux-tryrunresults.cmake +++ /dev/null @@ -1,22 +0,0 @@ -# This file was generated by CMake because it detected TRY_RUN() commands -# in crosscompiling mode. It will be overwritten by the next CMake run. -# Copy it to a safe location, set the variables to appropriate values -# and use it then to preset the CMake cache (using -C). - - -# HAVE_HUGEPAGES_EXITCODE -# indicates whether the executable would have been able to run on its -# target platform. If so, set HAVE_HUGEPAGES_EXITCODE to -# the exit code (in many cases 0 for success), otherwise enter "FAILED_TO_RUN". -# The HAVE_HUGEPAGES_COMPILED variable holds the build result for this TRY_RUN(). 
-# -# Source file : /g/g14/nguyen91/build/default/CMakeFiles/CMakeTmp/src.c -# Executable : /g/g14/nguyen91/build/default/CMakeFiles/cmTryCompileExec124295678-HAVE_HUGEPAGES_EXITCODE -# Run arguments : -# Called from: [2] /g/g14/nguyen91/w/GaloisDefault/cmake/Modules/CheckHugePages.cmake -# [1] /g/g14/nguyen91/w/GaloisDefault/CMakeLists.txt - -SET( HAVE_HUGEPAGES_EXITCODE - 0 - CACHE STRING "Result from TRY_RUN" FORCE) - diff --git a/maxflow/galois/cmake/Toolchain/ppc64-bgq-linux-xlc.cmake b/maxflow/galois/cmake/Toolchain/ppc64-bgq-linux-xlc.cmake deleted file mode 100644 index cc0a0d6..0000000 --- a/maxflow/galois/cmake/Toolchain/ppc64-bgq-linux-xlc.cmake +++ /dev/null @@ -1,10 +0,0 @@ -set(CMAKE_SYSTEM_NAME BlueGeneQ-static) -set(CMAKE_SYSTEM_VERSION 1) -set(CMAKE_C_COMPILER bgxlc_r) -set(CMAKE_CXX_COMPILER bgxlc++_r) -set(CMAKE_FIND_ROOT_PATH) -set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) -set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) -set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) -set(USE_BGQ on) -set(GALOIS_USE_CXX11_COMPAT on) diff --git a/maxflow/galois/include/Galois/Accumulator.h b/maxflow/galois/include/Galois/Accumulator.h deleted file mode 100644 index 7bd0c63..0000000 --- a/maxflow/galois/include/Galois/Accumulator.h +++ /dev/null @@ -1,307 +0,0 @@ -/** Accumulator type -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2011, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author Andrew Lenharth - */ - -#ifndef GALOIS_ACCUMULATOR_H -#define GALOIS_ACCUMULATOR_H - -#include "Galois/Runtime/PerThreadStorage.h" - -#include - -namespace Galois { - -/** - * GReducible stores per thread values of a variable of type T - * - * The final value is obtained by performing a reduction on per thread values - * using the provided binary functor BinFunc. BinFunc updates values in place - * and conforms to: - * - * void operator()(T& lhs, const T& rhs) - * - * Assumes that the initial value yields the identity element for binary functor. 
- */ -template -class GReducible { -protected: - BinFunc m_func; - Galois::Runtime::PerThreadStorage m_data; - const T m_initial; - - explicit GReducible(const BinFunc& f, const T& initial): m_func(f), m_initial(initial) { } - -public: - /** - * @param f the binary functor acting as the reduction operator - */ - explicit GReducible(const BinFunc& f = BinFunc()): m_func(f), m_initial(T()) { } - - /** - * Updates the thread local value by applying the reduction operator to - * current and newly provided value - */ - void update(const T& rhs) { - T& lhs = *m_data.getLocal(); - m_func(lhs, rhs); - } - - /** - * Returns the final reduction value. Only valid outside the parallel region. - */ - T& reduce() { - T& d0 = *m_data.getLocal(); - for (unsigned int i = 1; i < m_data.size(); ++i) { - T& d = *m_data.getRemote(i); - m_func(d0, d); - d = m_initial; - } - return d0; - } - - /** - * reset value - */ - void reset() { - for (unsigned int i = 0; i < m_data.size(); ++i) { - *m_data.getRemote(i) = m_initial; - } - } -}; - - -//! Operator form of max -template -struct gmax { - const T& operator()(const T& lhs, const T& rhs) const { - return std::max(lhs, rhs); - } -}; - -//! Operator form of min -template -struct gmin { - const T& operator()(const T& lhs, const T& rhs) const { - return std::min(lhs, rhs); - } -}; - -//! Turns binary functions over values into functions over references -//! -//! T operator()(const T& a, const T& b) => -//! void operator()(T& a, const T& b) -template -struct ReduceAssignWrap { - BinFunc fn; - ReduceAssignWrap(const BinFunc& f = BinFunc()): fn(f) { } - template - void operator()(T& lhs, const T& rhs) const { - lhs = fn(lhs, rhs); - } -}; - -//! Turns binary functions over item references into functions over vectors of items -//! -//! void operator()(T& a, const T& b) => -//! void operator()(std::vector& a, const std::vector& b) -template -struct ReduceVectorWrap { - BinFunc fn; - ReduceVectorWrap(const BinFunc& f = BinFunc()): fn(f) { } - template - void operator()(T& lhs, const T& rhs) const { - if (lhs.size() < rhs.size()) - lhs.resize(rhs.size()); - typename T::iterator ii = lhs.begin(); - for (typename T::const_iterator jj = rhs.begin(), ej = rhs.end(); jj != ej; ++ii, ++jj) { - fn(*ii, *jj); - } - } -}; - -//! Turns binary functions over item (value) references into functions over maps of items -//! -//! void operator()(V& a, const V& b) => -//! void operator()(std::map& a, const std::map& b) -template -struct ReduceMapWrap { - BinFunc fn; - ReduceMapWrap(const BinFunc& f = BinFunc()): fn(f) { } - template - void operator()(T& lhs, const T& rhs) const { - for (typename T::const_iterator jj = rhs.begin(), ej = rhs.end(); jj != ej; ++jj) { - fn(lhs[jj->first], jj->second); - } - } -}; - -//! Turns functions over elements of a range into functions over collections -//! -//! void operator()(T a) => -//! 
void operator()(Collection& a, const Collection& b) -template class AdaptorTy> -struct ReduceCollectionWrap { - typedef typename CollectionTy::value_type value_type; - - void operator()(CollectionTy& lhs, const CollectionTy& rhs) { - AdaptorTy adapt(lhs, lhs.begin()); - std::copy(rhs.begin(), rhs.end(), adapt); - } - - void operator()(CollectionTy& lhs, const value_type& rhs) { - AdaptorTy adapt(lhs, lhs.begin()); - *adapt = rhs; - } -}; - -/** - * Simplification of GReducible where BinFunc calculates results by - * value, i.e., BinFunc conforms to: - * - * T operator()(const T& a, const T& b); - */ -template -class GSimpleReducible: public GReducible > { - typedef GReducible > base_type; -public: - explicit GSimpleReducible(const BinFunc& func = BinFunc()): base_type(func) { } -}; - -//! Accumulator for T where accumulation is sum -template -class GAccumulator: public GReducible > > { - typedef GReducible > > base_type; - -public: - GAccumulator& operator+=(const T& rhs) { - base_type::update(rhs); - return *this; - } - - GAccumulator& operator-=(const T& rhs) { - base_type::update(-rhs); - return *this; - } - - T unsafeRead() const { - T d0 = *this->m_data.getRemote(0); - for (unsigned int i = 1; i < this->m_data.size(); ++i) { - const T& d = *this->m_data.getRemote(i); - this->m_func(d0, d); - } - return d0; - } -}; - -//! General accumulator for collections following STL interface where -//! accumulate means collection union. Since union/append/push_back are -//! not standard among collections, the AdaptorTy template parameter -//! allows users to provide an iterator adaptor along the lines of -//! std::inserter or std::back_inserter. -template class AdaptorTy> -class GCollectionAccumulator: public GReducible > { - typedef ReduceCollectionWrap Func; - typedef GReducible base_type; - typedef typename CollectionTy::value_type value_type; - - Func func; - -public: - void update(const value_type& rhs) { - CollectionTy& v = *this->m_data.getLocal(); - func(v, rhs); - } -}; - -//! Accumulator for set where accumulation is union -template -class GSetAccumulator: public GCollectionAccumulator { }; - -//! Accumulator for vector where accumulation is concatenation -template -class GVectorAccumulator: public GCollectionAccumulator { }; - -//! Accumulator for vector where a vector is treated as a map and accumulate -//! does element-wise addition among all entries -template -class GVectorElementAccumulator: public GReducible > > > { - typedef ReduceAssignWrap > ElementFunc; - typedef GReducible > base_type; - typedef typename VectorTy::value_type value_type; - - ElementFunc func; - -public: - - void resize(size_t s) { - for (int i = 0; i < this->m_data.size(); ++i) - this->m_data.getRemote(i)->resize(s); - } - - VectorTy& getLocal() { - return *this->m_data.getLocal(); - } - - void update(size_t index, const value_type& rhs) { - VectorTy& v = *this->m_data.getLocal(); - if (v.size() <= index) - v.resize(index + 1); - func(v[index], rhs); - } -}; - -//! Accumulator for map where accumulate does element-wise addition among -//! all entries -template -class GMapElementAccumulator: public GReducible > > > { - typedef ReduceAssignWrap > ElementFunc; - typedef GReducible > base_type; - typedef typename MapTy::mapped_type mapped_type; - typedef typename MapTy::key_type key_type; - - ElementFunc func; - -public: - void update(const key_type& index, const mapped_type& rhs) { - MapTy& v = *this->m_data.getLocal(); - func(v[index], rhs); - } -}; - -//! 
Accumulator for T where accumulation is max -template -class GReduceMax: public GReducible > > { - typedef GReducible > > base_type; -public: - GReduceMax(): base_type(ReduceAssignWrap >(), std::numeric_limits::min()) { } -}; - -//! Accumulator for T where accumulation is min -template -class GReduceMin: public GReducible > > { - typedef GReducible > > base_type; -public: - GReduceMin(): base_type(ReduceAssignWrap >(), std::numeric_limits::max()) { } -}; - -} -#endif diff --git a/maxflow/galois/include/Galois/Atomic.h b/maxflow/galois/include/Galois/Atomic.h deleted file mode 100644 index 3489d1f..0000000 --- a/maxflow/galois/include/Galois/Atomic.h +++ /dev/null @@ -1,265 +0,0 @@ -/** Atomic Types type -*- C++ -*- - * @file - * @section License - * - * Galois, a framework to exploit amorphous data-parallelism in irregular - * programs. - * - * Copyright (C) 2012, The University of Texas at Austin. All rights reserved. - * UNIVERSITY EXPRESSLY DISCLAIMS ANY AND ALL WARRANTIES CONCERNING THIS - * SOFTWARE AND DOCUMENTATION, INCLUDING ANY WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR ANY PARTICULAR PURPOSE, NON-INFRINGEMENT AND WARRANTIES OF - * PERFORMANCE, AND ANY WARRANTY THAT MIGHT OTHERWISE ARISE FROM COURSE OF - * DEALING OR USAGE OF TRADE. NO WARRANTY IS EITHER EXPRESS OR IMPLIED WITH - * RESPECT TO THE USE OF THE SOFTWARE OR DOCUMENTATION. Under no circumstances - * shall University be liable for incidental, special, indirect, direct or - * consequential damages or loss of profits, interruption of business, or - * related expenses which may arise from use of Software or Documentation, - * including but not limited to those resulting from defects in Software and/or - * Documentation, or loss or inaccuracy of data of any kind. - * - * @author M. Amber Hassaan - */ - -#ifndef GALOIS_ATOMIC_H -#define GALOIS_ATOMIC_H - -#include "Galois/Runtime/ll/CacheLineStorage.h" - -#include - -namespace Galois { - -namespace AtomicImpl { -/** - * Common implementation. - */ -template class W> -class GAtomicImpl { - // Galois::Runtime::LL::CacheLineStorage val; - W val; - -public: - //! Initialize with a value - explicit GAtomicImpl(const T& i): val(i) {} - //! default constructor - GAtomicImpl() {} - - //! atomic add and fetch - T operator+=(const T& rhs) { - return __sync_add_and_fetch(&val.data, rhs); - } - //! atomic sub and fetch - T operator-=(const T& rhs) { - return __sync_sub_and_fetch(&(val.data), rhs); - } - //! atomic increment and fetch - T operator++() { - return __sync_add_and_fetch(&(val.data), 1); - } - //! atomic fetch and increment - T operator++(int) { - return __sync_fetch_and_add(&(val.data), 1); - } - //! atomic decrement and fetch - T operator--() { - return __sync_sub_and_fetch(&(val.data), 1); - } - //! atomic fetch and decrement - T operator--(int) { - return __sync_fetch_and_sub(&(val.data), 1); - } - //! conversion operator to base data type - operator T() const { - return val.data; - } - //! assign from underlying type - T& operator=(const T& i) { - return val.data = i; - } - //! assignment operator - T& operator=(const GAtomicImpl& i) { - return val.data = i.val.data; - } - //! direct compare and swap - bool cas (const T& expected, const T& updated) { - if (val.data != expected) { return false; } -#if defined(__INTEL_COMPILER) - return __sync_bool_compare_and_swap( - &val.data, - *reinterpret_cast(&expected), - *reinterpret_cast(&updated)); -#else - return __sync_bool_compare_and_swap (&val.data, expected, updated); -#endif - } -}; - -//! 
Basic atomic -template class W> -class GAtomicBase: public GAtomicImpl { - typedef GAtomicImpl Super_ty; - -public: - //! Initialize with a value - explicit GAtomicBase(const T& i): Super_ty (i) {} - - //! default constructor - GAtomicBase(): Super_ty () {} - - T& operator=(const GAtomicBase& that) { - return Super_ty::operator=(that); - } - - T& operator=(const T& that) { - return Super_ty::operator=(that); - } -}; - -//! Specialization for pointers -template class W> -class GAtomicBase: public GAtomicImpl { - typedef GAtomicImpl Super_ty; - -public: - typedef typename std::iterator_traits::difference_type difference_type; - - GAtomicBase(): Super_ty() {} - - GAtomicBase(T* i): Super_ty(i) {} - - T*& operator=(const GAtomicBase& that) { - return Super_ty::operator=(that); - } - - T*& operator=(T* that) { - return Super_ty::operator=(that); - } - - T* operator+=(const difference_type& rhs) { - return __sync_add_and_fetch(&Super_ty::val.data, rhs); - } - - T* operator-=(const difference_type& rhs) { - return __sync_sub_and_fetch(&Super_ty::val.data, rhs); - } -}; - -//! Specialization for const pointers -template class W> -class GAtomicBase: public GAtomicImpl { - typedef GAtomicImpl Super_ty; - -public: - typedef typename std::iterator_traits::difference_type difference_type; - - GAtomicBase(): Super_ty() {} - - GAtomicBase(const T* i): Super_ty(i) {} - - const T*& operator=(const GAtomicBase& that) { - return Super_ty::operator=(that); - } - - const T*& operator=(const T* that) { - return Super_ty::operator=(that); - } - - const T* operator+=(const difference_type& rhs) { - return __sync_add_and_fetch(&Super_ty::val.data, rhs); - } - - const T* operator-=(const difference_type& rhs) { - return __sync_sub_and_fetch(&Super_ty::val.data, rhs); - } -}; - -//! Specialization for bools -template