# Copyright (c) 2010-2021 Intel Corporation. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of Intel Corporation nor the names of its contributors # may be used to endorse or promote products derived from this software # without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # # Intel Corporation is the author of the Materials, and requests that all # problem reports or change requests be submitted to it directly. 
# # Intel(R) CPU Runtime for OpenCL(TM) Applications Configuration file # ======================================================================= # # Notes: # ----------- # For size with units, accepted units are: "GB", "G", "MB", "M", "KB", "K", "B" # # Parameters: # ----------- # CL_CONFIG_LOG_FILE [string] - set the full path of the log file # CL_CONFIG_DEVICES [string1; string2, string3 | string4 ...] - list of available devices. # The first device is the default device. # # CL_CONFIG_DUMP_BIN [bool] - # Control whether object binary of OpenCL program is dumped. # Destination is the current directory. # CL_CONFIG_DUMP_DISASSEMBLY [bool] - # Control whether disassembled assembly of OpenCL program is dumped. # Destination is the current directory. # # CL_CONFIG_USE_ITT_API [bool] - determine whether GPA/ITT traces collection is active or not # CL_CONFIG_USE_VECTORIZER [bool] - determine whether auto-vectorization module is active or not # CL_CONFIG_USE_VTUNE [bool] - determine whether VTune AmplifierXE instrumentation is active or not # CL_CONFIG_USE_TRAPPING [bool] - determine whether CPU workers trapping is active or not # CL_CONFIG_USE_FAST_RELAXED_MATH [bool] - determine whether kernels are built with the -cl-fast-relaxed-math option # # CL_CONFIG_CPU_FORCE_MAX_WORK_GROUP_SIZE [size] - # examples: 16384, 131072, 67108864 # force CPU to work with specified maximum work-group size. # Out of bounds values are clamped to [8192, 67108864]. # # CL_CONFIG_CPU_FORCE_WORK_GROUP_SIZE [size or array of size] - # examples: export CL_CONFIG_CPU_FORCE_WORK_GROUP_SIZE=128 # export CL_CONFIG_CPU_FORCE_WORK_GROUP_SIZE=128,1,1 # Force CPU to work with specified workgroup size. If this env is set, # local_work_size of clEnqueueNDRangeKernel is ignored. # NOTE: If array size is larger than work_dim of clEnqueueNDRangeKernel, # only the first work_dim values of the array are used. # If array size is smaller than work_dim, workgroup size of higher # dim will be set to 1. 
# clEnqueueNDRangeKernel returns CL_INVALID_WORK_GROUP_SIZE if # * any of the first work_dim values is negative. # * or any of the first work_dim values is larger than global_work_size. # * or maximum of the first work_dim values is larger than query result of # CL_DEVICE_MAX_WORK_GROUP_SIZE. # # CL_CONFIG_CPU_FORCE_GLOBAL_MEM_SIZE [size with units] - # examples: 2GB, 512MB, 65536KB, 16777216B # force CPU to work with specified global memory size, this will also affect max memory # allocation size unless CL_CONFIG_CPU_FORCE_MAX_MEM_ALLOC_SIZE is used too # # CL_CONFIG_CPU_FORCE_MAX_MEM_ALLOC_SIZE [size with units] - # examples: 2GB, 512MB, 65536KB, 16777216B # force CPU to work with specified max memory allocation size. # # CL_CONFIG_CPU_FORCE_LOCAL_MEM_SIZE [size with units] - # examples: 8MB, 32KB (default), 8388608B # force CL_DEVICE_LOCAL_MEM_SIZE for CPU device to be the given value. # NOTE: Usage of this configuration option requires additional amount of # stack. You have to compile your application with enough size of # stack: OpenCL runtime requires at least 512KB + # value of CL_CONFIG_CPU_FORCE_LOCAL_MEM_SIZE + # value of CL_CONFIG_CPU_FORCE_PRIVATE_MEM_SIZE. # # CL_CONFIG_CPU_FORCE_PRIVATE_MEM_SIZE [size with units] - # examples: 7648KB (default), 16MB # set CPU_DEV_MAX_WG_PRIVATE_SIZE for CPU device to be the given value. # NOTE: Usage of this configuration option requires additional amount of # stack. You have to compile your application with enough size of # stack: OpenCL runtime requires at least 512KB + # value of CL_CONFIG_CPU_FORCE_LOCAL_MEM_SIZE + # value of CL_CONFIG_CPU_FORCE_PRIVATE_MEM_SIZE. # If your application fails with CL_OUT_OF_RESOURCES, consider # increasing CL_CONFIG_CPU_FORCE_PRIVATE_MEM_SIZE. # # CL_CONFIG_CPU_VECTORIZER_MODE [mode] # Control vectorization mode of the CPU compiler. Applies only when CL_CONFIG_USE_VECTORIZER = True. # 0: (default) Autonomous vectorization. 
The compiler makes heuristic decisions whether to vectorize # each kernel, and if so to what vector length. # 1: No compiler vectorization. Explicit kernel vector data types are left intact. # The same as if CL_CONFIG_USE_VECTORIZER = False. # [4, 8, 16, 32, 64]: Disable heuristic and force vectorization to the specified length. # # NOTE: Some kernels do not support vectorization for functional reasons. Such kernels cannot be vectorized in any mode. # # CL_CONFIG_CPU_RT_LOOP_UNROLL_FACTOR [factor] # Control loop unrolling of loops with non-constant trip count. Out of bounds values are clamped to [1, 16]. # examples: 1, 2, 3, 16 # 1: no RT unrolling (default) # [2, 16]: unrolling factor # # CL_CONFIG_CPU_TBB_NUM_WORKERS [unsigned number] - # Control the number of TBB workers/threads. Work-groups can be executed # in parallel by CL_CONFIG_CPU_TBB_NUM_WORKERS threads. # Out of bounds values are clamped to [1, MAX_NUM]. # cpu device: MAX_NUM is the number of logical CPU cores. # Default value is MAX_NUM. # fpga-emu device: MAX_NUM is 256. Default value is 32. # NOTE: If CL_CONFIG_CPU_TBB_NUM_WORKERS is 1, work-groups are executed # sequentially by a single thread. # # CL_CONFIG_CPU_TARGET_ARCH [cpu target arch] - # examples: skx, core-avx2, corei7-avx, corei7 # force a SIMD instruction set used for OpenCL(TM) kernel compilation. # # CL_CONFIG_AUTO_MEMORY [bool] # True: (default, except with x86 32bit windows build) the stack size for # kernel execution will be allocated automatically and # CL_CONFIG_CPU_FORCE_PRIVATE_MEM_SIZE, # CL_CONFIG_CPU_FORCE_LOCAL_MEM_SIZE should also be set as the max # private and local memory for WG execution. # False: the stack size for kernel execution is under control of # CL_CONFIG_CPU_FORCE_PRIVATE_MEM_SIZE and CL_CONFIG_CPU_FORCE_LOCAL_MEM_SIZE. # NOTE: Supported on CPU and FPGA Emulator devices. Usage of this configuration # option may also require compiling your application with enough # size of stack. 
# # CL_CONFIG_STACK_DEFAULT_SIZE # Applies only when CL_CONFIG_AUTO_MEMORY = True # examples: 4MB (default), 8MB, 16MB # This value is the default stack size for kernel execution. # If actual stack size is greater than this value, stack reallocation will occur. # # CL_CONFIG_STACK_EXTRA_SIZE # Applies only when CL_CONFIG_AUTO_MEMORY = True # examples: 512KB, 1MB (default), 2MB # This value is the extra stack size for execution of builtin and third-party functions. # # CL_CONFIG_CPU_STREAMING_ALWAYS [bool] - determine whether Non-Temporal instructions are used or not # # CL_CONFIG_CPU_EXPENSIVE_MEM_OPT [unsigned number] - # examples: 0 (default), 0x0001 # A bitmap indicating enabled expensive memory optimizations. These # optimizations may lead to more JIT compilation time, but give some # performance benefit. # Available bits: # 0: OpenCL address space alias analysis # # CL_CONFIG_ENABLE_PARALLEL_COPY [bool] # True: (default) parallel copy/fill is enabled. Memory copy/fill in # clEnqueueReadBuffer, clEnqueueWriteBuffer, clEnqueueCopyBuffer, # clEnqueueSVMMemcpy, clEnqueueMemcpyINTEL, clEnqueueSVMMemFill, # clEnqueueMemsetINTEL and clEnqueueMemFillINTEL is executed # in parallel using internal kernels. # False: parallel copy/fill is disabled. # # CL_CONFIG_TBB_VERSION [string] - # Define the TBB version used by task executor. # The full tbb library path is recorded in Windows registry key # under the location with this version number. It should be updated # when TBB library is uplifted to a new version. # # CL_CONFIG_TBB_DLL_PATH [string] - # Define the TBB DLL PATH used by task executor. # As a solution for the Conda release channel, the full path of the TBB DLL # can be set in this field. # It's only used on Windows. 
# #CL_CONFIG_LOG_FILE = C:\cl.log #CL_CONFIG_USER_LOGGER = stderr CL_CONFIG_DUMP_BIN = False CL_CONFIG_DUMP_DISASSEMBLY = False CL_CONFIG_USE_ITT_API = False CL_CONFIG_USE_VECTORIZER = True CL_CONFIG_USE_VTUNE = False CL_CONFIG_USE_TRAPPING = False CL_CONFIG_USE_FAST_RELAXED_MATH = False CL_CONFIG_CPU_STREAMING_ALWAYS = False CL_GPA_CONFIG_ENABLE_API_TRACING = False CL_GPA_CONFIG_ENABLE_CONTEXT_TRACING = True CL_GPA_CONFIG_SHOW_QUEUED_MARKER = True CL_GPA_CONFIG_SHOW_SUBMITTED_MARKER = False CL_GPA_CONFIG_SHOW_RUNNING_MARKER = False CL_GPA_CONFIG_SHOW_COMPLETED_MARKER = True #CL_CONFIG_AUTO_MEMORY = True #CL_CONFIG_STACK_DEFAULT_SIZE = 4MB #CL_CONFIG_STACK_EXTRA_SIZE = 1MB #CL_CONFIG_CPU_FORCE_GLOBAL_MEM_SIZE = 256MB #CL_CONFIG_CPU_FORCE_LOCAL_MEM_SIZE = 32KB #CL_CONFIG_CPU_FORCE_MAX_MEM_ALLOC_SIZE = 128MB #CL_CONFIG_CPU_FORCE_PRIVATE_MEM_SIZE = 7648KB #CL_CONFIG_CPU_RT_LOOP_UNROLL_FACTOR = 1 #CL_CONFIG_CPU_TBB_NUM_WORKERS = 32 #CL_CONFIG_CPU_VECTORIZER_MODE = 0 #CL_CONFIG_CPU_EXPENSIVE_MEM_OPT = 0 #CL_CONFIG_ENABLE_PARALLEL_COPY = True CL_CONFIG_TBB_VERSION = 2021.11.0 CL_CONFIG_DEVICES = cpu CL_CONFIG_TBB_DLL_PATH =