#ifndef OPENMM_CUDAKERNEL_H_
#define OPENMM_CUDAKERNEL_H_
/* -------------------------------------------------------------------------- *
* OpenMM *
* -------------------------------------------------------------------------- *
* This is part of the OpenMM molecular simulation toolkit originating from *
* Simbios, the NIH National Center for Physics-Based Simulation of *
* Biological Structures at Stanford, funded under the NIH Roadmap for *
* Medical Research, grant U54 GM072970. See https://simtk.org. *
* *
* Portions copyright (c) 2019 Stanford University and the Authors. *
* Authors: Peter Eastman *
* Contributors: *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU Lesser General Public License as published *
* by the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU Lesser General Public License for more details. *
* *
* You should have received a copy of the GNU Lesser General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* -------------------------------------------------------------------------- */
#include "CudaArray.h"
#include "CudaContext.h"
#include <string>
#include <vector>
namespace OpenMM {
/**
* This is the CUDA implementation of the ComputeKernelImpl interface.
*/
class CudaKernel : public ComputeKernelImpl {
public:
/**
* Create a new CudaKernel.
*
* @param context the context this kernel belongs to
* @param kernel the kernel to be invoked
* @param name the name of the kernel function
*/
CudaKernel(CudaContext& context, CUfunction kernel, const std::string& name);
/**
* Get the name of this kernel.
*/
std::string getName() const;
/**
* Execute this kernel.
*
* @param threads the maximum number of threads that should be used. Depending on the
* computing device, it may choose to use fewer threads than this number.
* @param blockSize the number of threads in each thread block. If this is omitted, a
* default size that is appropriate for the computing device is used.
*/
void execute(int threads, int blockSize=-1);
protected:
/**
* Add an argument to pass the kernel when it is invoked, where the value is a
* subclass of ArrayInterface.
*
* @param value the value to pass to the kernel
*/
void addArrayArg(ArrayInterface& value);
/**
* Add an argument to pass the kernel when it is invoked, where the value is a primitive type.
*
* @param value a pointer to the argument value
* @param size the size of the value in bytes
*/
void addPrimitiveArg(const void* value, int size);
/**
* Add a placeholder for an argument without specifying its value.
*/
void addEmptyArg();
/**
* Add an argument to pass the kernel when it is invoked, where the value is a
* subclass of ArrayInterface.
*
* @param index the index of the argument to set
* @param value the value to pass to the kernel
*/
void setArrayArg(int index, ArrayInterface& value);
/**
* Add an argument to pass the kernel when it is invoked, where the value is a primitive type.
*
* @param index the index of the argument to set
* @param value a pointer to the argument value
* @param size the size of the value in bytes
*/
void setPrimitiveArg(int index, const void* value, int size);
private:
CudaContext& context;
CUfunction kernel;
std::string name;
std::vector primitiveArgs;
std::vector arrayArgs;
std::vector argPointers;
};
} // namespace OpenMM
#endif /*OPENMM_CUDAKERNEL_H_*/