/*
 * Copyright 1993-2020 NVIDIA Corporation. All rights reserved.
 *
 * NOTICE TO LICENSEE:
 *
 * This source code and/or documentation ("Licensed Deliverables") are
 * subject to NVIDIA intellectual property rights under U.S. and
 * international Copyright laws.
 *
 * These Licensed Deliverables contained herein is PROPRIETARY and
 * CONFIDENTIAL to NVIDIA and is being provided under the terms and
 * conditions of a form of NVIDIA software license agreement by and
 * between NVIDIA and Licensee ("License Agreement") or electronically
 * accepted by Licensee. Notwithstanding any terms or conditions to
 * the contrary in the License Agreement, reproduction or disclosure
 * of the Licensed Deliverables to any third party without the express
 * written consent of NVIDIA is prohibited.
 *
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
 * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
 * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
 * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
 * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
 * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
 * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
 * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
 * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
 * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
 * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
 * OF THESE LICENSED DELIVERABLES.
 *
 * U.S. Government End Users. These Licensed Deliverables are a
 * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
 * 1995), consisting of "commercial computer software" and "commercial
 * computer software documentation" as such terms are used in 48
 * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
 * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
 * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
 * U.S. Government End Users acquire the Licensed Deliverables with
 * only those rights set forth herein.
 *
 * Any use of the Licensed Deliverables in individual and commercial
 * software must include, in the user documentation and internal
 * comments to the code, the above Disclaimer and U.S. Government End
 * Users Notice.
 */

/*
 * cudnn_ops_infer : cuDNN's basic definitions and inference operations.
 */
#if !defined(CUDNN_OPS_INFER_H_)
#define CUDNN_OPS_INFER_H_

#include <cuda_runtime.h>
#include <stdint.h>

#include "cudnn_version.h"

/* These version numbers are autogenerated, do not edit manually. */
#define CUDNN_OPS_INFER_MAJOR 8
#define CUDNN_OPS_INFER_MINOR 1
#define CUDNN_OPS_INFER_PATCH 0

#if (CUDNN_OPS_INFER_MAJOR != CUDNN_MAJOR) || (CUDNN_OPS_INFER_MINOR != CUDNN_MINOR) || \
    (CUDNN_OPS_INFER_PATCH != CUDNN_PATCHLEVEL)
#error Version mismatch in cuDNN OPS INFER!!!
#endif

#ifndef CUDNNWINAPI
#ifdef _WIN32
#define CUDNNWINAPI __stdcall
#else
#define CUDNNWINAPI
#endif
#endif

/* Warnings for deprecated APIs are enabled using the CUDNN_WARN_DEPRECATED macro */
#if defined(CUDNN_WARN_DEPRECATED) && (defined(__GNUC__) || defined(__clang__))
/* GCC, Intel C/C++, Cray C/C++, CLANG, IBM XL C/C++ little endian */
#define CUDNN_DEPRECATED __attribute__((deprecated))
#elif defined(CUDNN_WARN_DEPRECATED) && defined(_MSC_VER)
/* Microsoft Visual C++ */
#define CUDNN_DEPRECATED __declspec(deprecated)
#elif defined(CUDNN_WARN_DEPRECATED) && (__cplusplus >= 201402L)
/* C++14 compilers */
#define CUDNN_DEPRECATED [[deprecated]]
#else
/* No support for the deprecated attribute */
#define CUDNN_DEPRECATED
#endif

#if defined(__cplusplus)
extern "C" {
#endif

struct cudnnContext;
typedef struct cudnnContext *cudnnHandle_t;

size_t CUDNNWINAPI
cudnnGetVersion(void);

/* Returns the CUDA Runtime version statically linked against cudnn */
size_t CUDNNWINAPI
cudnnGetCudartVersion(void);

/*
 * CUDNN return codes
 */
typedef enum {
    CUDNN_STATUS_SUCCESS                      = 0,
    CUDNN_STATUS_NOT_INITIALIZED              = 1,
    CUDNN_STATUS_ALLOC_FAILED                 = 2,
    CUDNN_STATUS_BAD_PARAM                    = 3,
    CUDNN_STATUS_INTERNAL_ERROR               = 4,
    CUDNN_STATUS_INVALID_VALUE                = 5,
    CUDNN_STATUS_ARCH_MISMATCH                = 6,
    CUDNN_STATUS_MAPPING_ERROR                = 7,
    CUDNN_STATUS_EXECUTION_FAILED             = 8,
    CUDNN_STATUS_NOT_SUPPORTED                = 9,
    CUDNN_STATUS_LICENSE_ERROR                = 10,
    CUDNN_STATUS_RUNTIME_PREREQUISITE_MISSING = 11,
    CUDNN_STATUS_RUNTIME_IN_PROGRESS          = 12,
    CUDNN_STATUS_RUNTIME_FP_OVERFLOW          = 13,
    CUDNN_STATUS_VERSION_MISMATCH             = 14,
} cudnnStatus_t;

/* human-readable error messages */
const char *CUDNNWINAPI
cudnnGetErrorString(cudnnStatus_t status);

/* Forward definition in this version only */
typedef struct cudnnRuntimeTag_t cudnnRuntimeTag_t;

typedef enum {
    CUDNN_ERRQUERY_RAWCODE     = 0,
    CUDNN_ERRQUERY_NONBLOCKING = 1,
    CUDNN_ERRQUERY_BLOCKING    = 2,
} cudnnErrQueryMode_t;

cudnnStatus_t CUDNNWINAPI
cudnnQueryRuntimeError(cudnnHandle_t handle, cudnnStatus_t *rstatus, cudnnErrQueryMode_t mode, cudnnRuntimeTag_t *tag);

#ifndef __LIBRARY_TYPES_H__

typedef enum libraryPropertyType_t { MAJOR_VERSION, MINOR_VERSION, PATCH_LEVEL } libraryPropertyType;

#endif

cudnnStatus_t CUDNNWINAPI
cudnnGetProperty(libraryPropertyType type, int *value);

cudnnStatus_t CUDNNWINAPI
cudnnCreate(cudnnHandle_t *handle);
cudnnStatus_t CUDNNWINAPI
cudnnDestroy(cudnnHandle_t handle);
cudnnStatus_t CUDNNWINAPI
cudnnSetStream(cudnnHandle_t handle, cudaStream_t streamId);
cudnnStatus_t CUDNNWINAPI
cudnnGetStream(cudnnHandle_t handle, cudaStream_t *streamId);
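/*
 * Usage sketch (illustrative, not part of the API): a cuDNN context is
 * created once, optionally bound to a CUDA stream, and destroyed when no
 * longer needed. Every call returns a cudnnStatus_t that should be checked;
 * the CHECK_CUDNN macro below is a hypothetical helper (assuming <stdio.h>
 * and <stdlib.h>), not something this header provides.
 *
 *   #define CHECK_CUDNN(call)                                    \
 *       do {                                                     \
 *           cudnnStatus_t s_ = (call);                           \
 *           if (s_ != CUDNN_STATUS_SUCCESS) {                    \
 *               fprintf(stderr, "%s\n", cudnnGetErrorString(s_));\
 *               exit(1);                                         \
 *           }                                                    \
 *       } while (0)
 *
 *   cudnnHandle_t handle;
 *   cudaStream_t stream;
 *   CHECK_CUDNN(cudnnCreate(&handle));
 *   cudaStreamCreate(&stream);
 *   CHECK_CUDNN(cudnnSetStream(handle, stream)); // work is issued on this stream
 *   ...
 *   CHECK_CUDNN(cudnnDestroy(handle));
 *   cudaStreamDestroy(stream);
 */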
/* Data structures to represent Image/Filter and the Neural Network Layer */
typedef struct cudnnTensorStruct *cudnnTensorDescriptor_t;
typedef struct cudnnPoolingStruct *cudnnPoolingDescriptor_t;
typedef struct cudnnFilterStruct *cudnnFilterDescriptor_t;
typedef struct cudnnLRNStruct *cudnnLRNDescriptor_t;
typedef struct cudnnActivationStruct *cudnnActivationDescriptor_t;
typedef struct cudnnSpatialTransformerStruct *cudnnSpatialTransformerDescriptor_t;
typedef struct cudnnOpTensorStruct *cudnnOpTensorDescriptor_t;
typedef struct cudnnReduceTensorStruct *cudnnReduceTensorDescriptor_t;
typedef struct cudnnCTCLossStruct *cudnnCTCLossDescriptor_t;
typedef struct cudnnTensorTransformStruct *cudnnTensorTransformDescriptor_t;

/*
 * CUDNN data type
 */
typedef enum {
    CUDNN_DATA_FLOAT    = 0,
    CUDNN_DATA_DOUBLE   = 1,
    CUDNN_DATA_HALF     = 2,
    CUDNN_DATA_INT8     = 3,
    CUDNN_DATA_INT32    = 4,
    CUDNN_DATA_INT8x4   = 5,
    CUDNN_DATA_UINT8    = 6,
    CUDNN_DATA_UINT8x4  = 7,
    CUDNN_DATA_INT8x32  = 8,
    CUDNN_DATA_BFLOAT16 = 9,
    CUDNN_DATA_INT64    = 10,
} cudnnDataType_t;

/*
 * CUDNN math type
 */
typedef enum {
    CUDNN_DEFAULT_MATH                    = 0,
    CUDNN_TENSOR_OP_MATH                  = 1,
    CUDNN_TENSOR_OP_MATH_ALLOW_CONVERSION = 2,
    CUDNN_FMA_MATH                        = 3,
} cudnnMathType_t;

/*
 * CUDNN propagate Nan
 */
typedef enum {
    CUDNN_NOT_PROPAGATE_NAN = 0,
    CUDNN_PROPAGATE_NAN     = 1,
} cudnnNanPropagation_t;

/*
 * CUDNN Determinism
 */
typedef enum {
    CUDNN_NON_DETERMINISTIC = 0,
    CUDNN_DETERMINISTIC     = 1,
} cudnnDeterminism_t;

/* Maximum supported number of tensor dimensions */
#define CUDNN_DIM_MAX 8

/* Create an instance of a generic Tensor descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnCreateTensorDescriptor(cudnnTensorDescriptor_t *tensorDesc);

typedef enum {
    CUDNN_TENSOR_NCHW        = 0, /* row major (wStride = 1, hStride = w) */
    CUDNN_TENSOR_NHWC        = 1, /* feature maps interleaved ( cStride = 1 ) */
    CUDNN_TENSOR_NCHW_VECT_C = 2, /* each image point is a vector of C elements; vector length is implied by the data type */
} cudnnTensorFormat_t;

cudnnStatus_t CUDNNWINAPI
cudnnSetTensor4dDescriptor(cudnnTensorDescriptor_t tensorDesc,
                           cudnnTensorFormat_t format,
                           cudnnDataType_t dataType, /* image data type */
                           int n,                    /* number of inputs (batch size) */
                           int c,                    /* number of input feature maps */
                           int h,                    /* height of input section */
                           int w);                   /* width of input section */

cudnnStatus_t CUDNNWINAPI
cudnnSetTensor4dDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
                             cudnnDataType_t dataType, /* image data type */
                             int n,                    /* number of inputs (batch size) */
                             int c,                    /* number of input feature maps */
                             int h,                    /* height of input section */
                             int w,                    /* width of input section */
                             int nStride,
                             int cStride,
                             int hStride,
                             int wStride);

cudnnStatus_t CUDNNWINAPI
cudnnGetTensor4dDescriptor(const cudnnTensorDescriptor_t tensorDesc,
                           cudnnDataType_t *dataType, /* image data type */
                           int *n,                    /* number of inputs (batch size) */
                           int *c,                    /* number of input feature maps */
                           int *h,                    /* height of input section */
                           int *w,                    /* width of input section */
                           int *nStride,
                           int *cStride,
                           int *hStride,
                           int *wStride);

cudnnStatus_t CUDNNWINAPI
cudnnSetTensorNdDescriptor(cudnnTensorDescriptor_t tensorDesc,
                           cudnnDataType_t dataType,
                           int nbDims,
                           const int dimA[],
                           const int strideA[]);

cudnnStatus_t CUDNNWINAPI
cudnnSetTensorNdDescriptorEx(cudnnTensorDescriptor_t tensorDesc,
                             cudnnTensorFormat_t format,
                             cudnnDataType_t dataType,
                             int nbDims,
                             const int dimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetTensorNdDescriptor(const cudnnTensorDescriptor_t tensorDesc,
                           int nbDimsRequested,
                           cudnnDataType_t *dataType,
                           int *nbDims,
                           int dimA[],
                           int strideA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetTensorSizeInBytes(const cudnnTensorDescriptor_t tensorDesc, size_t *size);

/* PixelOffset( n, c, h, w ) = n * input_stride + c * feature_stride + h * h_stride + w * w_stride

   1) Example of all images in row major order, one batch of features after the other (with an optional padding on rows)
      input_stride   : c x h x h_stride
      feature_stride : h x h_stride
      h_stride       : >= w (h_stride = w if no padding)
      w_stride       : 1

   2) Example of all images in row major order with feature maps interleaved
      input_stride   : c x h x h_stride
      feature_stride : 1
      h_stride       : w x c
      w_stride       : c

   3) Example of all images in column major order, one batch of features after the other (with optional padding on columns)
      input_stride   : c x w x w_stride
      feature_stride : w x w_stride
      h_stride       : 1
      w_stride       : >= h
*/
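/*
 * Usage sketch (illustrative assumption): describing a batch of 32 RGB
 * images of size 224x224 in NCHW layout, then querying the memory
 * footprint. For a packed NCHW tensor the implied strides match example 1
 * above: nStride = c*h*w, cStride = h*w, hStride = w, wStride = 1.
 *
 *   cudnnTensorDescriptor_t xDesc;
 *   size_t bytes;
 *   cudnnCreateTensorDescriptor(&xDesc);
 *   cudnnSetTensor4dDescriptor(xDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
 *                              32, 3, 224, 224);
 *   cudnnGetTensorSizeInBytes(xDesc, &bytes); // 32*3*224*224 * sizeof(float)
 *   cudnnDestroyTensorDescriptor(xDesc);
 */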
/* Destroy an instance of Tensor4d descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyTensorDescriptor(cudnnTensorDescriptor_t tensorDesc);

/* Fold/unfold transforms */
typedef enum {
    CUDNN_TRANSFORM_FOLD   = 0U,
    CUDNN_TRANSFORM_UNFOLD = 1U,
} cudnnFoldingDirection_t;

/** Create a destination descriptor for cudnnTransformTensor */
cudnnStatus_t CUDNNWINAPI
cudnnInitTransformDest(const cudnnTensorTransformDescriptor_t transformDesc,
                       const cudnnTensorDescriptor_t srcDesc,
                       cudnnTensorDescriptor_t destDesc,
                       size_t *destSizeInBytes);

/** Create an empty tensor transform descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnCreateTensorTransformDescriptor(cudnnTensorTransformDescriptor_t *transformDesc);

/** Initialize a previously created tensor transform descriptor. */
cudnnStatus_t CUDNNWINAPI
cudnnSetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
                                  const uint32_t nbDims,
                                  const cudnnTensorFormat_t destFormat,
                                  const int32_t padBeforeA[],
                                  const int32_t padAfterA[],
                                  const uint32_t foldA[],
                                  const cudnnFoldingDirection_t direction);

/**
 * Retrieves the values stored in a previously initialized tensor transform
 * descriptor.
 */
cudnnStatus_t CUDNNWINAPI
cudnnGetTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc,
                                  uint32_t nbDimsRequested,
                                  cudnnTensorFormat_t *destFormat,
                                  int32_t padBeforeA[],
                                  int32_t padAfterA[],
                                  uint32_t foldA[],
                                  cudnnFoldingDirection_t *direction);

/**
 * Destroys a previously created tensor transform descriptor.
 */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyTensorTransformDescriptor(cudnnTensorTransformDescriptor_t transformDesc);

/* Tensor layout conversion helper (y = alpha * x + beta * y) */
cudnnStatus_t CUDNNWINAPI
cudnnTransformTensor(cudnnHandle_t handle,
                     const void *alpha,
                     const cudnnTensorDescriptor_t xDesc,
                     const void *x,
                     const void *beta,
                     const cudnnTensorDescriptor_t yDesc,
                     void *y);

cudnnStatus_t CUDNNWINAPI
cudnnTransformTensorEx(cudnnHandle_t handle,
                       const cudnnTensorTransformDescriptor_t transDesc,
                       const void *alpha,
                       const cudnnTensorDescriptor_t srcDesc,
                       const void *srcData,
                       const void *beta,
                       const cudnnTensorDescriptor_t destDesc,
                       void *destData);
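/*
 * Usage sketch (illustrative assumption): converting a tensor from NCHW to
 * NHWC layout with cudnnTransformTensor. The two descriptors share dims and
 * data type but differ in format; alpha = 1 and beta = 0 request a plain
 * copy with no blending of the destination's prior contents.
 *
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnSetTensor4dDescriptor(srcDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
 *                              n, c, h, w);
 *   cudnnSetTensor4dDescriptor(dstDesc, CUDNN_TENSOR_NHWC, CUDNN_DATA_FLOAT,
 *                              n, c, h, w);
 *   cudnnTransformTensor(handle, &one, srcDesc, d_src, &zero, dstDesc, d_dst);
 */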
/* Tensor Bias addition : C = alpha * A + beta * C */
cudnnStatus_t CUDNNWINAPI
cudnnAddTensor(cudnnHandle_t handle,
               const void *alpha,
               const cudnnTensorDescriptor_t aDesc,
               const void *A,
               const void *beta,
               const cudnnTensorDescriptor_t cDesc,
               void *C);
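/*
 * Usage sketch (illustrative assumption): adding a per-channel bias to an
 * activation tensor in place. A bias described as 1xCx1x1 broadcasts across
 * the N, H, and W dimensions of the NxCxHxW destination; alpha = beta = 1
 * gives C = A + C per the formula above.
 *
 *   float one = 1.0f;
 *   cudnnSetTensor4dDescriptor(biasDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
 *                              1, c, 1, 1);
 *   cudnnAddTensor(handle, &one, biasDesc, d_bias, &one, yDesc, d_y);
 */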
/*
 * CUDNN OpTensor op type
 */
typedef enum {
    CUDNN_OP_TENSOR_ADD  = 0,
    CUDNN_OP_TENSOR_MUL  = 1,
    CUDNN_OP_TENSOR_MIN  = 2,
    CUDNN_OP_TENSOR_MAX  = 3,
    CUDNN_OP_TENSOR_SQRT = 4,
    CUDNN_OP_TENSOR_NOT  = 5,
} cudnnOpTensorOp_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateOpTensorDescriptor(cudnnOpTensorDescriptor_t *opTensorDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc,
                           cudnnOpTensorOp_t opTensorOp,
                           cudnnDataType_t opTensorCompType,
                           cudnnNanPropagation_t opTensorNanOpt);

cudnnStatus_t CUDNNWINAPI
cudnnGetOpTensorDescriptor(const cudnnOpTensorDescriptor_t opTensorDesc,
                           cudnnOpTensorOp_t *opTensorOp,
                           cudnnDataType_t *opTensorCompType,
                           cudnnNanPropagation_t *opTensorNanOpt);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyOpTensorDescriptor(cudnnOpTensorDescriptor_t opTensorDesc);

/* Tensor operation : C = op( alpha1 * A, alpha2 * B ) + beta * C */
/* B tensor is ignored for CUDNN_OP_TENSOR_SQRT, CUDNN_OP_TENSOR_NOT. */
cudnnStatus_t CUDNNWINAPI
cudnnOpTensor(cudnnHandle_t handle,
              const cudnnOpTensorDescriptor_t opTensorDesc,
              const void *alpha1,
              const cudnnTensorDescriptor_t aDesc,
              const void *A,
              const void *alpha2,
              const cudnnTensorDescriptor_t bDesc,
              const void *B,
              const void *beta,
              const cudnnTensorDescriptor_t cDesc,
              void *C);
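/*
 * Usage sketch (illustrative assumption): elementwise C = A * B on float
 * tensors via an op descriptor configured for multiplication. Per the
 * formula above, alpha1 = alpha2 = 1 and beta = 0 give
 * C = (1*A) * (1*B) + 0*C.
 *
 *   cudnnOpTensorDescriptor_t mulDesc;
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnCreateOpTensorDescriptor(&mulDesc);
 *   cudnnSetOpTensorDescriptor(mulDesc, CUDNN_OP_TENSOR_MUL,
 *                              CUDNN_DATA_FLOAT, CUDNN_PROPAGATE_NAN);
 *   cudnnOpTensor(handle, mulDesc, &one, aDesc, d_a, &one, bDesc, d_b,
 *                 &zero, cDesc, d_c);
 *   cudnnDestroyOpTensorDescriptor(mulDesc);
 */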
/*
 * CUDNN ReduceTensor op type
 */
typedef enum {
    CUDNN_REDUCE_TENSOR_ADD          = 0,
    CUDNN_REDUCE_TENSOR_MUL          = 1,
    CUDNN_REDUCE_TENSOR_MIN          = 2,
    CUDNN_REDUCE_TENSOR_MAX          = 3,
    CUDNN_REDUCE_TENSOR_AMAX         = 4,
    CUDNN_REDUCE_TENSOR_AVG          = 5,
    CUDNN_REDUCE_TENSOR_NORM1        = 6,
    CUDNN_REDUCE_TENSOR_NORM2        = 7,
    CUDNN_REDUCE_TENSOR_MUL_NO_ZEROS = 8,
} cudnnReduceTensorOp_t;

/*
 * CUDNN ReduceTensor indices type
 */
typedef enum {
    CUDNN_REDUCE_TENSOR_NO_INDICES        = 0,
    CUDNN_REDUCE_TENSOR_FLATTENED_INDICES = 1,
} cudnnReduceTensorIndices_t;

/*
 * CUDNN tensor indices type size (all unsigned)
 * Currently not supported, default is 32 bit unsigned.
 */
typedef enum {
    CUDNN_32BIT_INDICES = 0,
    CUDNN_64BIT_INDICES = 1,
    CUDNN_16BIT_INDICES = 2,
    CUDNN_8BIT_INDICES  = 3,
} cudnnIndicesType_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateReduceTensorDescriptor(cudnnReduceTensorDescriptor_t *reduceTensorDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc,
                               cudnnReduceTensorOp_t reduceTensorOp,
                               cudnnDataType_t reduceTensorCompType,
                               cudnnNanPropagation_t reduceTensorNanOpt,
                               cudnnReduceTensorIndices_t reduceTensorIndices,
                               cudnnIndicesType_t reduceTensorIndicesType);

cudnnStatus_t CUDNNWINAPI
cudnnGetReduceTensorDescriptor(const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                               cudnnReduceTensorOp_t *reduceTensorOp,
                               cudnnDataType_t *reduceTensorCompType,
                               cudnnNanPropagation_t *reduceTensorNanOpt,
                               cudnnReduceTensorIndices_t *reduceTensorIndices,
                               cudnnIndicesType_t *reduceTensorIndicesType);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyReduceTensorDescriptor(cudnnReduceTensorDescriptor_t reduceTensorDesc);

/* Helper function to return the minimum size of the index space to be passed to the reduction given the input and
 * output tensors */
cudnnStatus_t CUDNNWINAPI
cudnnGetReductionIndicesSize(cudnnHandle_t handle,
                             const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                             const cudnnTensorDescriptor_t aDesc,
                             const cudnnTensorDescriptor_t cDesc,
                             size_t *sizeInBytes);

/* Helper function to return the minimum size of the workspace to be passed to the reduction given the input and output
 * tensors */
cudnnStatus_t CUDNNWINAPI
cudnnGetReductionWorkspaceSize(cudnnHandle_t handle,
                               const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                               const cudnnTensorDescriptor_t aDesc,
                               const cudnnTensorDescriptor_t cDesc,
                               size_t *sizeInBytes);

/* Tensor operation : C = reduce op( alpha * A ) + beta * C */
/* The NaN propagation enum applies to only the min and max reduce ops; the other reduce ops propagate NaN as usual. */
/* The indices space is ignored for reduce ops other than min or max. */
cudnnStatus_t CUDNNWINAPI
cudnnReduceTensor(cudnnHandle_t handle,
                  const cudnnReduceTensorDescriptor_t reduceTensorDesc,
                  void *indices,
                  size_t indicesSizeInBytes,
                  void *workspace,
                  size_t workspaceSizeInBytes,
                  const void *alpha,
                  const cudnnTensorDescriptor_t aDesc,
                  const void *A,
                  const void *beta,
                  const cudnnTensorDescriptor_t cDesc,
                  void *C);
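/*
 * Usage sketch (illustrative assumption): a global average over H and W of
 * an NxCxHxW tensor. Dimensions kept at full size in the output descriptor
 * are preserved; dimensions set to 1 are reduced. The workspace is sized
 * with the helper above before the reduction runs, and no index space is
 * needed for AVG.
 *
 *   cudnnReduceTensorDescriptor_t avgDesc;
 *   size_t wsBytes;
 *   void *d_ws;
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnCreateReduceTensorDescriptor(&avgDesc);
 *   cudnnSetReduceTensorDescriptor(avgDesc, CUDNN_REDUCE_TENSOR_AVG,
 *                                  CUDNN_DATA_FLOAT, CUDNN_PROPAGATE_NAN,
 *                                  CUDNN_REDUCE_TENSOR_NO_INDICES,
 *                                  CUDNN_32BIT_INDICES);
 *   cudnnSetTensor4dDescriptor(cDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
 *                              n, c, 1, 1); // reduce H and W
 *   cudnnGetReductionWorkspaceSize(handle, avgDesc, aDesc, cDesc, &wsBytes);
 *   cudaMalloc(&d_ws, wsBytes);
 *   cudnnReduceTensor(handle, avgDesc, NULL, 0, d_ws, wsBytes,
 *                     &one, aDesc, d_a, &zero, cDesc, d_c);
 */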
/* Set all values of a tensor to a given value : y[i] = value[0] */
cudnnStatus_t CUDNNWINAPI
cudnnSetTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *valuePtr);

/* Scale all values of a tensor by a given factor : y[i] = alpha * y[i] */
cudnnStatus_t CUDNNWINAPI
cudnnScaleTensor(cudnnHandle_t handle, const cudnnTensorDescriptor_t yDesc, void *y, const void *alpha);

/* Create an instance of FilterStruct */
cudnnStatus_t CUDNNWINAPI
cudnnCreateFilterDescriptor(cudnnFilterDescriptor_t *filterDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetFilter4dDescriptor(cudnnFilterDescriptor_t filterDesc,
                           cudnnDataType_t dataType, /* image data type */
                           cudnnTensorFormat_t format,
                           int k,  /* number of output feature maps */
                           int c,  /* number of input feature maps */
                           int h,  /* height of each input filter */
                           int w); /* width of each input filter */

cudnnStatus_t CUDNNWINAPI
cudnnGetFilter4dDescriptor(const cudnnFilterDescriptor_t filterDesc,
                           cudnnDataType_t *dataType, /* image data type */
                           cudnnTensorFormat_t *format,
                           int *k,  /* number of output feature maps */
                           int *c,  /* number of input feature maps */
                           int *h,  /* height of each input filter */
                           int *w); /* width of each input filter */

cudnnStatus_t CUDNNWINAPI
cudnnSetFilterNdDescriptor(cudnnFilterDescriptor_t filterDesc,
                           cudnnDataType_t dataType, /* image data type */
                           cudnnTensorFormat_t format,
                           int nbDims,
                           const int filterDimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetFilterNdDescriptor(const cudnnFilterDescriptor_t filterDesc,
                           int nbDimsRequested,
                           cudnnDataType_t *dataType, /* image data type */
                           cudnnTensorFormat_t *format,
                           int *nbDims,
                           int filterDimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetFilterSizeInBytes(const cudnnFilterDescriptor_t filterDesc, size_t *size);

cudnnStatus_t CUDNNWINAPI
cudnnTransformFilter(cudnnHandle_t handle,
                     const cudnnTensorTransformDescriptor_t transDesc,
                     const void *alpha,
                     const cudnnFilterDescriptor_t srcDesc,
                     const void *srcData,
                     const void *beta,
                     const cudnnFilterDescriptor_t destDesc,
                     void *destData);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyFilterDescriptor(cudnnFilterDescriptor_t filterDesc);
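/*
 * Usage sketch (illustrative assumption): describing the weights of a
 * convolution with 64 output channels, 3 input channels, and 3x3 kernels
 * (KCHW order for the NCHW format).
 *
 *   cudnnFilterDescriptor_t wDesc;
 *   cudnnCreateFilterDescriptor(&wDesc);
 *   cudnnSetFilter4dDescriptor(wDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW,
 *                              64, 3, 3, 3);
 *   ...
 *   cudnnDestroyFilterDescriptor(wDesc);
 */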
/*
 * softmax algorithm
 */
typedef enum {
    CUDNN_SOFTMAX_FAST     = 0, /* straightforward implementation */
    CUDNN_SOFTMAX_ACCURATE = 1, /* subtract max from every point to avoid overflow */
    CUDNN_SOFTMAX_LOG      = 2
} cudnnSoftmaxAlgorithm_t;

typedef enum {
    CUDNN_SOFTMAX_MODE_INSTANCE = 0, /* compute the softmax over all C, H, W for each N */
    CUDNN_SOFTMAX_MODE_CHANNEL  = 1  /* compute the softmax over all C for each H, W, N */
} cudnnSoftmaxMode_t;

/* Softmax functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/* Function to perform forward softmax */
cudnnStatus_t CUDNNWINAPI
cudnnSoftmaxForward(cudnnHandle_t handle,
                    cudnnSoftmaxAlgorithm_t algo,
                    cudnnSoftmaxMode_t mode,
                    const void *alpha,
                    const cudnnTensorDescriptor_t xDesc,
                    const void *x,
                    const void *beta,
                    const cudnnTensorDescriptor_t yDesc,
                    void *y);
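/*
 * Usage sketch (illustrative assumption): classifier logits stored as
 * Nx(numClasses)x1x1 turned into per-image probabilities. The ACCURATE
 * algorithm subtracts the per-instance max before exponentiating, which
 * avoids overflow for large logits.
 *
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnSoftmaxForward(handle, CUDNN_SOFTMAX_ACCURATE,
 *                       CUDNN_SOFTMAX_MODE_INSTANCE,
 *                       &one, logitsDesc, d_logits,
 *                       &zero, probsDesc, d_probs);
 */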
/*
 * pooling mode
 */
typedef enum {
    CUDNN_POOLING_MAX                           = 0,
    CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING = 1, /* count for average includes padded values */
    CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING = 2, /* count for average does not include padded values */
    CUDNN_POOLING_MAX_DETERMINISTIC             = 3
} cudnnPoolingMode_t;

/* Create an instance of pooling descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnCreatePoolingDescriptor(cudnnPoolingDescriptor_t *poolingDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetPooling2dDescriptor(cudnnPoolingDescriptor_t poolingDesc,
                            cudnnPoolingMode_t mode,
                            cudnnNanPropagation_t maxpoolingNanOpt,
                            int windowHeight,
                            int windowWidth,
                            int verticalPadding,
                            int horizontalPadding,
                            int verticalStride,
                            int horizontalStride);

cudnnStatus_t CUDNNWINAPI
cudnnGetPooling2dDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
                            cudnnPoolingMode_t *mode,
                            cudnnNanPropagation_t *maxpoolingNanOpt,
                            int *windowHeight,
                            int *windowWidth,
                            int *verticalPadding,
                            int *horizontalPadding,
                            int *verticalStride,
                            int *horizontalStride);

cudnnStatus_t CUDNNWINAPI
cudnnSetPoolingNdDescriptor(cudnnPoolingDescriptor_t poolingDesc,
                            const cudnnPoolingMode_t mode,
                            const cudnnNanPropagation_t maxpoolingNanOpt,
                            int nbDims,
                            const int windowDimA[],
                            const int paddingA[],
                            const int strideA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetPoolingNdDescriptor(const cudnnPoolingDescriptor_t poolingDesc,
                            int nbDimsRequested,
                            cudnnPoolingMode_t *mode,
                            cudnnNanPropagation_t *maxpoolingNanOpt,
                            int *nbDims,
                            int windowDimA[],
                            int paddingA[],
                            int strideA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetPoolingNdForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                  const cudnnTensorDescriptor_t inputTensorDesc,
                                  int nbDims,
                                  int outputTensorDimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnGetPooling2dForwardOutputDim(const cudnnPoolingDescriptor_t poolingDesc,
                                  const cudnnTensorDescriptor_t inputTensorDesc,
                                  int *n,
                                  int *c,
                                  int *h,
                                  int *w);

/* Destroy an instance of pooling descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyPoolingDescriptor(cudnnPoolingDescriptor_t poolingDesc);

/* Pooling functions: All of the form "output = alpha * Op(inputs) + beta * output" */

/* Function to perform forward pooling */
cudnnStatus_t CUDNNWINAPI
cudnnPoolingForward(cudnnHandle_t handle,
                    const cudnnPoolingDescriptor_t poolingDesc,
                    const void *alpha,
                    const cudnnTensorDescriptor_t xDesc,
                    const void *x,
                    const void *beta,
                    const cudnnTensorDescriptor_t yDesc,
                    void *y);
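/*
 * Usage sketch (illustrative assumption): 2x2 max pooling with stride 2 and
 * no padding, using the output-dimension helper to size the destination
 * descriptor before running the forward pass.
 *
 *   cudnnPoolingDescriptor_t poolDesc;
 *   int on, oc, oh, ow;
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnCreatePoolingDescriptor(&poolDesc);
 *   cudnnSetPooling2dDescriptor(poolDesc, CUDNN_POOLING_MAX,
 *                               CUDNN_NOT_PROPAGATE_NAN,
 *                               2, 2,   // window
 *                               0, 0,   // padding
 *                               2, 2);  // stride
 *   cudnnGetPooling2dForwardOutputDim(poolDesc, xDesc, &on, &oc, &oh, &ow);
 *   cudnnSetTensor4dDescriptor(yDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT,
 *                              on, oc, oh, ow);
 *   cudnnPoolingForward(handle, poolDesc, &one, xDesc, d_x,
 *                       &zero, yDesc, d_y);
 *   cudnnDestroyPoolingDescriptor(poolDesc);
 */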
/*
 * activation mode
 */
typedef enum {
    CUDNN_ACTIVATION_SIGMOID      = 0,
    CUDNN_ACTIVATION_RELU         = 1,
    CUDNN_ACTIVATION_TANH         = 2,
    CUDNN_ACTIVATION_CLIPPED_RELU = 3,
    CUDNN_ACTIVATION_ELU          = 4,
    CUDNN_ACTIVATION_IDENTITY     = 5
} cudnnActivationMode_t;

/* Activation functions: All of the form "output = alpha * Op(inputs) + beta * output" */
cudnnStatus_t CUDNNWINAPI
cudnnCreateActivationDescriptor(cudnnActivationDescriptor_t *activationDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetActivationDescriptor(cudnnActivationDescriptor_t activationDesc,
                             cudnnActivationMode_t mode,
                             cudnnNanPropagation_t reluNanOpt,
                             double coef); /* ceiling for clipped RELU, alpha for ELU */

cudnnStatus_t CUDNNWINAPI
cudnnGetActivationDescriptor(const cudnnActivationDescriptor_t activationDesc,
                             cudnnActivationMode_t *mode,
                             cudnnNanPropagation_t *reluNanOpt,
                             double *coef); /* ceiling for clipped RELU, alpha for ELU */

cudnnStatus_t CUDNNWINAPI
cudnnDestroyActivationDescriptor(cudnnActivationDescriptor_t activationDesc);

/* Function to perform forward activation */
cudnnStatus_t CUDNNWINAPI
cudnnActivationForward(cudnnHandle_t handle,
                       cudnnActivationDescriptor_t activationDesc,
                       const void *alpha,
                       const cudnnTensorDescriptor_t xDesc,
                       const void *x,
                       const void *beta,
                       const cudnnTensorDescriptor_t yDesc,
                       void *y);
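/*
 * Usage sketch (illustrative assumption): an in-place ReLU, y = max(x, 0).
 * Activations may run in place when the input and output descriptors match;
 * coef is unused by plain RELU and is passed as 0.
 *
 *   cudnnActivationDescriptor_t actDesc;
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnCreateActivationDescriptor(&actDesc);
 *   cudnnSetActivationDescriptor(actDesc, CUDNN_ACTIVATION_RELU,
 *                                CUDNN_NOT_PROPAGATE_NAN, 0.0);
 *   cudnnActivationForward(handle, actDesc, &one, xDesc, d_x,
 *                          &zero, xDesc, d_x);
 *   cudnnDestroyActivationDescriptor(actDesc);
 */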
/*
 * Create an instance of LRN (Local Response Normalization) descriptor
 * Uses lrnN=5, lrnAlpha=1e-4, lrnBeta=0.75, lrnK=2.0 as defaults from Krizhevsky'12 ImageNet paper
 */
cudnnStatus_t CUDNNWINAPI
cudnnCreateLRNDescriptor(cudnnLRNDescriptor_t *normDesc);

#define CUDNN_LRN_MIN_N 1       /* minimum allowed lrnN */
#define CUDNN_LRN_MAX_N 16      /* maximum allowed lrnN */
#define CUDNN_LRN_MIN_K 1e-5    /* minimum allowed lrnK */
#define CUDNN_LRN_MIN_BETA 0.01 /* minimum allowed lrnBeta */

/* LRN layer mode */
typedef enum {
    CUDNN_LRN_CROSS_CHANNEL_DIM1 = 0, /* Normalize across tensor's dimA[1] dimension */
} cudnnLRNMode_t;

/*
 * Uses a window [center-lookBehind, center+lookAhead], where
 * lookBehind = floor( (lrnN-1)/2 ), lookAhead = lrnN-lookBehind-1.
 * Values of double parameters cast to tensor data type.
 */
cudnnStatus_t CUDNNWINAPI
cudnnSetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned lrnN, double lrnAlpha, double lrnBeta, double lrnK);

/*
 * Retrieve the settings currently stored in an LRN layer descriptor
 * Any of the provided pointers can be NULL (no corresponding value will be returned)
 */
cudnnStatus_t CUDNNWINAPI
cudnnGetLRNDescriptor(cudnnLRNDescriptor_t normDesc, unsigned *lrnN, double *lrnAlpha, double *lrnBeta, double *lrnK);

/* Destroy an instance of LRN descriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDestroyLRNDescriptor(cudnnLRNDescriptor_t lrnDesc);

/* LRN functions: output = alpha * normalize(x) + beta * old_y */

/* LRN cross-channel forward computation. Double parameters cast to tensor data type */
cudnnStatus_t CUDNNWINAPI
cudnnLRNCrossChannelForward(cudnnHandle_t handle,
                            cudnnLRNDescriptor_t normDesc,
                            cudnnLRNMode_t lrnMode,
                            const void *alpha,
                            const cudnnTensorDescriptor_t xDesc,
                            const void *x,
                            const void *beta,
                            const cudnnTensorDescriptor_t yDesc,
                            void *y);

typedef enum {
    CUDNN_DIVNORM_PRECOMPUTED_MEANS = 0,
} cudnnDivNormMode_t;

/* LCN/divisive normalization functions: y = alpha * normalize(x) + beta * y */
cudnnStatus_t CUDNNWINAPI
cudnnDivisiveNormalizationForward(cudnnHandle_t handle,
                                  cudnnLRNDescriptor_t normDesc,
                                  cudnnDivNormMode_t mode,
                                  const void *alpha,
                                  const cudnnTensorDescriptor_t xDesc, /* same desc for means, temp, temp2 */
                                  const void *x,
                                  const void *means, /* if NULL, means are assumed to be zero */
                                  void *temp,
                                  void *temp2,
                                  const void *beta,
                                  const cudnnTensorDescriptor_t yDesc,
                                  void *y);

typedef enum {
    /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
    CUDNN_BATCHNORM_PER_ACTIVATION = 0,

    /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
    CUDNN_BATCHNORM_SPATIAL = 1,

    /*
     * bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors).
     * May be faster than CUDNN_BATCHNORM_SPATIAL but imposes some limits on the range of values
     */
    CUDNN_BATCHNORM_SPATIAL_PERSISTENT = 2,
} cudnnBatchNormMode_t;

#define CUDNN_BN_MIN_EPSILON 0.0 /* Minimum epsilon allowed to be used in the Batch Normalization formula */

/*
 * Derives a tensor descriptor from layer data descriptor for BatchNormalization
 * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
 * bnScaleBiasMeanVarDesc and bnScaleBiasDiffDesc in Batch Normalization forward and backward functions.
 */
cudnnStatus_t CUDNNWINAPI
cudnnDeriveBNTensorDescriptor(cudnnTensorDescriptor_t derivedBnDesc,
                              const cudnnTensorDescriptor_t xDesc,
                              cudnnBatchNormMode_t mode);

typedef enum {
    CUDNN_BATCHNORM_OPS_BN                = 0, /* do batch normalization only */
    CUDNN_BATCHNORM_OPS_BN_ACTIVATION     = 1, /* do batchNorm, then activation */
    CUDNN_BATCHNORM_OPS_BN_ADD_ACTIVATION = 2, /* do batchNorm, then elemWiseAdd, then activation */
} cudnnBatchNormOps_t;

/*
 * Performs Batch Normalization during Inference:
 * y[i] = bnScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + bnBias[k]
 * with bnScale, bnBias, runningMean, runningInvVariance tensors indexed
 * according to spatial or per-activation mode. Refer to cudnnBatchNormalizationForwardTraining
 * for notes on function arguments.
 */
cudnnStatus_t CUDNNWINAPI
cudnnBatchNormalizationForwardInference(cudnnHandle_t handle,
                                        cudnnBatchNormMode_t mode,
                                        const void *alpha, /* alpha[0] = result blend factor */
                                        const void *beta,  /* beta[0] = dest layer blend factor */
                                        const cudnnTensorDescriptor_t xDesc,
                                        const void *x, /* NxCxHxW */
                                        const cudnnTensorDescriptor_t yDesc,
                                        void *y, /* NxCxHxW */
                                        const cudnnTensorDescriptor_t bnScaleBiasMeanVarDesc,
                                        const void *bnScale,
                                        const void *bnBias,
                                        const void *estimatedMean,
                                        const void *estimatedVariance,
                                        double epsilon);
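/*
 * Usage sketch (illustrative assumption): spatial batch norm at inference
 * time using statistics saved during training. The scale, bias, mean, and
 * variance tensors all share one derived 1xCx1x1 descriptor.
 *
 *   cudnnTensorDescriptor_t bnDesc;
 *   float one = 1.0f, zero = 0.0f;
 *   cudnnCreateTensorDescriptor(&bnDesc);
 *   cudnnDeriveBNTensorDescriptor(bnDesc, xDesc, CUDNN_BATCHNORM_SPATIAL);
 *   cudnnBatchNormalizationForwardInference(handle, CUDNN_BATCHNORM_SPATIAL,
 *                                           &one, &zero,
 *                                           xDesc, d_x, yDesc, d_y,
 *                                           bnDesc, d_scale, d_bias,
 *                                           d_runningMean, d_runningVar,
 *                                           1e-5);
 */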
typedef enum {
    /* bnScale, bnBias tensor dims are 1xCxHxWx.. (one value per CHW...-slice, normalized over N slice) */
    CUDNN_NORM_PER_ACTIVATION = 0,

    /* bnScale, bnBias tensor dims are 1xCx1x1 (one value per C-dim normalized over Nx1xHxW subtensors) */
    CUDNN_NORM_PER_CHANNEL = 1,
} cudnnNormMode_t;

typedef enum { CUDNN_NORM_ALGO_STANDARD = 0, CUDNN_NORM_ALGO_PERSIST = 1 } cudnnNormAlgo_t;

/*
 * Derives a tensor descriptor from layer data descriptor for Normalization
 * scale, invVariance, bnBias, bnScale tensors. Use this tensor desc for
 * normScaleBiasMeanVarDesc and normScaleBiasDiffDesc in Normalization forward and backward functions.
 */
cudnnStatus_t CUDNNWINAPI
cudnnDeriveNormTensorDescriptor(cudnnTensorDescriptor_t derivedNormScaleBiasDesc,
                                cudnnTensorDescriptor_t derivedNormMeanVarDesc,
                                const cudnnTensorDescriptor_t xDesc,
                                cudnnNormMode_t mode,
                                int groupCnt); /* Placeholder for future work; should be set to 1 for now */

typedef enum {
    CUDNN_NORM_OPS_NORM                = 0, /* do normalization only */
    CUDNN_NORM_OPS_NORM_ACTIVATION     = 1, /* do Norm, then activation */
    CUDNN_NORM_OPS_NORM_ADD_ACTIVATION = 2, /* do Norm, then elemWiseAdd, then activation */
} cudnnNormOps_t;

/*
 * Performs Normalization during Inference:
 * y[i] = normScale[k]*(x[i]-estimatedMean[k])/sqrt(epsilon+estimatedVariance[k]) + normBias[k]
 * with normScale, normBias, runningMean, runningInvVariance tensors indexed
 * according to per-channel or per-activation mode. Refer to cudnnNormalizationForwardTraining
 * for notes on function arguments.
 */
cudnnStatus_t CUDNNWINAPI
cudnnNormalizationForwardInference(cudnnHandle_t handle,
                                   cudnnNormMode_t mode,
                                   cudnnNormOps_t normOps,
                                   cudnnNormAlgo_t algo,
                                   const void *alpha, /* alpha[0] = result blend factor */
                                   const void *beta,  /* beta[0] = dest layer blend factor */
                                   const cudnnTensorDescriptor_t xDesc,
                                   const void *x, /* NxCxHxW */
                                   const cudnnTensorDescriptor_t normScaleBiasDesc,
                                   const void *normScale,
                                   const void *normBias,
                                   const cudnnTensorDescriptor_t normMeanVarDesc,
                                   const void *estimatedMean,
                                   const void *estimatedVariance,
                                   const cudnnTensorDescriptor_t zDesc,
                                   const void *z,
                                   cudnnActivationDescriptor_t activationDesc,
                                   const cudnnTensorDescriptor_t yDesc,
                                   void *y, /* NxCxHxW */
                                   double epsilon,
                                   int groupCnt); /* Placeholder for future work; should be set to 1 for now */

/* APIs for spatial transformer network */
typedef enum {
    CUDNN_SAMPLER_BILINEAR = 0,
} cudnnSamplerType_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateSpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t *stDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSetSpatialTransformerNdDescriptor(cudnnSpatialTransformerDescriptor_t stDesc,
                                       cudnnSamplerType_t samplerType,
                                       cudnnDataType_t dataType,
                                       const int nbDims,
                                       const int dimA[]);

cudnnStatus_t CUDNNWINAPI
cudnnDestroySpatialTransformerDescriptor(cudnnSpatialTransformerDescriptor_t stDesc);

cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfGridGeneratorForward(cudnnHandle_t handle,
                                   const cudnnSpatialTransformerDescriptor_t stDesc,
                                   const void *theta,
                                   void *grid);

cudnnStatus_t CUDNNWINAPI
cudnnSpatialTfSamplerForward(cudnnHandle_t handle,
                             cudnnSpatialTransformerDescriptor_t stDesc,
                             const void *alpha,
                             const cudnnTensorDescriptor_t xDesc,
                             const void *x,
                             const void *grid,
                             const void *beta,
                             cudnnTensorDescriptor_t yDesc,
                             void *y);

typedef struct cudnnDropoutStruct *cudnnDropoutDescriptor_t;

cudnnStatus_t CUDNNWINAPI
cudnnCreateDropoutDescriptor(cudnnDropoutDescriptor_t *dropoutDesc);

cudnnStatus_t CUDNNWINAPI
cudnnDestroyDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc);

/* helper function to determine size of the states to be passed to cudnnSetDropoutDescriptor */
cudnnStatus_t CUDNNWINAPI
cudnnDropoutGetStatesSize(cudnnHandle_t handle, size_t *sizeInBytes);

/* helper function to determine size of the reserve space to be passed to dropout forward/backward calls */
cudnnStatus_t CUDNNWINAPI
cudnnDropoutGetReserveSpaceSize(cudnnTensorDescriptor_t xdesc, size_t *sizeInBytes);

cudnnStatus_t CUDNNWINAPI
cudnnSetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                          cudnnHandle_t handle,
                          float dropout,
                          void *states,
                          size_t stateSizeInBytes,
                          unsigned long long seed);

/* Restores the dropout descriptor to a previously saved-off state */
cudnnStatus_t CUDNNWINAPI
cudnnRestoreDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                              cudnnHandle_t handle,
                              float dropout,
                              void *states,
                              size_t stateSizeInBytes,
                              unsigned long long seed);

cudnnStatus_t CUDNNWINAPI
cudnnGetDropoutDescriptor(cudnnDropoutDescriptor_t dropoutDesc,
                          cudnnHandle_t handle,
                          float *dropout,
                          void **states,
                          unsigned long long *seed);

cudnnStatus_t CUDNNWINAPI
cudnnDropoutForward(cudnnHandle_t handle,
                    const cudnnDropoutDescriptor_t dropoutDesc,
                    const cudnnTensorDescriptor_t xdesc,
                    const void *x,
                    const cudnnTensorDescriptor_t ydesc,
                    void *y,
                    void *reserveSpace,
                    size_t reserveSpaceSizeInBytes);
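/*
 * Usage sketch (illustrative assumption): preparing a 50% dropout layer.
 * The RNG state buffer is sized with cudnnDropoutGetStatesSize and lives on
 * the device for the descriptor's lifetime; the reserve space is sized per
 * input tensor and must be handed unchanged to the matching backward call
 * during training.
 *
 *   cudnnDropoutDescriptor_t dropDesc;
 *   size_t stateBytes, reserveBytes;
 *   void *d_states, *d_reserve;
 *   cudnnCreateDropoutDescriptor(&dropDesc);
 *   cudnnDropoutGetStatesSize(handle, &stateBytes);
 *   cudaMalloc(&d_states, stateBytes);
 *   cudnnSetDropoutDescriptor(dropDesc, handle, 0.5f,
 *                             d_states, stateBytes, 1234ULL);
 *   cudnnDropoutGetReserveSpaceSize(xDesc, &reserveBytes);
 *   cudaMalloc(&d_reserve, reserveBytes);
 *   cudnnDropoutForward(handle, dropDesc, xDesc, d_x, yDesc, d_y,
 *                       d_reserve, reserveBytes);
 */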
/* TODO: remove */

typedef struct cudnnAlgorithmStruct *cudnnAlgorithmDescriptor_t;
typedef struct cudnnAlgorithmPerformanceStruct *cudnnAlgorithmPerformance_t;

/* TODO: move these enums out to the appropriate submodule */
typedef enum {
    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM         = 0,
    CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_PRECOMP_GEMM = 1,
    CUDNN_CONVOLUTION_FWD_ALGO_GEMM                  = 2,
    CUDNN_CONVOLUTION_FWD_ALGO_DIRECT                = 3,
    CUDNN_CONVOLUTION_FWD_ALGO_FFT                   = 4,
    CUDNN_CONVOLUTION_FWD_ALGO_FFT_TILING            = 5,
    CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD              = 6,
    CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED     = 7,
    CUDNN_CONVOLUTION_FWD_ALGO_COUNT                 = 8
} cudnnConvolutionFwdAlgo_t;

typedef enum {
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0                 = 0, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_1                 = 1,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT               = 2,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_3                 = 3, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD          = 4, /* not implemented */
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_WINOGRAD_NONFUSED = 5,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_FFT_TILING        = 6,
    CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT             = 7
} cudnnConvolutionBwdFilterAlgo_t;

typedef enum {
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_0                 = 0, /* non-deterministic */
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_1                 = 1,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT               = 2,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_FFT_TILING        = 3,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD          = 4,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED = 5,
    CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT             = 6
} cudnnConvolutionBwdDataAlgo_t;

typedef enum {
    CUDNN_RNN_ALGO_STANDARD               = 0,
    CUDNN_RNN_ALGO_PERSIST_STATIC         = 1,
    CUDNN_RNN_ALGO_PERSIST_DYNAMIC        = 2,
    CUDNN_RNN_ALGO_PERSIST_STATIC_SMALL_H = 3,
    CUDNN_RNN_ALGO_COUNT                  = 4,
} cudnnRNNAlgo_t;

typedef enum { CUDNN_CTC_LOSS_ALGO_DETERMINISTIC = 0, CUDNN_CTC_LOSS_ALGO_NON_DETERMINISTIC = 1 } cudnnCTCLossAlgo_t;

/* TODO: remove */
typedef struct {
    union Algorithm {
        cudnnConvolutionFwdAlgo_t convFwdAlgo;
        cudnnConvolutionBwdFilterAlgo_t convBwdFilterAlgo;
        cudnnConvolutionBwdDataAlgo_t convBwdDataAlgo;
        cudnnRNNAlgo_t RNNAlgo;
        cudnnCTCLossAlgo_t CTCLossAlgo;
    } algo;
} cudnnAlgorithm_t;

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateAlgorithmDescriptor(cudnnAlgorithmDescriptor_t *algoDesc);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t algorithm);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t algoDesc, cudnnAlgorithm_t *algorithm);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCopyAlgorithmDescriptor(const cudnnAlgorithmDescriptor_t src, cudnnAlgorithmDescriptor_t dest);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyAlgorithmDescriptor(cudnnAlgorithmDescriptor_t algoDesc);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnCreateAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToCreate);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSetAlgorithmPerformance(cudnnAlgorithmPerformance_t algoPerf,
                             cudnnAlgorithmDescriptor_t algoDesc,
                             cudnnStatus_t status,
                             float time,
                             size_t memory);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetAlgorithmPerformance(const cudnnAlgorithmPerformance_t algoPerf,
                             cudnnAlgorithmDescriptor_t *algoDesc,
                             cudnnStatus_t *status,
                             float *time,
                             size_t *memory);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnDestroyAlgorithmPerformance(cudnnAlgorithmPerformance_t *algoPerf, int numberToDestroy);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnGetAlgorithmSpaceSize(cudnnHandle_t handle, cudnnAlgorithmDescriptor_t algoDesc, size_t *algoSpaceSizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnSaveAlgorithm(cudnnHandle_t handle,
                   cudnnAlgorithmDescriptor_t algoDesc,
                   void *algoSpace,
                   size_t algoSpaceSizeInBytes);

CUDNN_DEPRECATED cudnnStatus_t CUDNNWINAPI
cudnnRestoreAlgorithm(cudnnHandle_t handle,
                      void *algoSpace,
                      size_t algoSpaceSizeInBytes,
                      cudnnAlgorithmDescriptor_t algoDesc);

typedef enum {
    CUDNN_SEV_FATAL   = 0,
    CUDNN_SEV_ERROR   = 1,
    CUDNN_SEV_WARNING = 2,
    CUDNN_SEV_INFO    = 3,
} cudnnSeverity_t;

/* Message masks to be used with cudnnSetCallback() */
#define CUDNN_SEV_ERROR_EN (1U << CUDNN_SEV_ERROR)
#define CUDNN_SEV_WARNING_EN (1U << CUDNN_SEV_WARNING)
#define CUDNN_SEV_INFO_EN (1U << CUDNN_SEV_INFO)

/* struct containing useful information for each API call */
typedef struct {
    unsigned cudnn_version;
    cudnnStatus_t cudnnStatus;
    unsigned time_sec;      /* epoch time in seconds */
    unsigned time_usec;     /* microseconds part of epoch time */
    unsigned time_delta;    /* time since start in seconds */
    cudnnHandle_t handle;   /* cudnn handle */
    cudaStream_t stream;    /* cuda stream ID */
    unsigned long long pid; /* process ID */
    unsigned long long tid; /* thread ID */
    int cudaDeviceId;       /* CUDA device ID */
    int reserved[15];       /* reserved for future use */
} cudnnDebug_t;

typedef void (*cudnnCallback_t)(cudnnSeverity_t sev, void *udata, const cudnnDebug_t *dbg, const char *msg);

cudnnStatus_t CUDNNWINAPI
cudnnSetCallback(unsigned mask, void *udata, cudnnCallback_t fptr);

cudnnStatus_t CUDNNWINAPI
cudnnGetCallback(unsigned *mask, void **udata, cudnnCallback_t *fptr);
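/*
 * Usage sketch (illustrative assumption): routing cuDNN error and warning
 * messages through a user callback. The mask is a bitwise OR of the
 * CUDNN_SEV_*_EN masks above; logCallback is a hypothetical function the
 * application defines (assuming <stdio.h>), and udata is passed through
 * untouched.
 *
 *   void logCallback(cudnnSeverity_t sev, void *udata,
 *                    const cudnnDebug_t *dbg, const char *msg) {
 *       fprintf(stderr, "[cudnn sev=%d] %s\n", (int)sev, msg);
 *   }
 *   ...
 *   cudnnSetCallback(CUDNN_SEV_ERROR_EN | CUDNN_SEV_WARNING_EN,
 *                    NULL, logCallback);
 */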
/*
 * \brief Cross-library version checker.
 * This function is implemented differently in each sub-library. Each sublib
 * checks whether its own version matches that of its dependencies.
 * \returns CUDNN_STATUS_SUCCESS if the version check passes,
 *          CUDNN_STATUS_VERSION_MISMATCH if the versions are inconsistent.
 */
cudnnStatus_t CUDNNWINAPI
cudnnOpsInferVersionCheck(void);

#if defined(__cplusplus)
}
#endif

#endif /* CUDNN_OPS_INFER_H_ */