Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
da51d42
CMake: Fix error on windows using TARGET_FILE to launch dynamically c…
ptheywood Aug 5, 2025
094b1b5
CMake: Ensure the check target behaves with single-target and multi-t…
ptheywood Aug 5, 2025
dc03bd9
Suppress C4996 warnings under MSVC
ptheywood Aug 5, 2025
6a5b097
Fix C4129 unrecognised character escape sequence warnings on windows
ptheywood Aug 5, 2025
93a1809
Fix MSVC error C2110: '+': cannot add two pointers
ptheywood Aug 5, 2025
d44ad37
Fix MSVC error C2026: string too big
ptheywood Aug 5, 2025
80e72ae
Use std::invoke_result_t for >= C++17 in place of std::result_of
ptheywood Aug 5, 2025
72a38c7
Suppress conversion from size_t to int warning with explicit cast
ptheywood Aug 5, 2025
fb5e701
Suppress deprecated method warnings for jitify2::CompilerProgramData:…
ptheywood Aug 5, 2025
1ca0583
Suppress NVCC warning 550-D set but never used variable pch_verbose
ptheywood Aug 5, 2025
0d629d8
Remove the deletion of the Arg copy constructor so it is trivially co…
ptheywood Aug 6, 2025
a4c8a36
Check CUDA_PATH after JITIFY_CUDA_HOME and CUDA_HOME in guess_cuda_home
ptheywood Aug 20, 2025
b477cbe
Windows: Use CUDA_VERSION from cuda.h as the fallback default version…
ptheywood Aug 20, 2025
3d09df5
Wrap paths with quotes for run_system_command and -I when required.
ptheywood Aug 20, 2025
9826e87
Windows: Use GetLongPathNameA to get the expanded temporary directory…
ptheywood Aug 21, 2025
a5e7028
Windows: Use std::filesystem::remove_all when built as c++17 on windo…
ptheywood Aug 21, 2025
4d8a5f2
Windows: Fix expected cuda include dir index checked in EncodedQuoteI…
ptheywood Aug 21, 2025
4882141
Return the command exit code from run_system_command, and check agian…
ptheywood Aug 22, 2025
37ddc14
Suppress warning #3013-D: a volatile function parameter is deprecated…
ptheywood Sep 2, 2025
2c90d02
c++20: Use std::invoke_result instead of std::result_of depending on …
ptheywood Sep 2, 2025
6dda7aa
Use compute capability 75 for -arch tests for CUDA 13 support
ptheywood Sep 3, 2025
3b8babf
CCCL/Thrust 3.0 requires c++17
ptheywood Sep 3, 2025
9d116e4
CUDA 13.0 splits CCCL into a separate include directory, which is req…
ptheywood Sep 3, 2025
184c588
Fixup: CCCL separate include directory for CUDA 13 under Linux
ptheywood Sep 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 39 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ set (CMAKE_CXX_STANDARD 17)
set (CMAKE_CUDA_STANDARD 17) # Doesn't work?
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ")
if (MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /WX -D_CRT_SECURE_NO_WARNINGS")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /O2")
else()
set(CMAKE_CXX_FLAGS
Expand All @@ -30,7 +30,7 @@ endif()
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
if (MSVC)
set(CMAKE_CUDA_FLAGS
"${CMAKE_CUDA_FLAGS} -Xcompiler=\"/WX\" -rdc=true")
"${CMAKE_CUDA_FLAGS} -Xcompiler=\"/WX\" -D_CRT_SECURE_NO_WARNINGS -rdc=true")
set(CMAKE_CUDA_FLAGS_RELEASE
"${CMAKE_CUDA_FLAGS_RELEASE} -O3 -Xcompiler=\"/O2\"")
else()
Expand All @@ -46,9 +46,35 @@ endif()
find_package(CUDA REQUIRED) # Required for CUDA_INCLUDE_DIRS

# Add macro definitions used in tests.
add_compile_definitions(
CUDA_INC_DIR="${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
CUB_DIR=${CUDA_INC_DIR})
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0.0)
  # CCCL's include directory moved in CUDA 13 compared to CUDA 12, so the CUDA
  # toolkit and CCCL include paths are passed to the tests as separate
  # definitions (CUDA_INC_DIR and CUB_DIR).
  # On Windows, CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES includes both include
  # paths, which must be separated before use in compile definitions.
  # On *nix, only the main CTK include directory is included.
  # It may be cleaner to switch to the more modern find_package(CUDAToolkit)
  # and find_package(CCCL).
  list(LENGTH CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES ctk_inc_dirs_length)
  # The first entry is always used as the main CUDA toolkit include directory.
  list(GET CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES 0 cuda_inc_dir)
  if (ctk_inc_dirs_length GREATER 1)
    # When more than one directory is listed, the second entry is used as the
    # CCCL include directory.
    list(GET CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES 1 cccl_inc_dir)
  elseif (EXISTS "${cuda_inc_dir}/cccl")
    # Otherwise check the default location within the CTK.
    set(cccl_inc_dir "${cuda_inc_dir}/cccl")
  else()
    # Fall back to the main CTK include directory.
    set(cccl_inc_dir "${cuda_inc_dir}")
  endif()
  add_compile_definitions(
    CUDA_INC_DIR="${cuda_inc_dir}"
    CUB_DIR="${cccl_inc_dir}")
  # Remove the temporaries so they do not leak into the rest of the file.
  unset(cccl_inc_dir)
  unset(cuda_inc_dir)
  unset(ctk_inc_dirs_length)
else()
  # NOTE(review): no CMake variable named CUDA_INC_DIR is set in the visible
  # part of this file, so ${CUDA_INC_DIR} may expand to an empty string here
  # -- confirm this is intended.
  add_compile_definitions(
    CUDA_INC_DIR="${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
    CUB_DIR=${CUDA_INC_DIR})
endif()
# Copy the example_headers directory for use at runtime by tests.
file(COPY example_headers DESTINATION ${CMAKE_CURRENT_BINARY_DIR})

Expand Down Expand Up @@ -100,7 +126,7 @@ function(add_stringify_command arg)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${arg}.jit
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMAND ./stringify ${CMAKE_CURRENT_SOURCE_DIR}/${arg} > ${arg}.jit
COMMAND $<TARGET_FILE:stringify> ${CMAKE_CURRENT_SOURCE_DIR}/${arg} > ${arg}.jit
DEPENDS stringify)
endfunction()
add_executable(jitify2_preprocess jitify2_preprocess.cpp)
Expand All @@ -118,7 +144,7 @@ add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/jitify2_test_kernels.cu.jit.hpp
${CMAKE_CURRENT_BINARY_DIR}/jitify2_test_kernels.cu.headers.jit.cpp
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
COMMAND ${CMAKE_CURRENT_BINARY_DIR}/jitify2_preprocess -i --minify
COMMAND $<TARGET_FILE:jitify2_preprocess> -i --minify
-o ${CMAKE_CURRENT_BINARY_DIR}
-s jitify2_test_kernels.cu.headers
jitify2_test_kernels.cu
Expand Down Expand Up @@ -172,8 +198,12 @@ foreach(test ${TESTS})
endforeach(test)
# Add "check" command that *builds and* runs tests, with verbose output.
# (The default "test" command neither builds nor gives verbose output).
add_custom_target(check ALL COMMAND ${CMAKE_CTEST_COMMAND} --verbose
DEPENDS ${TESTS})
# The "check" target is part of the default build (ALL), depends on every
# target listed in TESTS, and then runs ctest with verbose output.
# --build-config is required for multi-config generators. The generator
# expression below evaluates to nothing when $<CONFIG> is empty, and to the
# list "--build-config;$<CONFIG>" otherwise. COMMAND_EXPAND_LISTS is used to
# ensure that the flag and its value are passed to ctest as two separate
# arguments rather than as a single string.
add_custom_target(check ALL
COMMAND ${CMAKE_CTEST_COMMAND} --verbose "$<IF:$<STREQUAL:$<CONFIG>,>,,--build-config;$<CONFIG>>"
DEPENDS ${TESTS}
COMMAND_EXPAND_LISTS
)

# ----
# Docs
Expand Down
3 changes: 0 additions & 3 deletions example_headers/class_arg_kernel.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,6 @@ class Managed {
struct Arg : public Managed {
const int x;
Arg(int x_) : x(x_) {}

// there can be no call to the copy constructor
Arg(const Arg& arg) = delete;
};

template <typename T>
Expand Down
99 changes: 80 additions & 19 deletions jitify2.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@
#include <type_traits>
#include <unordered_set>

#if __cplusplus >= 201703L
#if JITIFY_CPLUSPLUS >= 201703L
#include <filesystem>
#endif

Expand Down Expand Up @@ -194,7 +194,7 @@
#include <dbghelp.h> // For UndecorateSymbolName
#include <direct.h> // For mkdir
#include <fcntl.h> // For open, O_RDWR etc.
#include <fileapi.h> // For GetTempPath2A
#include <fileapi.h> // For GetTempPath2A, GetLongPathNameA
#include <io.h> // For _sopen_s
#include <process.h> // For _getpid
#include <shlobj_core.h> // For SHGetFolderPathA
Expand Down Expand Up @@ -2786,6 +2786,15 @@ inline bool path_exists(const char* filename, bool* is_dir = nullptr) {
return ret;
}

// Returns `p` wrapped in double quotes if it contains a space or a backslash,
// and returns it unchanged otherwise. Such paths may need quoting when they
// are passed to run_system_command, either as the executable or as an
// include path.
inline std::string quoted_path_if_needed(const std::string& p) {
  const bool needs_quotes = p.find_first_of(" \\") != std::string::npos;
  if (!needs_quotes) return p;

  std::string quoted;
  quoted.reserve(p.size() + 2);
  quoted += '"';
  quoted += p;
  quoted += '"';
  return quoted;
}

inline const char* get_current_executable_path() {
static const char* path = []() -> const char* {
static char buffer[JITIFY_PATH_MAX + 1] = {};
Expand Down Expand Up @@ -4117,15 +4126,31 @@ inline int run_system_command(const char* command,
if (output) {
output->clear();
std::array<char, 128> buffer;
while (fgets(buffer.data(), buffer.size(), pipe)) {
while (fgets(buffer.data(), static_cast<int>(buffer.size()), pipe)) {
*output += buffer.data();
}
} else {
// Must always read from the pipe for the exit code from the command to be available
std::array<char, 128> buffer;
while (fgets(buffer.data(), static_cast<int>(buffer.size()), pipe)) { }
}
const int result = JITIFY_PCLOSE(pipe);
if (result == -1 && failure) {
*failure = get_errno_string();
}
return result;

// Extract the exit code from the called program if possible, otherwise return -1;
int exitCode = -1;
#ifdef _MSC_VER
// _pclose is documented as having the same return code format as for _cwait, but with the high and low order bytes swapped. However the _cwait docs do not describe a corresponding value. Just extracting the lsb seems to behave
exitCode = result & 0xFF;
#else
// Extract the exit code from the pclose result if it was a 'normal' exit
if (WIFEXITED(result)){
exitCode = WEXITSTATUS(result);
}
#endif
return exitCode;
}
#endif // JITIFY_ENABLE_NVCC

Expand All @@ -4135,9 +4160,15 @@ inline const char* guess_cuda_home() {
if (env_jitify_cuda_home) return env_jitify_cuda_home;
const char* env_cuda_home = std::getenv("CUDA_HOME");
if (env_cuda_home) return env_cuda_home;
// CUDA_PATH is set by the CUDA installer on windows
const char* env_cuda_path = std::getenv("CUDA_PATH");
if (env_cuda_path) return env_cuda_path;
// Guess the default location.
#if defined _WIN32 || defined _WIN64
return "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA";
constexpr int cuda_version_major = CUDA_VERSION / 1000;
constexpr int cuda_version_minor = (CUDA_VERSION % 1000) / 10;
std::string default_path = std::string("C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v") + std::to_string(cuda_version_major) + "." + std::to_string(cuda_version_minor) + "\\";
return default_path.c_str();
#else
return "/usr/local/cuda";
#endif
Expand All @@ -4150,7 +4181,7 @@ class Nvcc {
std::string nvcc_path_;

static bool is_valid_nvcc(std::string nvcc_path) {
return run_system_command((nvcc_path + " --version").c_str());
return run_system_command((quoted_path_if_needed(nvcc_path) + " --version").c_str()) == 0;
}

static std::string find_nvcc_path() {
Expand Down Expand Up @@ -4180,7 +4211,7 @@ class Nvcc {
std::string* failure = nullptr) const {
// Note: We redirect stderr to stdout so that we capture it too.
const std::string command =
detail::string_concat(nvcc_path_, " ", options, " ", "2>&1");
detail::string_concat(quoted_path_if_needed(nvcc_path_), " ", options, " ", "2>&1");
return run_system_command(command.c_str(), output, failure);
}
};
Expand All @@ -4206,7 +4237,9 @@ inline std::string make_temp_dir() {
char tmpdir[JITIFY_PATH_MAX + 1];
// Note: tmpdir is guaranteed to end with a '\'.
if (!GetTempPath2A(sizeof(tmpdir), tmpdir)) return "";
std::string path = tmpdir + "__jitify_" + std::to_string(uid);
// Get the long-form of the tmpdir
GetLongPathNameA(tmpdir, tmpdir, sizeof(tmpdir));
std::string path = std::string(tmpdir) + "__jitify_" + std::to_string(uid);
if (::_mkdir(path.c_str()) != 0) return "";
return path;
#else
Expand All @@ -4216,7 +4249,7 @@ inline std::string make_temp_dir() {
#endif
}

#if __cplusplus < 201703L && (!defined(_WIN32) && !defined(_WIN64))
#if JITIFY_CPLUSPLUS < 201703L && (!defined(_WIN32) && !defined(_WIN64))
inline int delete_file_visitor(const char* path, const struct stat* sbuf,
int type, struct FTW* ftwb) {
(void)sbuf;
Expand All @@ -4227,11 +4260,11 @@ inline int delete_file_visitor(const char* path, const struct stat* sbuf,
#endif

inline bool remove_all(const std::string& path) {
#if __cplusplus >= 201703L
#if JITIFY_CPLUSPLUS >= 201703L
std::error_code ec;
return std::filesystem::remove_all(path, ec) !=
static_cast<std::uintmax_t>(-1);
#else // __cplusplus < 201703L
#else // JITIFY_CPLUSPLUS < 201703L
#if defined(_WIN32) || defined(_WIN64)
// TODO: Implement this if anyone cares about it.
return false;
Expand All @@ -4244,7 +4277,7 @@ inline bool remove_all(const std::string& path) {
const int max_depth = 20;
return ::nftw(path.c_str(), delete_file_visitor, max_depth, flags) == 0;
#endif // not Windows
#endif // __cplusplus < 201703L
#endif // JITIFY_CPLUSPLUS < 201703L
}

class TempDirectory {
Expand Down Expand Up @@ -4349,8 +4382,8 @@ class NvccProgram {
// Note: This ensures the cuda toolkit headers are found before any that
// were embedded during preprocessing (which probably won't work with nvcc).
options.emplace_back(
"-I", detail::path_join(detail::guess_cuda_home(), "include"));
options.emplace_back("-I", tmp_include_dir);
"-I", detail::quoted_path_if_needed(detail::path_join(detail::guess_cuda_home(), "include")));
options.emplace_back("-I", detail::quoted_path_if_needed(tmp_include_dir));

static const char* const kJitifyExpressionPrefix = "__jitify_expression";

Expand Down Expand Up @@ -4388,7 +4421,7 @@ class NvccProgram {
if (!options.find({"--dlink-time-opt, -dlto"}).empty()) {
options.emplace_back("-ltoir", "");
options.emplace_back(tmp_source_file, "");
if (nvcc(options, &log_, error)) return infer_nvcc_error_type();
if (nvcc(options, &log_, error) != 0) return infer_nvcc_error_type();
if (!read_binary_file(tmp_ltoir_file, &nvvm_)) {
if (error) *error = "Failed to read binary file: " + tmp_ltoir_file;
return NVRTC_ERROR_PROGRAM_CREATION_FAILURE;
Expand All @@ -4399,7 +4432,7 @@ class NvccProgram {
options.emplace_back("-ptx", "");
options.emplace_back(tmp_source_file, "");
options.emplace_back("-o", tmp_ptx_file);
if (nvcc(options, &log_, error)) return infer_nvcc_error_type();
if (nvcc(options, &log_, error) != 0) return infer_nvcc_error_type();
options.pop_back(); // Remove -o option
options.pop_back(); // Remove source file
options.pop_back(); // Remove -ptx
Expand Down Expand Up @@ -4439,7 +4472,7 @@ class NvccProgram {
options.emplace_back("-cubin", "");
options.emplace_back(tmp_ptx_file, "");
options.emplace_back("-o", tmp_cubin_file);
if (nvcc(options, &log_, error)) {
if (nvcc(options, &log_, error) != 0) {
return NVRTC_ERROR_PROGRAM_CREATION_FAILURE;
}
if (!read_binary_file(tmp_cubin_file, &cubin_)) {
Expand Down Expand Up @@ -4531,7 +4564,24 @@ inline nvrtcResult compile_program_nvrtc(
header_sources_c.push_back(name_source.second.c_str());
}

#if defined(__CUDACC__)
#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
#pragma nv_diag_suppress 550
#else // __NVCC_DIAG_PRAGMA_SUPPORT__
#pragma diag_suppress 550
#endif // __NVCC_DIAG_PRAGMA_SUPPORT__
#endif // defined(__CUDACC__)

bool pch_verbose = true;

#if defined(__CUDACC__)
#ifdef __NVCC_DIAG_PRAGMA_SUPPORT__
#pragma nv_diag_default 550
#else // __NVCC_DIAG_PRAGMA_SUPPORT__
#pragma diag_default 550
#endif // __NVCC_DIAG_PRAGMA_SUPPORT__
#endif // #if defined(__CUDACC__)

std::vector<const char*> options_c;
options_c.reserve(options.size());
for (const Option& option : options) {
Expand Down Expand Up @@ -6626,7 +6676,8 @@ struct __add_reference_helper<_Tp, true> {
};
template <typename _Tp>
struct add_reference : public __add_reference_helper<_Tp> {};

)"
R"(
namespace __jitify_detail {
template <typename T>
struct is_int_or_cref {
Expand Down Expand Up @@ -9687,9 +9738,19 @@ class LRUFileCache {
file_suffix_(sanitize_filename(file_suffix)),
lock_file_name_(path_join(path_, file_prefix_ + "lock")) {}


// invoke_result_type<T> names the result type of calling a T with no
// arguments. std::result_of was deprecated in C++17 and removed in C++20, so
// std::invoke_result is used when JITIFY_CPLUSPLUS indicates C++17 or newer,
// and std::result_of is used otherwise.
#if JITIFY_CPLUSPLUS >= 201703L
template <typename T>
using invoke_result_type = typename std::invoke_result<T>::type;
#else // JITIFY_CPLUSPLUS < 201703L
template <typename T>
using invoke_result_type = typename std::result_of<T()>::type;
#endif // JITIFY_CPLUSPLUS < 201703L

template <class Construct, class Serialize, class Deserialize>
std::string get(const std::string& name,
typename std::result_of<Construct()>::type* result,
invoke_result_type<Construct>* result,
Construct construct, Serialize serialize,
Deserialize deserialize, bool* hit = nullptr) const {
if (path_.empty() || max_size_ == 0) {
Expand Down
Loading