28 changes: 14 additions & 14 deletions src/vmaware.hpp
@@ -4819,7 +4819,7 @@
// we used a rng before running the traditional rdtsc-cpuid-rdtsc trick

// sometimes not intercepted in some hvs (like VirtualBox) under compat mode
thread_local u32 aux = 0;

GitHub Actions annotation, Analyze (cpp, gcc-14, Ninja Multi-Config, Debug, ON), line 4822 of src/vmaware.hpp: unused variable 'aux' [-Wunused-variable]
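A low-risk way to silence this on configurations where aux is only read behind some build paths would be C++17's [[maybe_unused]] (a suggestion, not something this PR does):

[[maybe_unused]] thread_local u32 aux = 0; // suggested attribute; silences -Wunused-variable on builds that never read aux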
auto cpuid = [&](unsigned int leaf) noexcept -> u64 {
#if (MSVC)
// make regs volatile so writes cannot be optimized out; without this, a release build measures around 40 cycles even under Hyper-V
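For context, the rdtsc-cpuid-rdtsc trick mentioned above times a CPUID instruction between two timestamp reads; a hypervisor that intercepts CPUID pays a vmexit round-trip, which inflates the cycle delta. A minimal standalone sketch (hypothetical helper, not this PR's implementation):

#include <cstdint>
#if defined(_MSC_VER)
#include <intrin.h>
#else
#include <x86intrin.h>
#include <cpuid.h>
#endif

static std::uint64_t cpuid_cycles() noexcept {
    const std::uint64_t t0 = __rdtsc();
#if defined(_MSC_VER)
    int regs[4];
    __cpuid(regs, 0); // leaf 0; intercepted by most hypervisors
#else
    unsigned a = 0, b = 0, c = 0, d = 0;
    __get_cpuid(0, &a, &b, &c, &d); // leaf 0; intercepted by most hypervisors
#endif
    const std::uint64_t t1 = __rdtsc();
    return t1 - t0; // unusually large deltas suggest a vmexit, i.e. a hypervisor
}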
@@ -5013,12 +5013,12 @@
#else
__attribute__((noinline))
#endif
-std::uint64_t operator()() const noexcept {
+u64 operator()() const noexcept {
// To prevent hoisting across this call
std::atomic_signal_fence(std::memory_order_seq_cst);

// start state (golden ratio)
-volatile std::uint64_t v = UINT64_C(0x9E3779B97F4A7C15);
+volatile u64 v = UINT64_C(0x9E3779B97F4A7C15);

// mix in addresses (ASLR gives entropy, but even if ASLR is disabled or bypassed we still have some tricks)
// Take addresses of various locals/statics and mark some volatile so they cannot be optimized away
@@ -5030,17 +5030,17 @@
std::uintptr_t pc = reinterpret_cast<std::uintptr_t>(&module_static);
std::uintptr_t pd = reinterpret_cast<std::uintptr_t>(&probe_lambda);

-v ^= static_cast<std::uint64_t>(pa) + UINT64_C(0x9E3779B97F4A7C15) + (v << 6) + (v >> 2);
-v ^= static_cast<std::uint64_t>(pb) + (v << 7);
-v ^= static_cast<std::uint64_t>(pc) + (v >> 11);
-v ^= static_cast<std::uint64_t>(pd) + UINT64_C(0xBF58476D1CE4E5B9);
+v ^= static_cast<u64>(pa) + UINT64_C(0x9E3779B97F4A7C15) + (v << 6) + (v >> 2);
+v ^= static_cast<u64>(pb) + (v << 7);
+v ^= static_cast<u64>(pc) + (v >> 11);
+v ^= static_cast<u64>(pd) + UINT64_C(0xBF58476D1CE4E5B9);

// dependent operations on volatile locals to prevent elimination
for (int i = 0; i < 24; ++i) {
volatile int stack_local = i ^ static_cast<int>(v);
// take address each iteration and fold it in
std::uintptr_t la = reinterpret_cast<std::uintptr_t>(&stack_local);
-v ^= (static_cast<std::uint64_t>(la) + (static_cast<std::uint64_t>(i) * UINT64_C(0x9E3779B97F4A7C)));
+v ^= (static_cast<u64>(la) + (static_cast<u64>(i) * UINT64_C(0x9E3779B97F4A7C)));
// dependent shifts to spread any small differences
v ^= (v << ((i & 31)));
v ^= (v >> (((i + 13) & 31)));
@@ -5058,20 +5058,20 @@
// another compiler fence to prevent hoisting results
std::atomic_signal_fence(std::memory_order_seq_cst);

-return static_cast<std::uint64_t>(v);
+return static_cast<u64>(v);
}
};
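The core idea of the functor above is worth isolating: every address it folds in (stack locals, statics, the lambda object) is randomized per run by ASLR, and the golden-ratio constant plus the data-dependent shifts diffuse those few varying bits across the full 64-bit word. A stripped-down sketch of the same technique (simplified, not the PR's exact code):

#include <cstdint>

inline std::uint64_t seed_sketch() noexcept {
    static int module_anchor = 0;  // static storage: address moves with the image base
    volatile int stack_anchor = 0; // automatic storage: address moves with the stack base
    std::uint64_t v = UINT64_C(0x9E3779B97F4A7C15); // golden-ratio start state
    v ^= static_cast<std::uint64_t>(reinterpret_cast<std::uintptr_t>(&module_anchor)) + (v << 6) + (v >> 2);
    v ^= static_cast<std::uint64_t>(reinterpret_cast<std::uintptr_t>(&stack_anchor)) + (v << 7);
    v ^= static_cast<std::uint64_t>(reinterpret_cast<std::uintptr_t>(&seed_sketch)) + (v >> 11); // code address
    return v;
}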

// rejection sampling as before to avoid modulo bias
-auto rng = [](std::uint64_t min, std::uint64_t max, auto getrand) noexcept -> std::uint64_t {
-const std::uint64_t range = max - min + 1;
-const std::uint64_t max_val = std::numeric_limits<std::uint64_t>::max();
-const std::uint64_t limit = max_val - (max_val % range);
+auto rng = [](u64 min, u64 max, auto getrand) noexcept -> u64 {
+const u64 range = max - min + 1;
+const u64 max_val = std::numeric_limits<u64>::max();
+const u64 limit = max_val - (max_val % range);
for (;;) {
-const std::uint64_t r = getrand();
+const u64 r = getrand();
if (r < limit) return min + (r % range);
// small local mix to change subsequent outputs (still in user-mode and not a syscall)
-volatile std::uint64_t scrub = r;
+volatile u64 scrub = r;
scrub ^= (scrub << 11);
scrub ^= (scrub >> 9);
(void)scrub;
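To see why the rejection step matters, take a toy 3-bit generator (outputs 0 through 7) reduced to a range of 3: plain r % 3 maps {0,3,6} to 0 and {1,4,7} to 1, but only {2,5} to 2, so 2 comes up less often; with limit = 8 - (8 % 3) = 6, discarding r >= 6 restores uniformity. Usage would then look roughly like this (names hypothetical; assumes the noinline seed functor above has been instantiated):

seed_functor entropy; // hypothetical name for an instance of the functor defined above
const u64 delay = rng(1000, 5000, [&]() noexcept -> u64 { return entropy(); }); // unbiased draw in [1000, 5000]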