Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
135 changes: 135 additions & 0 deletions internal-api/src/jmh/java/datadog/trace/util/HashingBenchmark.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
package datadog.trace.util;

import java.util.concurrent.ThreadLocalRandom;
import java.util.function.Supplier;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Threads;
import org.openjdk.jmh.annotations.Warmup;

/**
* In contrast to java.util.Objects.hash, datadog.trace.util.HashingUtils.hash has overloads for
* different parameter counts that allow most callers to avoid calling the var-arg version. This
* avoids the common situation where the JIT's escape analysis is unable to elide the var-arg array
* allocation.
*
* <p>This results in roughly 3x throughput, but more importantly no allocation, as compared to
* GiBs / sec with var-args. <code>
* MacBook M1 using 8 threads/cores with -prof gc
*
* Benchmark Mode Cnt Score Error Units
*
* HashingBenchmark.hash2 thrpt 6 3365779949.250 ± 270198455.226 ops/s
* HashingBenchmark.hash2:gc.alloc.rate thrpt 6 0.001 ± 0.001 MB/sec
*
* HashingBenchmark.hash2_varargs thrpt 6 1194884232.767 ± 39724408.823 ops/s
* HashingBenchmark.hash2_varargs:gc.alloc.rate thrpt 6 27330.473 ± 909.029 MB/sec
*
*
* HashingBenchmark.hash3 thrpt 6 2314013984.714 ± 181952393.469 ops/s
* HashingBenchmark.hash3:gc.alloc.rate thrpt 6 0.001 ± 0.001 MB/sec
*
* HashingBenchmark.hash3_varags thrpt 6 869246242.250 ± 121680442.505 ops/s
* HashingBenchmark.hash3_varags:gc.alloc.rate thrpt 6 26514.569 ± 3709.819 MB/sec
*
*
* HashingBenchmark.hash4 thrpt 6 1866997193.226 ± 181198915.326 ops/s
* HashingBenchmark.hash4:gc.alloc.rate thrpt 6 0.001 ± 0.001 MB/sec
*
* HashingBenchmark.hash4_varargs thrpt 6 702697142.147 ± 24458612.481 ops/s
* HashingBenchmark.hash4_varargs:gc.alloc.rate thrpt 6 21437.996 ± 748.911 MB/sec
*
*
* HashingBenchmark.hash5 thrpt 6 1803117534.112 ± 242918817.144 ops/s
* HashingBenchmark.hash5:gc.alloc.rate thrpt 6 0.001 ± 0.001 MB/sec
*
* HashingBenchmark.hash5_varargs thrpt 6 579139583.196 ± 29525483.594 ops/s
* HashingBenchmark.hash5_varargs:gc.alloc.rate thrpt 6 22082.357 ± 1125.413 MB/sec
* </code>
*/
@Fork(2)
@Warmup(iterations = 2)
@Measurement(iterations = 3)
@Threads(8)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps this only adds noise compared to running it on a single core.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, the opposite. What I've found is that the allocation / GC impact often isn't clear when you only use one core, so lately, I've been preferring more threads to show the "real" impact in an application.

public class HashingBenchmark {
  /**
   * Invokes the supplier right away and returns whatever it produced. Exists so that a static
   * field can be initialized from a multi-statement lambda.
   */
  static <T> T init(Supplier<T> supplier) {
    return supplier.get();
  }

  // The input strings are created once, before any measurement, so that the benchmarked calls
  // do not create strings themselves; the intent is that any allocation observed comes from the
  // var-args array.
  static String[] TEST_STRINGS =
      init(
          () -> {
            ThreadLocalRandom rng = ThreadLocalRandom.current();

            String[] pool = new String[1024];
            int index = 0;
            while (index < pool.length) {
              pool[index++] = Double.toString(rng.nextDouble());
            }
            return pool;
          });

  // Current benchmark inputs. They start out null and are continually reassigned by the
  // background updater thread started in the static initializer below.
  static String str0;
  static String str1;
  static String str2;
  static String str3;
  static String str4;

  static {
    // Daemon thread, so it never keeps the JVM alive once the benchmark run is over.
    Thread updaterThread =
        new Thread(
            () -> {
              ThreadLocalRandom rng = ThreadLocalRandom.current();

              // Thread.interrupted() also clears the flag; the loop ends on the first interrupt.
              while (!Thread.interrupted()) {
                str0 = pickRandom(rng);
                str1 = pickRandom(rng);
                str2 = pickRandom(rng);
                str3 = pickRandom(rng);
                str4 = pickRandom(rng);
              }
            });
    updaterThread.setDaemon(true);
    updaterThread.start();
  }

  /** Returns a randomly chosen element of {@link #TEST_STRINGS}. */
  private static String pickRandom(ThreadLocalRandom rng) {
    return TEST_STRINGS[rng.nextInt(0, TEST_STRINGS.length)];
  }

  /** Two inputs through the fixed-arity HashingUtils overload. */
  @Benchmark
  public int hash2() {
    return datadog.trace.util.HashingUtils.hash(str0, str1);
  }

  /** Two inputs through the var-args java.util.Objects.hash, as the comparison baseline. */
  @Benchmark
  public int hash2_varargs() {
    return java.util.Objects.hash(str0, str1);
  }

  /** Three inputs through the fixed-arity HashingUtils overload. */
  @Benchmark
  public int hash3() {
    return datadog.trace.util.HashingUtils.hash(str0, str1, str2);
  }

  /** Three inputs through the var-args java.util.Objects.hash, as the comparison baseline. */
  @Benchmark
  public int hash3_varags() {
    return java.util.Objects.hash(str0, str1, str2);
  }

  /** Four inputs through the fixed-arity HashingUtils overload. */
  @Benchmark
  public int hash4() {
    return datadog.trace.util.HashingUtils.hash(str0, str1, str2, str3);
  }

  /** Four inputs through the var-args java.util.Objects.hash, as the comparison baseline. */
  @Benchmark
  public int hash4_varargs() {
    return java.util.Objects.hash(str0, str1, str2, str3);
  }

  /** Five inputs through the fixed-arity HashingUtils overload. */
  @Benchmark
  public int hash5() {
    return datadog.trace.util.HashingUtils.hash(str0, str1, str2, str3, str4);
  }

  /** Five inputs through the var-args java.util.Objects.hash, as the comparison baseline. */
  @Benchmark
  public int hash5_varargs() {
    return java.util.Objects.hash(str0, str1, str2, str3, str4);
  }
}
158 changes: 158 additions & 0 deletions internal-api/src/main/java/datadog/trace/util/HashingUtils.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
package datadog.trace.util;

/**
* This class is intended to be a drop-in replacement for the hashing portions of java.util.Objects.
* This class provides more convenience methods for hashing primitives and includes overrides for
* <code>hash</code> that take many argument lengths to avoid var-args allocation.
*/
public final class HashingUtils {
private HashingUtils() {}

public static final int hashCode(Object obj) {
return obj != null ? obj.hashCode() : 0;
}

public static final int hash(boolean value) {
return Boolean.hashCode(value);
}

public static final int hash(char value) {
return Character.hashCode(value);
}

public static final int hash(byte value) {
return Byte.hashCode(value);
}

public static final int hash(short value) {
return Short.hashCode(value);
}

public static final int hash(int value) {
return Integer.hashCode(value);
}

public static final int hash(long value) {
return Long.hashCode(value);
}

public static final int hash(float value) {
return Float.hashCode(value);
}

public static final int hash(double value) {
return Double.hashCode(value);
}

public static final int hash(Object obj) {
return obj != null ? obj.hashCode() : 0;
}

public static final int hash(Object obj0, Object obj1) {
return hash(hash(obj0), hash(obj1));
}

public static final int hash(int hash0, int hash1) {
return 31 * hash0 + hash1;
}

public static final int hash(Object obj0, Object obj1, Object obj2) {
return hash(hashCode(obj0), hashCode(obj1), hashCode(obj2));
}

public static final int hash(int hash0, int hash1, int hash2) {
// DQH - Micro-optimizing, 31 * 31 will constant fold
// Since there are multiple execution ports for load & store,
// this will make good use of the core.
return 31 * 31 * hash0 + 31 * hash1 + hash2;
}

public static final int hash(Object obj0, Object obj1, Object obj2, Object obj3) {
return hash(hashCode(obj0), hashCode(obj1), hashCode(obj2), hashCode(obj3));
}

public static final int hash(int hash0, int hash1, int hash2, int hash3) {
// DQH - Micro-optimizing, 31 * 31 will constant fold
// Since there are multiple execution ports for load & store,
// this will make good use of the core.
return 31 * 31 * 31 * hash0 + 31 * 31 * hash1 + 31 * hash2 + hash3;
}

public static final int hash(Object obj0, Object obj1, Object obj2, Object obj3, Object obj4) {
return hash(hashCode(obj0), hashCode(obj1), hashCode(obj2), hashCode(obj3));
}

public static final int hash(int hash0, int hash1, int hash2, int hash3, int hash4) {
// DQH - Micro-optimizing, 31 * 31 will constant fold
// Since there are multiple execution ports for load & store,
// this will make good use of the core.
return 31 * 31 * 31 * 31 * hash0 + 31 * 31 * 31 * hash1 + 31 * 31 * hash2 + 31 * hash3 + hash4;
}

@Deprecated
public static final int hash(int[] hashes) {
int result = 0;
for (int hash : hashes) {
result = addToHash(result, hash);
}
return result;
}

public static final int addToHash(int hash, int value) {
return 31 * hash + value;
}

public static final int addToHash(int hash, Object obj) {
return addToHash(hash, hashCode(obj));
}

public static final int addToHash(int hash, boolean value) {
return addToHash(hash, Boolean.hashCode(value));
}

public static final int addToHash(int hash, char value) {
return addToHash(hash, Character.hashCode(value));
}

public static final int addToHash(int hash, byte value) {
return addToHash(hash, Byte.hashCode(value));
}

public static final int addToHash(int hash, short value) {
return addToHash(hash, Short.hashCode(value));
}

public static final int addToHash(int hash, long value) {
return addToHash(hash, Long.hashCode(value));
}

public static final int addToHash(int hash, float value) {
return addToHash(hash, Float.hashCode(value));
}

public static final int addToHash(int hash, double value) {
return addToHash(hash, Double.hashCode(value));
}

public static final int hash(Iterable<?> objs) {
int result = 0;
for (Object obj : objs) {
result = addToHash(result, obj);
}
return result;
}

/**
* Calling this var-arg version can result in large amounts of allocation (see HashingBenchmark)
* Rather than calliing this method, add another override of hash that handles a larger number of
* arguments or use calls to addToHash.
*/
@Deprecated
public static final int hash(Object[] objs) {
int result = 0;
for (Object obj : objs) {
result = addToHash(result, obj);
}
return result;
}
}
Loading