From 65ab2335ea0abca58a87142ef0fcb86c9bc9b8ce Mon Sep 17 00:00:00 2001 From: Matt Jacobs Date: Fri, 22 May 2015 20:29:35 -0700 Subject: [PATCH 1/2] Unit test demonstrating unsafe usage of IntCountsHistogram on write path, and fix --- .../util/HystrixRollingPercentile.java | 15 ++++---- .../util/HystrixRollingPercentileTest.java | 36 +++++++++++++++++++ 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/hystrix-core/src/main/java/com/netflix/hystrix/util/HystrixRollingPercentile.java b/hystrix-core/src/main/java/com/netflix/hystrix/util/HystrixRollingPercentile.java index 2d7ab448d..2c6b09b67 100644 --- a/hystrix-core/src/main/java/com/netflix/hystrix/util/HystrixRollingPercentile.java +++ b/hystrix-core/src/main/java/com/netflix/hystrix/util/HystrixRollingPercentile.java @@ -24,6 +24,7 @@ import java.util.concurrent.locks.ReentrantLock; import org.HdrHistogram.IntCountsHistogram; +import org.HdrHistogram.Recorder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -281,22 +282,18 @@ public void reset() { buckets.clear(); } - private static class PercentileBucketData { - private final IntCountsHistogram histogram; + /*package-private*/ static class PercentileBucketData { + final Recorder recorder; public PercentileBucketData() { - this.histogram = new IntCountsHistogram(4); + this.recorder = new Recorder(4); } public void addValue(int... latency) { for (int l: latency) { - histogram.recordValue(l); + recorder.recordValue(l); } } - - public int length() { - return (int) histogram.getTotalCount(); - } } /** @@ -331,7 +328,7 @@ public int length() { /* package for testing */ PercentileSnapshot(Bucket[] buckets) { aggregateHistogram = new IntCountsHistogram(4); for (Bucket bucket: buckets) { - aggregateHistogram.add(bucket.bucketData.histogram); + aggregateHistogram.add(bucket.bucketData.recorder.getIntervalHistogram()); } count = aggregateHistogram.getTotalCount(); diff --git a/hystrix-core/src/test/java/com/netflix/hystrix/util/HystrixRollingPercentileTest.java b/hystrix-core/src/test/java/com/netflix/hystrix/util/HystrixRollingPercentileTest.java index fba6d421a..5be811cca 100644 --- a/hystrix-core/src/test/java/com/netflix/hystrix/util/HystrixRollingPercentileTest.java +++ b/hystrix-core/src/test/java/com/netflix/hystrix/util/HystrixRollingPercentileTest.java @@ -394,6 +394,42 @@ public void run() { System.out.println(p.getMean() + " : " + p.getPercentile(50) + " : " + p.getPercentile(75) + " : " + p.getPercentile(90) + " : " + p.getPercentile(95) + " : " + p.getPercentile(99)); } + @Test + public void testWriteThreadSafety() { + final MockedTime time = new MockedTime(); + final HystrixRollingPercentile p = new HystrixRollingPercentile(time, HystrixProperty.Factory.asProperty(100), HystrixProperty.Factory.asProperty(25), HystrixProperty.Factory.asProperty(true)); + + final int NUM_THREADS = 1000; + final int NUM_ITERATIONS = 1000000; + + final CountDownLatch latch = new CountDownLatch(NUM_THREADS); + + final Random r = new Random(); + + final AtomicInteger added = new AtomicInteger(0); + + for (int i = 0; i < NUM_THREADS; i++) { + threadPool.submit(new Runnable() { + @Override + public void run() { + for (int j = 1; j < NUM_ITERATIONS / NUM_THREADS + 1; j++) { + int nextInt = r.nextInt(100); + p.addValue(nextInt); + added.getAndIncrement(); + } + latch.countDown(); + } + }); + } + + try { + latch.await(100, TimeUnit.SECONDS); + assertEquals(added.get(), p.buckets.peekLast().bucketData.recorder.getIntervalHistogram().getTotalCount()); + } catch (InterruptedException ex) { + fail("Timeout on all threads writing percentiles"); + } + } + @Test public void testThreadSafetyMulti() { for (int i = 0; i < 100; i++) { From 2639ce4e60818e1710835111e990035d2d2e6b8a Mon Sep 17 00:00:00 2001 From: Matt Jacobs Date: Sat, 23 May 2015 14:39:09 -0700 Subject: [PATCH 2/2] Fix to only get an interval histogram once per recorder --- .../hystrix/util/HystrixRollingPercentile.java | 9 +++++++-- .../hystrix/util/HystrixRollingPercentileTest.java | 11 +++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/hystrix-core/src/main/java/com/netflix/hystrix/util/HystrixRollingPercentile.java b/hystrix-core/src/main/java/com/netflix/hystrix/util/HystrixRollingPercentile.java index 2c6b09b67..c3c8b14e6 100644 --- a/hystrix-core/src/main/java/com/netflix/hystrix/util/HystrixRollingPercentile.java +++ b/hystrix-core/src/main/java/com/netflix/hystrix/util/HystrixRollingPercentile.java @@ -23,6 +23,7 @@ import java.util.concurrent.atomic.AtomicReferenceArray; import java.util.concurrent.locks.ReentrantLock; +import org.HdrHistogram.Histogram; import org.HdrHistogram.IntCountsHistogram; import org.HdrHistogram.Recorder; import org.slf4j.Logger; @@ -284,6 +285,7 @@ public void reset() { /*package-private*/ static class PercentileBucketData { final Recorder recorder; + final AtomicReference stableHistogram = new AtomicReference(null); public PercentileBucketData() { this.recorder = new Recorder(4); @@ -300,7 +302,7 @@ public void addValue(int... latency) { * @NotThreadSafe */ /* package for testing */ static class PercentileSnapshot { - private final IntCountsHistogram aggregateHistogram; + /* package-private*/ final IntCountsHistogram aggregateHistogram; private final long count; private final int mean; private final int p0; @@ -328,7 +330,10 @@ public void addValue(int... latency) { /* package for testing */ PercentileSnapshot(Bucket[] buckets) { aggregateHistogram = new IntCountsHistogram(4); for (Bucket bucket: buckets) { - aggregateHistogram.add(bucket.bucketData.recorder.getIntervalHistogram()); + PercentileBucketData bucketData = bucket.bucketData; + //if stable snapshot not already generated, generate it now + bucketData.stableHistogram.compareAndSet(null, bucketData.recorder.getIntervalHistogram()); + aggregateHistogram.add(bucket.bucketData.stableHistogram.get()); } count = aggregateHistogram.getTotalCount(); diff --git a/hystrix-core/src/test/java/com/netflix/hystrix/util/HystrixRollingPercentileTest.java b/hystrix-core/src/test/java/com/netflix/hystrix/util/HystrixRollingPercentileTest.java index 5be811cca..336740db1 100644 --- a/hystrix-core/src/test/java/com/netflix/hystrix/util/HystrixRollingPercentileTest.java +++ b/hystrix-core/src/test/java/com/netflix/hystrix/util/HystrixRollingPercentileTest.java @@ -162,9 +162,16 @@ public void testSampleDataOverTime1() { MockedTime time = new MockedTime(); HystrixRollingPercentile p = new HystrixRollingPercentile(time, timeInMilliseconds, numberOfBuckets, enabled); int previousTime = 0; + + int maxSoFar = -1; + for (int i = 0; i < SampleDataHolder1.data.length; i++) { int timeInMillisecondsSinceStart = SampleDataHolder1.data[i][0]; int latency = SampleDataHolder1.data[i][1]; + if (latency > maxSoFar) { + System.out.println("New MAX latency : " + latency); + maxSoFar = latency; + } time.increment(timeInMillisecondsSinceStart - previousTime); previousTime = timeInMillisecondsSinceStart; p.addValue(latency); @@ -181,6 +188,8 @@ public void testSampleDataOverTime1() { System.out.println("Median: " + p.getPercentile(50)); System.out.println("Median: " + p.getPercentile(50)); + System.out.println("MAX : " + p.currentPercentileSnapshot.aggregateHistogram.getMaxValue()); + /* * In a loop as a use case was found where very different values were calculated in subsequent requests. */ @@ -216,6 +225,8 @@ public void testSampleDataOverTime2() { System.out.println("99.5th: " + p.getPercentile(99.5)); System.out.println("99.99: " + p.getPercentile(99.99)); + System.out.println("MAX : " + p.currentPercentileSnapshot.aggregateHistogram.getMaxValue()); + if (p.getPercentile(50) > 90 || p.getPercentile(50) < 50) { fail("We expect around 60-70 but got: " + p.getPercentile(50)); }