Clean up ProbabilitySampler for 64 bit trace IDs (open-telemetry#238)

c24t · codeboten · web-flow · commit c50aab8c70e6 · 2020-02-13T09:32:17.000-08:00
Co-authored-by: alrex &lt;alrex.boten@gmail.com&gt;
diff --git a/opentelemetry-api/src/opentelemetry/trace/sampling.py b/opentelemetry-api/src/opentelemetry/trace/sampling.py
@@ -82,13 +82,13 @@ def __init__(self, rate: float):
         self._rate = rate
         self._bound = self.get_bound_for_rate(self._rate)
 
-    # The sampler checks the last 8 bytes of the trace ID to decide whether to
-    # sample a given trace.
-    CHECK_BYTES = 0xFFFFFFFFFFFFFFFF
+    # For compatibility with 64 bit trace IDs, the sampler checks the 64
+    # low-order bits of the trace ID to decide whether to sample a given trace.
+    TRACE_ID_LIMIT = (1 << 64) - 1
 
     @classmethod
     def get_bound_for_rate(cls, rate: float) -> int:
-        return round(rate * (cls.CHECK_BYTES + 1))
+        return round(rate * (cls.TRACE_ID_LIMIT + 1))
 
     @property
     def rate(self) -> float:
@@ -115,7 +115,7 @@ def should_sample(
         if parent_context is not None:
             return Decision(parent_context.trace_options.sampled)
 
-        return Decision(trace_id & self.CHECK_BYTES < self.bound)
+        return Decision(trace_id & self.TRACE_ID_LIMIT < self.bound)
 
 
 # Samplers that ignore the parent sampling decision and never/always sample.
diff --git a/opentelemetry-api/tests/trace/test_sampling.py b/opentelemetry-api/tests/trace/test_sampling.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import sys
 import unittest
 
 from opentelemetry import trace
@@ -137,7 +138,7 @@ def test_probability_sampler(self):
                 trace.SpanContext(
                     0xDEADBEF0, 0xDEADBEF1, trace_options=TO_DEFAULT
                 ),
-                0x8000000000000000,
+                0x7FFFFFFFFFFFFFFF,
                 0xDEADBEEF,
                 "span name",
             ).sampled
@@ -147,7 +148,7 @@ def test_probability_sampler(self):
                 trace.SpanContext(
                     0xDEADBEF0, 0xDEADBEF1, trace_options=TO_SAMPLED
                 ),
-                0x8000000000000001,
+                0x8000000000000000,
                 0xDEADBEEF,
                 "span name",
             ).sampled
@@ -189,14 +190,13 @@ def test_probability_sampler_limits(self):
             sampling.ProbabilitySampler.get_bound_for_rate(2 ** -64), 0x1
         )
 
-        # Sample every trace with (last 8 bytes of) trace ID less than
-        # 0xffffffffffffffff. In principle this is the highest possible
-        # sampling rate less than 1, but we can't actually express this rate as
-        # a float!
+        # Sample every trace with trace ID less than 0xffffffffffffffff. In
+        # principle this is the highest possible sampling rate less than 1, but
+        # we can't actually express this rate as a float!
         #
         # In practice, the highest possible sampling rate is:
         #
-        #     round(sys.float_info.epsilon * 2 ** 64)
+        #     1 - sys.float_info.epsilon
 
         almost_always_on = sampling.ProbabilitySampler(1 - 2 ** -64)
         self.assertTrue(
@@ -212,12 +212,29 @@ def test_probability_sampler_limits(self):
         # self.assertFalse(
         #     almost_always_on.should_sample(
         #         None,
-        #         0xffffffffffffffff,
-        #         0xdeadbeef,
+        #         0xFFFFFFFFFFFFFFFF,
+        #         0xDEADBEEF,
         #         "span name",
         #     ).sampled
         # )
         # self.assertEqual(
         #     sampling.ProbabilitySampler.get_bound_for_rate(1 - 2 ** -64)),
-        #     0xffffffffffffffff,
+        #     0xFFFFFFFFFFFFFFFF,
         # )
+
+        # Check that a sampler with the highest effective sampling rate < 1
+        # refuses to sample traces with trace ID 0xffffffffffffffff.
+        almost_almost_always_on = sampling.ProbabilitySampler(
+            1 - sys.float_info.epsilon
+        )
+        self.assertFalse(
+            almost_almost_always_on.should_sample(
+                None, 0xFFFFFFFFFFFFFFFF, 0xDEADBEEF, "span name"
+            ).sampled
+        )
+        # Check that the higest effective sampling rate is actually lower than
+        # the highest theoretical sampling rate. If this test fails the test
+        # above is wrong.
+        self.assertLess(
+            almost_almost_always_on.bound, 0xFFFFFFFFFFFFFFFF,
+        )