|
14 | 14 | #include "postgres.h"
|
15 | 15 |
|
16 | 16 | #include "access/hash.h"
|
| 17 | +#include "lib/hyperloglog.h" |
17 | 18 | #include "libpq/pqformat.h"
|
| 19 | +#include "port/pg_bswap.h" |
18 | 20 | #include "utils/builtins.h"
|
| 21 | +#include "utils/guc.h" |
19 | 22 | #include "utils/inet.h"
|
| 23 | +#include "utils/sortsupport.h" |
20 | 24 |
|
21 | 25 |
|
22 | 26 | /*
|
|
29 | 33 | #define lobits(addr) \
|
30 | 34 | ((unsigned long)(((addr)->d<<16)|((addr)->e<<8)|((addr)->f)))
|
31 | 35 |
|
| 36 | +/* sortsupport for macaddr */ |
| 37 | +typedef struct |
| 38 | +{ |
| 39 | + int64 input_count; /* number of non-null values seen */ |
| 40 | + bool estimating; /* true if estimating cardinality */ |
| 41 | + |
| 42 | + hyperLogLogState abbr_card; /* cardinality estimator */ |
| 43 | +} macaddr_sortsupport_state; |
| 44 | + |
| 45 | +static int macaddr_cmp_internal(macaddr *a1, macaddr *a2); |
| 46 | +static int macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup); |
| 47 | +static int macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup); |
| 48 | +static bool macaddr_abbrev_abort(int memtupcount, SortSupport ssup); |
| 49 | +static Datum macaddr_abbrev_convert(Datum original, SortSupport ssup); |
| 50 | + |
32 | 51 | /*
|
33 | 52 | * MAC address reader. Accepts several common notations.
|
34 | 53 | */
|
@@ -159,7 +178,7 @@ macaddr_send(PG_FUNCTION_ARGS)
|
159 | 178 | * Comparison function for sorting:
|
160 | 179 | */
|
161 | 180 |
|
162 |
| -static int32 |
| 181 | +static int |
163 | 182 | macaddr_cmp_internal(macaddr *a1, macaddr *a2)
|
164 | 183 | {
|
165 | 184 | if (hibits(a1) < hibits(a2))
|
@@ -326,3 +345,194 @@ macaddr_trunc(PG_FUNCTION_ARGS)
|
326 | 345 |
|
327 | 346 | PG_RETURN_MACADDR_P(result);
|
328 | 347 | }
|
| 348 | + |
| 349 | +/* |
| 350 | + * SortSupport strategy function. Populates a SortSupport struct with the |
| 351 | + * information necessary to use comparison by abbreviated keys. |
| 352 | + */ |
| 353 | +Datum |
| 354 | +macaddr_sortsupport(PG_FUNCTION_ARGS) |
| 355 | +{ |
| 356 | + SortSupport ssup = (SortSupport) PG_GETARG_POINTER(0); |
| 357 | + |
| 358 | + ssup->comparator = macaddr_fast_cmp; |
| 359 | + ssup->ssup_extra = NULL; |
| 360 | + |
| 361 | + if (ssup->abbreviate) |
| 362 | + { |
| 363 | + macaddr_sortsupport_state *uss; |
| 364 | + MemoryContext oldcontext; |
| 365 | + |
| 366 | + oldcontext = MemoryContextSwitchTo(ssup->ssup_cxt); |
| 367 | + |
| 368 | + uss = palloc(sizeof(macaddr_sortsupport_state)); |
| 369 | + uss->input_count = 0; |
| 370 | + uss->estimating = true; |
| 371 | + initHyperLogLog(&uss->abbr_card, 10); |
| 372 | + |
| 373 | + ssup->ssup_extra = uss; |
| 374 | + |
| 375 | + ssup->comparator = macaddr_cmp_abbrev; |
| 376 | + ssup->abbrev_converter = macaddr_abbrev_convert; |
| 377 | + ssup->abbrev_abort = macaddr_abbrev_abort; |
| 378 | + ssup->abbrev_full_comparator = macaddr_fast_cmp; |
| 379 | + |
| 380 | + MemoryContextSwitchTo(oldcontext); |
| 381 | + } |
| 382 | + |
| 383 | + PG_RETURN_VOID(); |
| 384 | +} |
| 385 | + |
| 386 | +/* |
| 387 | + * SortSupport "traditional" comparison function. Pulls two MAC addresses from |
| 388 | + * the heap and runs a standard comparison on them. |
| 389 | + */ |
| 390 | +static int |
| 391 | +macaddr_fast_cmp(Datum x, Datum y, SortSupport ssup) |
| 392 | +{ |
| 393 | + macaddr *arg1 = DatumGetMacaddrP(x); |
| 394 | + macaddr *arg2 = DatumGetMacaddrP(y); |
| 395 | + |
| 396 | + return macaddr_cmp_internal(arg1, arg2); |
| 397 | +} |
| 398 | + |
| 399 | +/* |
| 400 | + * SortSupport abbreviated key comparison function. Compares two MAC addresses |
| 401 | + * quickly by treating them like integers, and without having to go the heap. |
| 402 | + */ |
| 403 | +static int |
| 404 | +macaddr_cmp_abbrev(Datum x, Datum y, SortSupport ssup) |
| 405 | +{ |
| 406 | + if (x > y) |
| 407 | + return 1; |
| 408 | + else if (x == y) |
| 409 | + return 0; |
| 410 | + else |
| 411 | + return -1; |
| 412 | +} |
| 413 | + |
| 414 | +/* |
| 415 | + * Callback for estimating effectiveness of abbreviated key optimization. |
| 416 | + * |
| 417 | + * We pay no attention to the cardinality of the non-abbreviated data, because |
| 418 | + * there is no equality fast-path within authoritative macaddr comparator. |
| 419 | + */ |
| 420 | +static bool |
| 421 | +macaddr_abbrev_abort(int memtupcount, SortSupport ssup) |
| 422 | +{ |
| 423 | + macaddr_sortsupport_state *uss = ssup->ssup_extra; |
| 424 | + double abbr_card; |
| 425 | + |
| 426 | + if (memtupcount < 10000 || uss->input_count < 10000 || !uss->estimating) |
| 427 | + return false; |
| 428 | + |
| 429 | + abbr_card = estimateHyperLogLog(&uss->abbr_card); |
| 430 | + |
| 431 | + /* |
| 432 | + * If we have >100k distinct values, then even if we were sorting many |
| 433 | + * billion rows we'd likely still break even, and the penalty of undoing |
| 434 | + * that many rows of abbrevs would probably not be worth it. At this point |
| 435 | + * we stop counting because we know that we're now fully committed. |
| 436 | + */ |
| 437 | + if (abbr_card > 100000.0) |
| 438 | + { |
| 439 | +#ifdef TRACE_SORT |
| 440 | + if (trace_sort) |
| 441 | + elog(LOG, |
| 442 | + "macaddr_abbrev: estimation ends at cardinality %f" |
| 443 | + " after " INT64_FORMAT " values (%d rows)", |
| 444 | + abbr_card, uss->input_count, memtupcount); |
| 445 | +#endif |
| 446 | + uss->estimating = false; |
| 447 | + return false; |
| 448 | + } |
| 449 | + |
| 450 | + /* |
| 451 | + * Target minimum cardinality is 1 per ~2k of non-null inputs. 0.5 row |
| 452 | + * fudge factor allows us to abort earlier on genuinely pathological data |
| 453 | + * where we've had exactly one abbreviated value in the first 2k |
| 454 | + * (non-null) rows. |
| 455 | + */ |
| 456 | + if (abbr_card < uss->input_count / 2000.0 + 0.5) |
| 457 | + { |
| 458 | +#ifdef TRACE_SORT |
| 459 | + if (trace_sort) |
| 460 | + elog(LOG, |
| 461 | + "macaddr_abbrev: aborting abbreviation at cardinality %f" |
| 462 | + " below threshold %f after " INT64_FORMAT " values (%d rows)", |
| 463 | + abbr_card, uss->input_count / 2000.0 + 0.5, uss->input_count, |
| 464 | + memtupcount); |
| 465 | +#endif |
| 466 | + return true; |
| 467 | + } |
| 468 | + |
| 469 | +#ifdef TRACE_SORT |
| 470 | + if (trace_sort) |
| 471 | + elog(LOG, |
| 472 | + "macaddr_abbrev: cardinality %f after " INT64_FORMAT |
| 473 | + " values (%d rows)", abbr_card, uss->input_count, memtupcount); |
| 474 | +#endif |
| 475 | + |
| 476 | + return false; |
| 477 | +} |
| 478 | + |
| 479 | +/* |
| 480 | + * SortSupport converstion routine. Converts original macaddr representation |
| 481 | + * to abbreviated key representation. |
| 482 | + * |
| 483 | + * Packs the bytes of a 6-byte MAC address into a Datum and treats it as an |
| 484 | + * unsigned integer for purposes of comparison. On a 64-bit machine, there |
| 485 | + * will be two zeroed bytes of padding. The integer is converted to native |
| 486 | + * endianness to facilitate easy comparison. |
| 487 | + */ |
| 488 | +static Datum |
| 489 | +macaddr_abbrev_convert(Datum original, SortSupport ssup) |
| 490 | +{ |
| 491 | + macaddr_sortsupport_state *uss = ssup->ssup_extra; |
| 492 | + macaddr *authoritative = DatumGetMacaddrP(original); |
| 493 | + Datum res; |
| 494 | + |
| 495 | + /* |
| 496 | + * On a 64-bit machine, zero out the 8-byte datum and copy the 6 bytes of |
| 497 | + * the MAC address in. There will be two bytes of zero padding on the end |
| 498 | + * of the least significant bits. |
| 499 | + */ |
| 500 | +#if SIZEOF_DATUM == 8 |
| 501 | + memset(&res, 0, SIZEOF_DATUM); |
| 502 | + memcpy(&res, authoritative, sizeof(macaddr)); |
| 503 | +#else /* SIZEOF_DATUM != 8 */ |
| 504 | + memcpy(&res, authoritative, SIZEOF_DATUM); |
| 505 | +#endif |
| 506 | + uss->input_count += 1; |
| 507 | + |
| 508 | + /* |
| 509 | + * Cardinality estimation. The estimate uses uint32, so on a 64-bit |
| 510 | + * architecture, XOR the two 32-bit halves together to produce slightly |
| 511 | + * more entropy. The two zeroed bytes won't have any practical impact on |
| 512 | + * this operation. |
| 513 | + */ |
| 514 | + if (uss->estimating) |
| 515 | + { |
| 516 | + uint32 tmp; |
| 517 | + |
| 518 | +#if SIZEOF_DATUM == 8 |
| 519 | + tmp = (uint32) res ^ (uint32) ((uint64) res >> 32); |
| 520 | +#else /* SIZEOF_DATUM != 8 */ |
| 521 | + tmp = (uint32) res; |
| 522 | +#endif |
| 523 | + |
| 524 | + addHyperLogLog(&uss->abbr_card, DatumGetUInt32(hash_uint32(tmp))); |
| 525 | + } |
| 526 | + |
| 527 | + /* |
| 528 | + * Byteswap on little-endian machines. |
| 529 | + * |
| 530 | + * This is needed so that macaddr_cmp_abbrev() (an unsigned integer 3-way |
| 531 | + * comparator) works correctly on all platforms. Without this, the |
| 532 | + * comparator would have to call memcmp() with a pair of pointers to the |
| 533 | + * first byte of each abbreviated key, which is slower. |
| 534 | + */ |
| 535 | + res = DatumBigEndianToNative(res); |
| 536 | + |
| 537 | + return res; |
| 538 | +} |
0 commit comments