|
15 | 15 | #include "postgres.h"
|
16 | 16 |
|
17 | 17 | #include <ctype.h>
|
| 18 | +#include <math.h> |
18 | 19 |
|
19 | 20 | #include "access/htup_details.h"
|
| 21 | +#include "catalog/pg_type.h" |
20 | 22 | #include "funcapi.h"
|
21 | 23 | #include "libpq/pqformat.h"
|
22 | 24 | #include "utils/array.h"
|
@@ -130,6 +132,15 @@ static ArrayType *array_replace_internal(ArrayType *array,
|
130 | 132 | Datum replace, bool replace_isnull,
|
131 | 133 | bool remove, Oid collation,
|
132 | 134 | FunctionCallInfo fcinfo);
|
| 135 | +static int width_bucket_array_float8(Datum operand, ArrayType *thresholds); |
| 136 | +static int width_bucket_array_fixed(Datum operand, |
| 137 | + ArrayType *thresholds, |
| 138 | + Oid collation, |
| 139 | + TypeCacheEntry *typentry); |
| 140 | +static int width_bucket_array_variable(Datum operand, |
| 141 | + ArrayType *thresholds, |
| 142 | + Oid collation, |
| 143 | + TypeCacheEntry *typentry); |
133 | 144 |
|
134 | 145 |
|
135 | 146 | /*
|
@@ -5502,3 +5513,235 @@ array_replace(PG_FUNCTION_ARGS)
|
5502 | 5513 | fcinfo);
|
5503 | 5514 | PG_RETURN_ARRAYTYPE_P(array);
|
5504 | 5515 | }
|
| 5516 | + |
| 5517 | +/* |
| 5518 | + * Implements width_bucket(anyelement, anyarray). |
| 5519 | + * |
| 5520 | + * 'thresholds' is an array containing lower bound values for each bucket; |
| 5521 | + * these must be sorted from smallest to largest, or bogus results will be |
| 5522 | + * produced. If N thresholds are supplied, the output is from 0 to N: |
| 5523 | + * 0 is for inputs < first threshold, N is for inputs >= last threshold. |
| 5524 | + */ |
| 5525 | +Datum |
| 5526 | +width_bucket_array(PG_FUNCTION_ARGS) |
| 5527 | +{ |
| 5528 | + Datum operand = PG_GETARG_DATUM(0); |
| 5529 | + ArrayType *thresholds = PG_GETARG_ARRAYTYPE_P(1); |
| 5530 | + Oid collation = PG_GET_COLLATION(); |
| 5531 | + Oid element_type = ARR_ELEMTYPE(thresholds); |
| 5532 | + int result; |
| 5533 | + |
| 5534 | + /* Check input */ |
| 5535 | + if (ARR_NDIM(thresholds) > 1) |
| 5536 | + ereport(ERROR, |
| 5537 | + (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR), |
| 5538 | + errmsg("thresholds must be one-dimensional array"))); |
| 5539 | + |
| 5540 | + if (array_contains_nulls(thresholds)) |
| 5541 | + ereport(ERROR, |
| 5542 | + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), |
| 5543 | + errmsg("thresholds array must not contain NULLs"))); |
| 5544 | + |
| 5545 | + /* We have a dedicated implementation for float8 data */ |
| 5546 | + if (element_type == FLOAT8OID) |
| 5547 | + result = width_bucket_array_float8(operand, thresholds); |
| 5548 | + else |
| 5549 | + { |
| 5550 | + TypeCacheEntry *typentry; |
| 5551 | + |
| 5552 | + /* Cache information about the input type */ |
| 5553 | + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; |
| 5554 | + if (typentry == NULL || |
| 5555 | + typentry->type_id != element_type) |
| 5556 | + { |
| 5557 | + typentry = lookup_type_cache(element_type, |
| 5558 | + TYPECACHE_CMP_PROC_FINFO); |
| 5559 | + if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) |
| 5560 | + ereport(ERROR, |
| 5561 | + (errcode(ERRCODE_UNDEFINED_FUNCTION), |
| 5562 | + errmsg("could not identify a comparison function for type %s", |
| 5563 | + format_type_be(element_type)))); |
| 5564 | + fcinfo->flinfo->fn_extra = (void *) typentry; |
| 5565 | + } |
| 5566 | + |
| 5567 | + /* |
| 5568 | + * We have separate implementation paths for fixed- and variable-width |
| 5569 | + * types, since indexing the array is a lot cheaper in the first case. |
| 5570 | + */ |
| 5571 | + if (typentry->typlen > 0) |
| 5572 | + result = width_bucket_array_fixed(operand, thresholds, |
| 5573 | + collation, typentry); |
| 5574 | + else |
| 5575 | + result = width_bucket_array_variable(operand, thresholds, |
| 5576 | + collation, typentry); |
| 5577 | + } |
| 5578 | + |
| 5579 | + /* Avoid leaking memory when handed toasted input. */ |
| 5580 | + PG_FREE_IF_COPY(thresholds, 1); |
| 5581 | + |
| 5582 | + PG_RETURN_INT32(result); |
| 5583 | +} |
| 5584 | + |
| 5585 | +/* |
| 5586 | + * width_bucket_array for float8 data. |
| 5587 | + */ |
| 5588 | +static int |
| 5589 | +width_bucket_array_float8(Datum operand, ArrayType *thresholds) |
| 5590 | +{ |
| 5591 | + float8 op = DatumGetFloat8(operand); |
| 5592 | + float8 *thresholds_data; |
| 5593 | + int left; |
| 5594 | + int right; |
| 5595 | + |
| 5596 | + /* |
| 5597 | + * Since we know the array contains no NULLs, we can just index it |
| 5598 | + * directly. |
| 5599 | + */ |
| 5600 | + thresholds_data = (float8 *) ARR_DATA_PTR(thresholds); |
| 5601 | + |
| 5602 | + left = 0; |
| 5603 | + right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds)); |
| 5604 | + |
| 5605 | + /* |
| 5606 | + * If the probe value is a NaN, it's greater than or equal to all possible |
| 5607 | + * threshold values (including other NaNs), so we need not search. Note |
| 5608 | + * that this would give the same result as searching even if the array |
| 5609 | + * contains multiple NaNs (as long as they're correctly sorted), since the |
| 5610 | + * loop logic will find the rightmost of multiple equal threshold values. |
| 5611 | + */ |
| 5612 | + if (isnan(op)) |
| 5613 | + return right; |
| 5614 | + |
| 5615 | + /* Find the bucket */ |
| 5616 | + while (left < right) |
| 5617 | + { |
| 5618 | + int mid = (left + right) / 2; |
| 5619 | + |
| 5620 | + if (isnan(thresholds_data[mid]) || op < thresholds_data[mid]) |
| 5621 | + right = mid; |
| 5622 | + else |
| 5623 | + left = mid + 1; |
| 5624 | + } |
| 5625 | + |
| 5626 | + return left; |
| 5627 | +} |
| 5628 | + |
| 5629 | +/* |
| 5630 | + * width_bucket_array for generic fixed-width data types. |
| 5631 | + */ |
| 5632 | +static int |
| 5633 | +width_bucket_array_fixed(Datum operand, |
| 5634 | + ArrayType *thresholds, |
| 5635 | + Oid collation, |
| 5636 | + TypeCacheEntry *typentry) |
| 5637 | +{ |
| 5638 | + char *thresholds_data; |
| 5639 | + int typlen = typentry->typlen; |
| 5640 | + bool typbyval = typentry->typbyval; |
| 5641 | + FunctionCallInfoData locfcinfo; |
| 5642 | + int left; |
| 5643 | + int right; |
| 5644 | + |
| 5645 | + /* |
| 5646 | + * Since we know the array contains no NULLs, we can just index it |
| 5647 | + * directly. |
| 5648 | + */ |
| 5649 | + thresholds_data = (char *) ARR_DATA_PTR(thresholds); |
| 5650 | + |
| 5651 | + InitFunctionCallInfoData(locfcinfo, &typentry->cmp_proc_finfo, 2, |
| 5652 | + collation, NULL, NULL); |
| 5653 | + |
| 5654 | + /* Find the bucket */ |
| 5655 | + left = 0; |
| 5656 | + right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds)); |
| 5657 | + while (left < right) |
| 5658 | + { |
| 5659 | + int mid = (left + right) / 2; |
| 5660 | + char *ptr; |
| 5661 | + int32 cmpresult; |
| 5662 | + |
| 5663 | + ptr = thresholds_data + mid * typlen; |
| 5664 | + |
| 5665 | + locfcinfo.arg[0] = operand; |
| 5666 | + locfcinfo.arg[1] = fetch_att(ptr, typbyval, typlen); |
| 5667 | + locfcinfo.argnull[0] = false; |
| 5668 | + locfcinfo.argnull[1] = false; |
| 5669 | + locfcinfo.isnull = false; |
| 5670 | + |
| 5671 | + cmpresult = DatumGetInt32(FunctionCallInvoke(&locfcinfo)); |
| 5672 | + |
| 5673 | + if (cmpresult < 0) |
| 5674 | + right = mid; |
| 5675 | + else |
| 5676 | + left = mid + 1; |
| 5677 | + } |
| 5678 | + |
| 5679 | + return left; |
| 5680 | +} |
| 5681 | + |
| 5682 | +/* |
| 5683 | + * width_bucket_array for generic variable-width data types. |
| 5684 | + */ |
| 5685 | +static int |
| 5686 | +width_bucket_array_variable(Datum operand, |
| 5687 | + ArrayType *thresholds, |
| 5688 | + Oid collation, |
| 5689 | + TypeCacheEntry *typentry) |
| 5690 | +{ |
| 5691 | + char *thresholds_data; |
| 5692 | + int typlen = typentry->typlen; |
| 5693 | + bool typbyval = typentry->typbyval; |
| 5694 | + char typalign = typentry->typalign; |
| 5695 | + FunctionCallInfoData locfcinfo; |
| 5696 | + int left; |
| 5697 | + int right; |
| 5698 | + |
| 5699 | + thresholds_data = (char *) ARR_DATA_PTR(thresholds); |
| 5700 | + |
| 5701 | + InitFunctionCallInfoData(locfcinfo, &typentry->cmp_proc_finfo, 2, |
| 5702 | + collation, NULL, NULL); |
| 5703 | + |
| 5704 | + /* Find the bucket */ |
| 5705 | + left = 0; |
| 5706 | + right = ArrayGetNItems(ARR_NDIM(thresholds), ARR_DIMS(thresholds)); |
| 5707 | + while (left < right) |
| 5708 | + { |
| 5709 | + int mid = (left + right) / 2; |
| 5710 | + char *ptr; |
| 5711 | + int i; |
| 5712 | + int32 cmpresult; |
| 5713 | + |
| 5714 | + /* Locate mid'th array element by advancing from left element */ |
| 5715 | + ptr = thresholds_data; |
| 5716 | + for (i = left; i < mid; i++) |
| 5717 | + { |
| 5718 | + ptr = att_addlength_pointer(ptr, typlen, ptr); |
| 5719 | + ptr = (char *) att_align_nominal(ptr, typalign); |
| 5720 | + } |
| 5721 | + |
| 5722 | + locfcinfo.arg[0] = operand; |
| 5723 | + locfcinfo.arg[1] = fetch_att(ptr, typbyval, typlen); |
| 5724 | + locfcinfo.argnull[0] = false; |
| 5725 | + locfcinfo.argnull[1] = false; |
| 5726 | + locfcinfo.isnull = false; |
| 5727 | + |
| 5728 | + cmpresult = DatumGetInt32(FunctionCallInvoke(&locfcinfo)); |
| 5729 | + |
| 5730 | + if (cmpresult < 0) |
| 5731 | + right = mid; |
| 5732 | + else |
| 5733 | + { |
| 5734 | + left = mid + 1; |
| 5735 | + |
| 5736 | + /* |
| 5737 | + * Move the thresholds pointer to match new "left" index, so we |
| 5738 | + * don't have to seek over those elements again. This trick |
| 5739 | + * ensures we do only O(N) array indexing work, not O(N^2). |
| 5740 | + */ |
| 5741 | + ptr = att_addlength_pointer(ptr, typlen, ptr); |
| 5742 | + thresholds_data = (char *) att_align_nominal(ptr, typalign); |
| 5743 | + } |
| 5744 | + } |
| 5745 | + |
| 5746 | + return left; |
| 5747 | +} |
0 commit comments