|
15 | 15 | #include "catalog/pg_type.h"
|
16 | 16 | #include "libpq/pqformat.h"
|
17 | 17 | #include "common/int.h"
|
| 18 | +#include "common/pg_prng.h" |
18 | 19 | #include "port/pg_bitutils.h"
|
19 | 20 | #include "utils/array.h"
|
20 | 21 | #include "utils/datum.h"
|
@@ -1525,3 +1526,168 @@ array_positions(PG_FUNCTION_ARGS)
|
1525 | 1526 |
|
1526 | 1527 | PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
|
1527 | 1528 | }
|
| 1529 | + |
| 1530 | +/* |
| 1531 | + * array_shuffle_n |
| 1532 | + * Return a copy of array with n randomly chosen items. |
| 1533 | + * |
| 1534 | + * The number of items must not exceed the size of the first dimension of the |
| 1535 | + * array. We preserve the first dimension's lower bound if keep_lb, |
| 1536 | + * else it's set to 1. Lower-order dimensions are preserved in any case. |
| 1537 | + * |
| 1538 | + * NOTE: it would be cleaner to look up the elmlen/elmbval/elmalign info |
| 1539 | + * from the system catalogs, given only the elmtyp. However, the caller is |
| 1540 | + * in a better position to cache this info across multiple calls. |
| 1541 | + */ |
| 1542 | +static ArrayType * |
| 1543 | +array_shuffle_n(ArrayType *array, int n, bool keep_lb, |
| 1544 | + Oid elmtyp, TypeCacheEntry *typentry) |
| 1545 | +{ |
| 1546 | + ArrayType *result; |
| 1547 | + int ndim, |
| 1548 | + *dims, |
| 1549 | + *lbs, |
| 1550 | + nelm, |
| 1551 | + nitem, |
| 1552 | + rdims[MAXDIM], |
| 1553 | + rlbs[MAXDIM]; |
| 1554 | + int16 elmlen; |
| 1555 | + bool elmbyval; |
| 1556 | + char elmalign; |
| 1557 | + Datum *elms, |
| 1558 | + *ielms; |
| 1559 | + bool *nuls, |
| 1560 | + *inuls; |
| 1561 | + |
| 1562 | + ndim = ARR_NDIM(array); |
| 1563 | + dims = ARR_DIMS(array); |
| 1564 | + lbs = ARR_LBOUND(array); |
| 1565 | + |
| 1566 | + elmlen = typentry->typlen; |
| 1567 | + elmbyval = typentry->typbyval; |
| 1568 | + elmalign = typentry->typalign; |
| 1569 | + |
| 1570 | + /* If the target array is empty, exit fast */ |
| 1571 | + if (ndim < 1 || dims[0] < 1 || n < 1) |
| 1572 | + return construct_empty_array(elmtyp); |
| 1573 | + |
| 1574 | + deconstruct_array(array, elmtyp, elmlen, elmbyval, elmalign, |
| 1575 | + &elms, &nuls, &nelm); |
| 1576 | + |
| 1577 | + nitem = dims[0]; /* total number of items */ |
| 1578 | + nelm /= nitem; /* number of elements per item */ |
| 1579 | + |
| 1580 | + Assert(n <= nitem); /* else it's caller error */ |
| 1581 | + |
| 1582 | + /* |
| 1583 | + * Shuffle array using Fisher-Yates algorithm. Scan the array and swap |
| 1584 | + * current item (nelm datums starting at ielms) with a randomly chosen |
| 1585 | + * later item (nelm datums starting at jelms) in each iteration. We can |
| 1586 | + * stop once we've done n iterations; then first n items are the result. |
| 1587 | + */ |
| 1588 | + ielms = elms; |
| 1589 | + inuls = nuls; |
| 1590 | + for (int i = 0; i < n; i++) |
| 1591 | + { |
| 1592 | + int j = (int) pg_prng_uint64_range(&pg_global_prng_state, i, nitem - 1) * nelm; |
| 1593 | + Datum *jelms = elms + j; |
| 1594 | + bool *jnuls = nuls + j; |
| 1595 | + |
| 1596 | + /* Swap i'th and j'th items; advance ielms/inuls to next item */ |
| 1597 | + for (int k = 0; k < nelm; k++) |
| 1598 | + { |
| 1599 | + Datum elm = *ielms; |
| 1600 | + bool nul = *inuls; |
| 1601 | + |
| 1602 | + *ielms++ = *jelms; |
| 1603 | + *inuls++ = *jnuls; |
| 1604 | + *jelms++ = elm; |
| 1605 | + *jnuls++ = nul; |
| 1606 | + } |
| 1607 | + } |
| 1608 | + |
| 1609 | + /* Set up dimensions of the result */ |
| 1610 | + memcpy(rdims, dims, ndim * sizeof(int)); |
| 1611 | + memcpy(rlbs, lbs, ndim * sizeof(int)); |
| 1612 | + rdims[0] = n; |
| 1613 | + if (!keep_lb) |
| 1614 | + rlbs[0] = 1; |
| 1615 | + |
| 1616 | + result = construct_md_array(elms, nuls, ndim, rdims, rlbs, |
| 1617 | + elmtyp, elmlen, elmbyval, elmalign); |
| 1618 | + |
| 1619 | + pfree(elms); |
| 1620 | + pfree(nuls); |
| 1621 | + |
| 1622 | + return result; |
| 1623 | +} |
| 1624 | + |
| 1625 | +/* |
| 1626 | + * array_shuffle |
| 1627 | + * |
| 1628 | + * Returns an array with the same dimensions as the input array, with its |
| 1629 | + * first-dimension elements in random order. |
| 1630 | + */ |
| 1631 | +Datum |
| 1632 | +array_shuffle(PG_FUNCTION_ARGS) |
| 1633 | +{ |
| 1634 | + ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); |
| 1635 | + ArrayType *result; |
| 1636 | + Oid elmtyp; |
| 1637 | + TypeCacheEntry *typentry; |
| 1638 | + |
| 1639 | + /* |
| 1640 | + * There is no point in shuffling empty arrays or arrays with less than |
| 1641 | + * two items. |
| 1642 | + */ |
| 1643 | + if (ARR_NDIM(array) < 1 || ARR_DIMS(array)[0] < 2) |
| 1644 | + PG_RETURN_ARRAYTYPE_P(array); |
| 1645 | + |
| 1646 | + elmtyp = ARR_ELEMTYPE(array); |
| 1647 | + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; |
| 1648 | + if (typentry == NULL || typentry->type_id != elmtyp) |
| 1649 | + { |
| 1650 | + typentry = lookup_type_cache(elmtyp, 0); |
| 1651 | + fcinfo->flinfo->fn_extra = (void *) typentry; |
| 1652 | + } |
| 1653 | + |
| 1654 | + result = array_shuffle_n(array, ARR_DIMS(array)[0], true, elmtyp, typentry); |
| 1655 | + |
| 1656 | + PG_RETURN_ARRAYTYPE_P(result); |
| 1657 | +} |
| 1658 | + |
| 1659 | +/* |
| 1660 | + * array_sample |
| 1661 | + * |
| 1662 | + * Returns an array of n randomly chosen first-dimension elements |
| 1663 | + * from the input array. |
| 1664 | + */ |
| 1665 | +Datum |
| 1666 | +array_sample(PG_FUNCTION_ARGS) |
| 1667 | +{ |
| 1668 | + ArrayType *array = PG_GETARG_ARRAYTYPE_P(0); |
| 1669 | + int n = PG_GETARG_INT32(1); |
| 1670 | + ArrayType *result; |
| 1671 | + Oid elmtyp; |
| 1672 | + TypeCacheEntry *typentry; |
| 1673 | + int nitem; |
| 1674 | + |
| 1675 | + nitem = (ARR_NDIM(array) < 1) ? 0 : ARR_DIMS(array)[0]; |
| 1676 | + |
| 1677 | + if (n < 0 || n > nitem) |
| 1678 | + ereport(ERROR, |
| 1679 | + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| 1680 | + errmsg("sample size must be between 0 and %d", nitem))); |
| 1681 | + |
| 1682 | + elmtyp = ARR_ELEMTYPE(array); |
| 1683 | + typentry = (TypeCacheEntry *) fcinfo->flinfo->fn_extra; |
| 1684 | + if (typentry == NULL || typentry->type_id != elmtyp) |
| 1685 | + { |
| 1686 | + typentry = lookup_type_cache(elmtyp, 0); |
| 1687 | + fcinfo->flinfo->fn_extra = (void *) typentry; |
| 1688 | + } |
| 1689 | + |
| 1690 | + result = array_shuffle_n(array, n, false, elmtyp, typentry); |
| 1691 | + |
| 1692 | + PG_RETURN_ARRAYTYPE_P(result); |
| 1693 | +} |
0 commit comments