Skip to content

Commit ade493e

Browse files
author
Neil Conway
committed
Add a hash function for "numeric". Mark the equality operator for
numerics as "oprcanhash", and make the corresponding system catalog updates. As a result, hash indexes, hashed aggregation, and hash joins can now be used with the numeric type. Bump the catversion. The only tricky aspect to doing this is writing a correct hash function: it's possible for two Numerics to be equal according to their equality operator, but have different in-memory bit patterns. To cope with this, the hash function doesn't consider the Numeric's "scale" or "sign", and explicitly skips any leading or trailing zeros in the Numeric's digit buffer (the current implementation should suppress any such zeros, but it seems unwise to rely upon this). See discussion on pgsql-patches for more details.
1 parent 97f7969 commit ade493e

File tree

9 files changed

+95
-11
lines changed

9 files changed

+95
-11
lines changed

src/backend/utils/adt/numeric.c

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
* Copyright (c) 1998-2007, PostgreSQL Global Development Group
1515
*
1616
* IDENTIFICATION
17-
* $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.101 2007/02/27 23:48:08 tgl Exp $
17+
* $PostgreSQL: pgsql/src/backend/utils/adt/numeric.c,v 1.102 2007/05/08 18:56:47 neilc Exp $
1818
*
1919
*-------------------------------------------------------------------------
2020
*/
@@ -26,6 +26,7 @@
2626
#include <limits.h>
2727
#include <math.h>
2828

29+
#include "access/hash.h"
2930
#include "catalog/pg_type.h"
3031
#include "libpq/pqformat.h"
3132
#include "utils/array.h"
@@ -1149,6 +1150,81 @@ cmp_numerics(Numeric num1, Numeric num2)
11491150
return result;
11501151
}
11511152

1153+
Datum
1154+
hash_numeric(PG_FUNCTION_ARGS)
1155+
{
1156+
Numeric key = PG_GETARG_NUMERIC(0);
1157+
Datum digit_hash;
1158+
Datum result;
1159+
int weight;
1160+
int start_offset;
1161+
int end_offset;
1162+
int i;
1163+
int hash_len;
1164+
1165+
/* If it's NaN, don't try to hash the rest of the fields */
1166+
if (NUMERIC_IS_NAN(key))
1167+
PG_RETURN_UINT32(0);
1168+
1169+
weight = key->n_weight;
1170+
start_offset = 0;
1171+
end_offset = 0;
1172+
1173+
/*
1174+
* Omit any leading or trailing zeros from the input to the
1175+
* hash. The numeric implementation *should* guarantee that
1176+
* leading and trailing zeros are suppressed, but we're
1177+
* paranoid. Note that we measure the starting and ending offsets
1178+
* in units of NumericDigits, not bytes.
1179+
*/
1180+
for (i = 0; i < NUMERIC_NDIGITS(key); i++)
1181+
{
1182+
if (NUMERIC_DIGITS(key)[i] != (NumericDigit) 0)
1183+
break;
1184+
1185+
start_offset++;
1186+
/*
1187+
* The weight is effectively the # of digits before the
1188+
* decimal point, so decrement it for each leading zero we
1189+
* skip.
1190+
*/
1191+
weight--;
1192+
}
1193+
1194+
/*
1195+
* If there are no non-zero digits, then the value of the number
1196+
* is zero, regardless of any other fields.
1197+
*/
1198+
if (NUMERIC_NDIGITS(key) == start_offset)
1199+
PG_RETURN_UINT32(-1);
1200+
1201+
for (i = NUMERIC_NDIGITS(key) - 1; i >= 0; i--)
1202+
{
1203+
if (NUMERIC_DIGITS(key)[i] != (NumericDigit) 0)
1204+
break;
1205+
1206+
end_offset++;
1207+
}
1208+
1209+
/* If we get here, there should be at least one non-zero digit */
1210+
Assert(start_offset + end_offset < NUMERIC_NDIGITS(key));
1211+
1212+
/*
1213+
* Note that we don't hash on the Numeric's scale, since two
1214+
* numerics can compare equal but have different scales. We also
1215+
* don't hash on the sign, although we could: since a sign
1216+
* difference implies inequality, this shouldn't affect correctness.
1217+
*/
1218+
hash_len = NUMERIC_NDIGITS(key) - start_offset - end_offset;
1219+
digit_hash = hash_any((unsigned char *) (NUMERIC_DIGITS(key) + start_offset),
1220+
hash_len * sizeof(NumericDigit));
1221+
1222+
/* Mix in the weight, via XOR */
1223+
result = digit_hash ^ weight;
1224+
1225+
PG_RETURN_DATUM(result);
1226+
}
1227+
11521228

11531229
/* ----------------------------------------------------------------------
11541230
*

src/include/catalog/catversion.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
3838
* Portions Copyright (c) 1994, Regents of the University of California
3939
*
40-
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.404 2007/04/15 10:56:27 ishii Exp $
40+
* $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.405 2007/05/08 18:56:47 neilc Exp $
4141
*
4242
*-------------------------------------------------------------------------
4343
*/
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 200704151
56+
#define CATALOG_VERSION_NO 200705081
5757

5858
#endif

src/include/catalog/pg_amop.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
3030
* Portions Copyright (c) 1994, Regents of the University of California
3131
*
32-
* $PostgreSQL: pgsql/src/include/catalog/pg_amop.h,v 1.80 2007/04/02 03:49:40 tgl Exp $
32+
* $PostgreSQL: pgsql/src/include/catalog/pg_amop.h,v 1.81 2007/05/08 18:56:47 neilc Exp $
3333
*
3434
* NOTES
3535
* the genbki.sh script reads this file and generates .bki
@@ -568,6 +568,8 @@ DATA(insert ( 2232 19 19 1 f 2334 405 ));
568568
DATA(insert ( 2235 1033 1033 1 f 974 405 ));
569569
/* uuid_ops */
570570
DATA(insert ( 2969 2950 2950 1 f 2972 405 ));
571+
/* numeric_ops */
572+
DATA(insert ( 1998 1700 1700 1 f 1752 405 ));
571573

572574

573575
/*

src/include/catalog/pg_amproc.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
2323
* Portions Copyright (c) 1994, Regents of the University of California
2424
*
25-
* $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.64 2007/04/02 03:49:40 tgl Exp $
25+
* $PostgreSQL: pgsql/src/include/catalog/pg_amproc.h,v 1.65 2007/05/08 18:56:47 neilc Exp $
2626
*
2727
* NOTES
2828
* the genbki.sh script reads this file and generates .bki
@@ -148,6 +148,7 @@ DATA(insert ( 1990 26 26 1 453 ));
148148
DATA(insert ( 1992 30 30 1 457 ));
149149
DATA(insert ( 1995 25 25 1 400 ));
150150
DATA(insert ( 1997 1083 1083 1 452 ));
151+
DATA(insert ( 1998 1700 1700 1 432 ));
151152
DATA(insert ( 1999 1184 1184 1 452 ));
152153
DATA(insert ( 2001 1266 1266 1 1696 ));
153154
DATA(insert ( 2040 1114 1114 1 452 ));

src/include/catalog/pg_opclass.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
2929
* Portions Copyright (c) 1994, Regents of the University of California
3030
*
31-
* $PostgreSQL: pgsql/src/include/catalog/pg_opclass.h,v 1.75 2007/04/02 03:49:40 tgl Exp $
31+
* $PostgreSQL: pgsql/src/include/catalog/pg_opclass.h,v 1.76 2007/05/08 18:56:47 neilc Exp $
3232
*
3333
* NOTES
3434
* the genbki.sh script reads this file and generates .bki
@@ -129,6 +129,7 @@ DATA(insert ( 405 macaddr_ops PGNSP PGUID 1985 829 t 0 ));
129129
DATA(insert ( 403 name_ops PGNSP PGUID 1986 19 t 0 ));
130130
DATA(insert ( 405 name_ops PGNSP PGUID 1987 19 t 0 ));
131131
DATA(insert ( 403 numeric_ops PGNSP PGUID 1988 1700 t 0 ));
132+
DATA(insert ( 405 numeric_ops PGNSP PGUID 1998 1700 t 0 ));
132133
DATA(insert OID = 1981 ( 403 oid_ops PGNSP PGUID 1989 26 t 0 ));
133134
#define OID_BTREE_OPS_OID 1981
134135
DATA(insert ( 405 oid_ops PGNSP PGUID 1990 26 t 0 ));

src/include/catalog/pg_operator.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
99
* Portions Copyright (c) 1994, Regents of the University of California
1010
*
11-
* $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.151 2007/04/02 03:49:40 tgl Exp $
11+
* $PostgreSQL: pgsql/src/include/catalog/pg_operator.h,v 1.152 2007/05/08 18:56:47 neilc Exp $
1212
*
1313
* NOTES
1414
* the genbki.sh script reads this file and generates .bki
@@ -675,7 +675,7 @@ DATA(insert OID = 1630 ( "!~~*" PGNSP PGUID b f f 1042 25 16 0 1629 bpcharicn
675675

676676
/* NUMERIC type - OID's 1700-1799 */
677677
DATA(insert OID = 1751 ( "-" PGNSP PGUID l f f 0 1700 1700 0 0 numeric_uminus - - ));
678-
DATA(insert OID = 1752 ( "=" PGNSP PGUID b t f 1700 1700 16 1752 1753 numeric_eq eqsel eqjoinsel ));
678+
DATA(insert OID = 1752 ( "=" PGNSP PGUID b t t 1700 1700 16 1752 1753 numeric_eq eqsel eqjoinsel ));
679679
DATA(insert OID = 1753 ( "<>" PGNSP PGUID b f f 1700 1700 16 1753 1752 numeric_ne neqsel neqjoinsel ));
680680
DATA(insert OID = 1754 ( "<" PGNSP PGUID b f f 1700 1700 16 1756 1757 numeric_lt scalarltsel scalarltjoinsel ));
681681
DATA(insert OID = 1755 ( "<=" PGNSP PGUID b f f 1700 1700 16 1757 1756 numeric_le scalarltsel scalarltjoinsel ));

src/include/catalog/pg_opfamily.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
99
* Portions Copyright (c) 1994, Regents of the University of California
1010
*
11-
* $PostgreSQL: pgsql/src/include/catalog/pg_opfamily.h,v 1.4 2007/04/02 03:49:40 tgl Exp $
11+
* $PostgreSQL: pgsql/src/include/catalog/pg_opfamily.h,v 1.5 2007/05/08 18:56:47 neilc Exp $
1212
*
1313
* NOTES
1414
* the genbki.sh script reads this file and generates .bki
@@ -93,6 +93,7 @@ DATA(insert OID = 1986 ( 403 name_ops PGNSP PGUID ));
9393
#define NAME_BTREE_FAM_OID 1986
9494
DATA(insert OID = 1987 ( 405 name_ops PGNSP PGUID ));
9595
DATA(insert OID = 1988 ( 403 numeric_ops PGNSP PGUID ));
96+
DATA(insert OID = 1998 ( 405 numeric_ops PGNSP PGUID ));
9697
DATA(insert OID = 1989 ( 403 oid_ops PGNSP PGUID ));
9798
#define OID_BTREE_FAM_OID 1989
9899
DATA(insert OID = 1990 ( 405 oid_ops PGNSP PGUID ));

src/include/catalog/pg_proc.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.454 2007/04/02 03:49:40 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.455 2007/05/08 18:56:48 neilc Exp $
1111
*
1212
* NOTES
1313
* The script catalog/genbki.sh reads this file and generates .bki
@@ -838,6 +838,8 @@ DATA(insert OID = 399 ( hashmacaddr PGNSP PGUID 12 1 0 f f t f i 1 23 "829"
838838
DESCR("hash");
839839
DATA(insert OID = 422 ( hashinet PGNSP PGUID 12 1 0 f f t f i 1 23 "869" _null_ _null_ _null_ hashinet - _null_ ));
840840
DESCR("hash");
841+
DATA(insert OID = 432 ( hash_numeric PGNSP PGUID 12 1 0 f f t f i 1 23 "1700" _null_ _null_ _null_ hash_numeric - _null_ ));
842+
DESCR("hash");
841843
DATA(insert OID = 458 ( text_larger PGNSP PGUID 12 1 0 f f t f i 2 25 "25 25" _null_ _null_ _null_ text_larger - _null_ ));
842844
DESCR("larger of two");
843845
DATA(insert OID = 459 ( text_smaller PGNSP PGUID 12 1 0 f f t f i 2 25 "25 25" _null_ _null_ _null_ text_smaller - _null_ ));

src/include/utils/builtins.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.291 2007/04/02 03:49:41 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.292 2007/05/08 18:56:48 neilc Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -883,6 +883,7 @@ extern Datum int2_avg_accum(PG_FUNCTION_ARGS);
883883
extern Datum int4_avg_accum(PG_FUNCTION_ARGS);
884884
extern Datum int8_avg(PG_FUNCTION_ARGS);
885885
extern Datum width_bucket_numeric(PG_FUNCTION_ARGS);
886+
extern Datum hash_numeric(PG_FUNCTION_ARGS);
886887

887888
/* ri_triggers.c */
888889
extern Datum RI_FKey_check_ins(PG_FUNCTION_ARGS);

0 commit comments

Comments
 (0)