Skip to content

Commit 95f8901

Browse files
committed
Add comparison operators and btree indexing support for type bytea.
From Joe Conway.
1 parent 4d7af98 commit 95f8901

File tree

9 files changed

+328
-10
lines changed

9 files changed

+328
-10
lines changed

src/backend/utils/adt/selfuncs.c

+116-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
*
1616
*
1717
* IDENTIFICATION
18-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.95 2001/07/16 05:06:59 tgl Exp $
18+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.96 2001/08/13 18:45:35 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -134,8 +134,16 @@ static void convert_string_to_scalar(unsigned char *value,
134134
double *scaledlobound,
135135
unsigned char *hibound,
136136
double *scaledhibound);
137+
static void convert_bytea_to_scalar(Datum value,
138+
double *scaledvalue,
139+
Datum lobound,
140+
double *scaledlobound,
141+
Datum hibound,
142+
double *scaledhibound);
137143
static double convert_one_string_to_scalar(unsigned char *value,
138144
int rangelo, int rangehi);
145+
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
146+
int rangelo, int rangehi);
139147
static unsigned char *convert_string_datum(Datum value, Oid typid);
140148
static double convert_timevalue_to_scalar(Datum value, Oid typid);
141149
static double get_att_numdistinct(Query *root, Var *var,
@@ -1664,6 +1672,9 @@ icnlikejoinsel(PG_FUNCTION_ARGS)
16641672
* which is explained below. The reason why this routine deals with
16651673
* three values at a time, not just one, is that we need it for strings.
16661674
*
1675+
* The bytea datatype is just enough different from strings that it has
1676+
* to be treated separately.
1677+
*
16671678
* The several datatypes representing absolute times are all converted
16681679
* to Timestamp, which is actually a double, and then we just use that
16691680
* double value. Note this will give bad results for the various "special"
@@ -1718,6 +1729,17 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
17181729
return true;
17191730
}
17201731

1732+
/*
1733+
* Built-in bytea type
1734+
*/
1735+
case BYTEAOID:
1736+
{
1737+
convert_bytea_to_scalar(value, scaledvalue,
1738+
lobound, scaledlobound,
1739+
hibound, scaledhibound);
1740+
return true;
1741+
}
1742+
17211743
/*
17221744
* Built-in time types
17231745
*/
@@ -1996,6 +2018,99 @@ convert_string_datum(Datum value, Oid typid)
19962018
return (unsigned char *) val;
19972019
}
19982020

2021+
/*
2022+
* Do convert_to_scalar()'s work for any bytea data type.
2023+
*
2024+
* Very similar to convert_string_to_scalar except we can't assume
2025+
* null-termination and therefore pass explicit lengths around.
2026+
*
2027+
* Also, assumptions about likely "normal" ranges of characters have been
2028+
* removed - a data range of 0..255 is always used, for now. (Perhaps
2029+
* someday we will add information about actual byte data range to
2030+
* pg_statistic.)
2031+
*/
2032+
static void
2033+
convert_bytea_to_scalar(Datum value,
2034+
double *scaledvalue,
2035+
Datum lobound,
2036+
double *scaledlobound,
2037+
Datum hibound,
2038+
double *scaledhibound)
2039+
{
2040+
int rangelo,
2041+
rangehi,
2042+
valuelen = VARSIZE(DatumGetPointer(value)) - VARHDRSZ,
2043+
loboundlen = VARSIZE(DatumGetPointer(lobound)) - VARHDRSZ,
2044+
hiboundlen = VARSIZE(DatumGetPointer(hibound)) - VARHDRSZ,
2045+
i,
2046+
minlen;
2047+
unsigned char *valstr = (unsigned char *) VARDATA(DatumGetPointer(value)),
2048+
*lostr = (unsigned char *) VARDATA(DatumGetPointer(lobound)),
2049+
*histr = (unsigned char *) VARDATA(DatumGetPointer(hibound));
2050+
2051+
/*
2052+
* Assume bytea data is uniformly distributed across all byte values.
2053+
*/
2054+
rangelo = 0;
2055+
rangehi = 255;
2056+
2057+
/*
2058+
* Now strip any common prefix of the three strings.
2059+
*/
2060+
minlen = Min(Min(valuelen, loboundlen), hiboundlen);
2061+
for (i = 0; i < minlen; i++)
2062+
{
2063+
if (*lostr != *histr || *lostr != *valstr)
2064+
break;
2065+
lostr++, histr++, valstr++;
2066+
loboundlen--, hiboundlen--, valuelen--;
2067+
}
2068+
2069+
/*
2070+
* Now we can do the conversions.
2071+
*/
2072+
*scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
2073+
*scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
2074+
*scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
2075+
}
2076+
2077+
/*
 * Map one byte string onto a scalar fraction.
 *
 * The bytes are read, most significant first, as successive digits of a
 * fraction in base (rangehi - rangelo + 1).  A byte below rangelo is
 * treated as rangelo - 1 and a byte above rangehi as rangehi + 1, so
 * out-of-range bytes sort just outside the expected range.
 *
 * value:			 pointer to the bytes (not null-terminated)
 * valuelen:		 number of bytes available at value
 * rangelo, rangehi: smallest and largest byte values expected
 *
 * Returns 0.0 for an empty string.  Also returns 0.0 when the range is
 * degenerate (rangehi < rangelo): the base would then be zero or negative,
 * and the digit loop would divide by zero or flip sign on every digit.
 */
static double
convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
							int rangelo, int rangehi)
{
	/*
	 * Since base is 256 for the full byte range, need not consider more
	 * than about 10 bytes (even this many seems like overkill).
	 */
	enum
	{
		BYTEA_MAX_SIGNIFICANT_BYTES = 10
	};

	double		num,
				denom,
				base;
	int			i;

	if (valuelen <= 0)
		return 0.0;				/* empty string has scalar value 0 */

	/* Do the arithmetic in double so extreme bounds cannot overflow int. */
	base = (double) rangehi - (double) rangelo + 1.0;
	if (base <= 0.0)
		return 0.0;				/* no usable range: nothing to scale */

	if (valuelen > BYTEA_MAX_SIGNIFICANT_BYTES)
		valuelen = BYTEA_MAX_SIGNIFICANT_BYTES;

	/* Convert initial bytes to a fraction, one base-N digit at a time */
	num = 0.0;
	denom = base;
	for (i = 0; i < valuelen; i++)
	{
		int			ch = value[i];

		/* Clamp out-of-range bytes to just outside the expected range */
		if (ch < rangelo)
			ch = rangelo - 1;
		else if (ch > rangehi)
			ch = rangehi + 1;

		num += ((double) (ch - rangelo)) / denom;
		denom *= base;
	}

	return num;
}
2113+
19992114
/*
20002115
* Do convert_to_scalar()'s work for any timevalue data type.
20012116
*/

src/backend/utils/adt/varlena.c

+160-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.70 2001/05/03 19:00:36 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varlena.c,v 1.71 2001/08/13 18:45:35 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -875,3 +875,162 @@ name_text(PG_FUNCTION_ARGS)
875875

876876
PG_RETURN_TEXT_P(result);
877877
}
878+
879+
880+
/*****************************************************************************
881+
* Comparison Functions used for bytea
882+
*
883+
* Note: btree indexes need these routines not to leak memory; therefore,
884+
* be careful to free working copies of toasted datums. Most places don't
885+
* need to be so careful.
886+
*****************************************************************************/
887+
888+
Datum
889+
byteaeq(PG_FUNCTION_ARGS)
890+
{
891+
bytea *arg1 = PG_GETARG_BYTEA_P(0);
892+
bytea *arg2 = PG_GETARG_BYTEA_P(1);
893+
int len1,
894+
len2;
895+
bool result;
896+
897+
len1 = VARSIZE(arg1) - VARHDRSZ;
898+
len2 = VARSIZE(arg2) - VARHDRSZ;
899+
900+
/* fast path for different-length inputs */
901+
if (len1 != len2)
902+
result = false;
903+
else
904+
result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) == 0);
905+
906+
PG_FREE_IF_COPY(arg1, 0);
907+
PG_FREE_IF_COPY(arg2, 1);
908+
909+
PG_RETURN_BOOL(result);
910+
}
911+
912+
Datum
913+
byteane(PG_FUNCTION_ARGS)
914+
{
915+
bytea *arg1 = PG_GETARG_BYTEA_P(0);
916+
bytea *arg2 = PG_GETARG_BYTEA_P(1);
917+
int len1,
918+
len2;
919+
bool result;
920+
921+
len1 = VARSIZE(arg1) - VARHDRSZ;
922+
len2 = VARSIZE(arg2) - VARHDRSZ;
923+
924+
/* fast path for different-length inputs */
925+
if (len1 != len2)
926+
result = true;
927+
else
928+
result = (memcmp(VARDATA(arg1), VARDATA(arg2), len1) != 0);
929+
930+
PG_FREE_IF_COPY(arg1, 0);
931+
PG_FREE_IF_COPY(arg2, 1);
932+
933+
PG_RETURN_BOOL(result);
934+
}
935+
936+
Datum
937+
bytealt(PG_FUNCTION_ARGS)
938+
{
939+
bytea *arg1 = PG_GETARG_BYTEA_P(0);
940+
bytea *arg2 = PG_GETARG_BYTEA_P(1);
941+
int len1,
942+
len2;
943+
int cmp;
944+
945+
len1 = VARSIZE(arg1) - VARHDRSZ;
946+
len2 = VARSIZE(arg2) - VARHDRSZ;
947+
948+
cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
949+
950+
PG_FREE_IF_COPY(arg1, 0);
951+
PG_FREE_IF_COPY(arg2, 1);
952+
953+
PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 < len2)));
954+
}
955+
956+
Datum
957+
byteale(PG_FUNCTION_ARGS)
958+
{
959+
bytea *arg1 = PG_GETARG_BYTEA_P(0);
960+
bytea *arg2 = PG_GETARG_BYTEA_P(1);
961+
int len1,
962+
len2;
963+
int cmp;
964+
965+
len1 = VARSIZE(arg1) - VARHDRSZ;
966+
len2 = VARSIZE(arg2) - VARHDRSZ;
967+
968+
cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
969+
970+
PG_FREE_IF_COPY(arg1, 0);
971+
PG_FREE_IF_COPY(arg2, 1);
972+
973+
PG_RETURN_BOOL((cmp < 0) || ((cmp == 0) && (len1 <= len2)));
974+
}
975+
976+
Datum
977+
byteagt(PG_FUNCTION_ARGS)
978+
{
979+
bytea *arg1 = PG_GETARG_BYTEA_P(0);
980+
bytea *arg2 = PG_GETARG_BYTEA_P(1);
981+
int len1,
982+
len2;
983+
int cmp;
984+
985+
len1 = VARSIZE(arg1) - VARHDRSZ;
986+
len2 = VARSIZE(arg2) - VARHDRSZ;
987+
988+
cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
989+
990+
PG_FREE_IF_COPY(arg1, 0);
991+
PG_FREE_IF_COPY(arg2, 1);
992+
993+
PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 > len2)));
994+
}
995+
996+
Datum
997+
byteage(PG_FUNCTION_ARGS)
998+
{
999+
bytea *arg1 = PG_GETARG_BYTEA_P(0);
1000+
bytea *arg2 = PG_GETARG_BYTEA_P(1);
1001+
int len1,
1002+
len2;
1003+
int cmp;
1004+
1005+
len1 = VARSIZE(arg1) - VARHDRSZ;
1006+
len2 = VARSIZE(arg2) - VARHDRSZ;
1007+
1008+
cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1009+
1010+
PG_FREE_IF_COPY(arg1, 0);
1011+
PG_FREE_IF_COPY(arg2, 1);
1012+
1013+
PG_RETURN_BOOL((cmp > 0) || ((cmp == 0) && (len1 >= len2)));
1014+
}
1015+
1016+
Datum
1017+
byteacmp(PG_FUNCTION_ARGS)
1018+
{
1019+
bytea *arg1 = PG_GETARG_BYTEA_P(0);
1020+
bytea *arg2 = PG_GETARG_BYTEA_P(1);
1021+
int len1,
1022+
len2;
1023+
int cmp;
1024+
1025+
len1 = VARSIZE(arg1) - VARHDRSZ;
1026+
len2 = VARSIZE(arg2) - VARHDRSZ;
1027+
1028+
cmp = memcmp(VARDATA(arg1), VARDATA(arg2), Min(len1, len2));
1029+
if ((cmp == 0) && (len1 != len2))
1030+
cmp = (len1 < len2) ? -1 : 1;
1031+
1032+
PG_FREE_IF_COPY(arg1, 0);
1033+
PG_FREE_IF_COPY(arg2, 1);
1034+
1035+
PG_RETURN_INT32(cmp);
1036+
}

src/include/catalog/catversion.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
3838
* Portions Copyright (c) 1994, Regents of the University of California
3939
*
40-
* $Id: catversion.h,v 1.87 2001/08/10 18:57:39 tgl Exp $
40+
* $Id: catversion.h,v 1.88 2001/08/13 18:45:36 tgl Exp $
4141
*
4242
*-------------------------------------------------------------------------
4343
*/
@@ -53,6 +53,6 @@
5353
*/
5454

5555
/* yyyymmddN */
56-
#define CATALOG_VERSION_NO 200108101
56+
#define CATALOG_VERSION_NO 200108131
5757

5858
#endif

src/include/catalog/pg_amop.h

+11-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
99
* Portions Copyright (c) 1994, Regents of the University of California
1010
*
11-
* $Id: pg_amop.h,v 1.39 2001/08/10 18:57:39 tgl Exp $
11+
* $Id: pg_amop.h,v 1.40 2001/08/13 18:45:36 tgl Exp $
1212
*
1313
* NOTES
1414
* the genbki.sh script reads this file and generates .bki
@@ -249,6 +249,16 @@ DATA(insert ( 403 1077 1062 3 ));
249249
DATA(insert ( 403 1077 1069 4 ));
250250
DATA(insert ( 403 1077 1068 5 ));
251251

252+
/*
253+
* nbtree bytea_ops
254+
*/
255+
256+
DATA(insert ( 403 1961 1957 1 ));
257+
DATA(insert ( 403 1961 1958 2 ));
258+
DATA(insert ( 403 1961 1955 3 ));
259+
DATA(insert ( 403 1961 1960 4 ));
260+
DATA(insert ( 403 1961 1959 5 ));
261+
252262
/*
253263
* nbtree date_ops
254264
*/

src/include/catalog/pg_amproc.h

+2-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
1111
* Portions Copyright (c) 1994, Regents of the University of California
1212
*
13-
* $Id: pg_amproc.h,v 1.28 2001/08/10 18:57:39 tgl Exp $
13+
* $Id: pg_amproc.h,v 1.29 2001/08/13 18:45:36 tgl Exp $
1414
*
1515
* NOTES
1616
* the genbki.sh script reads this file and generates .bki
@@ -101,6 +101,7 @@ DATA(insert (403 1690 1693 1));
101101
DATA(insert (403 1399 1358 1));
102102
DATA(insert (403 424 1596 1));
103103
DATA(insert (403 425 1672 1));
104+
DATA(insert (403 1961 1954 1));
104105

105106

106107
/* hash */

0 commit comments

Comments
 (0)