postgres
diff --git a/‎src/common/Makefile
Lines changed: 1 addition & 0 deletions b/‎src/common/Makefile
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/common/meson.build
Lines changed: 1 addition & 0 deletions b/‎src/common/meson.build
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/common/unicode/Makefile
Lines changed: 12 additions & 3 deletions b/‎src/common/unicode/Makefile
Lines changed: 12 additions & 3 deletions
diff --git a/‎src/common/unicode/case_test.c
Lines changed: 100 additions & 0 deletions b/‎src/common/unicode/case_test.c
Lines changed: 100 additions & 0 deletions
diff --git a/‎src/common/unicode/generate-unicode_case_table.pl
Lines changed: 134 additions & 0 deletions b/‎src/common/unicode/generate-unicode_case_table.pl
Lines changed: 134 additions & 0 deletions
diff --git a/‎src/common/unicode/meson.build
Lines changed: 31 additions & 0 deletions b/‎src/common/unicode/meson.build
Lines changed: 31 additions & 0 deletions
@@ -78,6 +78,7 @@ OBJS_COMMON = \
 	scram-common.o \
 	string.o \
 	stringinfo.o \
+	unicode_case.o \
 	unicode_category.o \
 	unicode_norm.o \
 	username.o \
 
@@ -32,6 +32,7 @@ common_sources = files(
   'scram-common.c',
   'string.c',
   'stringinfo.c',
+  'unicode_case.c',
   'unicode_category.c',
   'unicode_norm.c',
   'username.c',
 
@@ -21,8 +21,9 @@ CPPFLAGS += $(ICU_CFLAGS)
 # By default, do nothing.
 all:
 
-update-unicode: unicode_category_table.h unicode_east_asian_fw_table.h unicode_nonspacing_table.h unicode_norm_hashfunc.h unicode_norm_table.h unicode_normprops_table.h unicode_version.h
+update-unicode: unicode_case_table.h unicode_category_table.h unicode_east_asian_fw_table.h unicode_nonspacing_table.h unicode_norm_hashfunc.h unicode_norm_table.h unicode_normprops_table.h unicode_version.h
 	mv $^ $(top_srcdir)/src/include/common/
+	$(MAKE) case-check
 	$(MAKE) category-check
 	$(MAKE) normalization-check
 
@@ -35,6 +36,9 @@ CompositionExclusions.txt DerivedCoreProperties.txt DerivedNormalizationProps.tx
 unicode_version.h: generate-unicode_version.pl
 	$(PERL) $< --version $(UNICODE_VERSION)
 
+# Only the generator script ($<) is passed to perl: the script itself opens
+# UnicodeData.txt from its output directory, which defaults to ".".
+unicode_case_table.h: generate-unicode_case_table.pl UnicodeData.txt
+	$(PERL) $<
+
 unicode_category_table.h: generate-unicode_category_table.pl DerivedCoreProperties.txt PropList.txt UnicodeData.txt
 	$(PERL) $<
 
@@ -55,12 +59,17 @@ unicode_normprops_table.h: generate-unicode_normprops_table.pl DerivedNormalizat
 	$(PERL) $^ >$@
 
 # Test suite
+# Run the case mapping test program, building it first if necessary.
+case-check: case_test
+	./case_test
+
 category-check: category_test
 	./category_test
 
 normalization-check: norm_test
 	./norm_test
 
+# submake-common is order-only (after "|"): it must be brought up to date
+# first, but its timestamp never forces case_test to be relinked.
+case_test: case_test.o ../unicode_case.o | submake-common
+
 category_test: category_test.o ../unicode_category.o | submake-common
 
 norm_test: norm_test.o ../unicode_norm.o | submake-common
@@ -79,7 +88,7 @@ norm_test_table.h: generate-norm_test_table.pl NormalizationTest.txt
 
 
 clean:
-	rm -f $(OBJS) category_test category_test.o norm_test norm_test.o
+	rm -f $(OBJS) case_test case_test.o category_test category_test.o norm_test norm_test.o
 
 distclean: clean
-	rm -f CompositionExclusions.txt DerivedCoreProperties.txt DerivedNormalizationProps.txt EastAsianWidth.txt NormalizationTest.txt PropList.txt UnicodeData.txt norm_test_table.h unicode_category_table.h unicode_norm_table.h
+	rm -f CompositionExclusions.txt DerivedCoreProperties.txt DerivedNormalizationProps.txt EastAsianWidth.txt NormalizationTest.txt PropList.txt UnicodeData.txt norm_test_table.h unicode_case_table.h unicode_category_table.h unicode_norm_table.h
@@ -0,0 +1,100 @@
+/*-------------------------------------------------------------------------
+ * case_test.c
+ *		Program to test Unicode case mapping functions.
+ *
+ * Portions Copyright (c) 2017-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/common/unicode/case_test.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wctype.h>
+
+#ifdef USE_ICU
+#include <unicode/uchar.h>
+#endif
+#include "common/unicode_case.h"
+#include "common/unicode_category.h"
+#include "common/unicode_version.h"
+
+#ifdef USE_ICU
+
+/*
+ * Check a single code point: the built-in simple lower/title/upper case
+ * mappings must agree with ICU's u_tolower(), u_totitle() and u_toupper().
+ *
+ * On any disagreement, print both sets of results and exit with status 1.
+ */
+static void
+icu_test_simple(pg_wchar code)
+{
+	pg_wchar	pg_lower = unicode_lowercase_simple(code);
+	pg_wchar	pg_title = unicode_titlecase_simple(code);
+	pg_wchar	pg_upper = unicode_uppercase_simple(code);
+	pg_wchar	icu_lower = u_tolower(code);
+	pg_wchar	icu_title = u_totitle(code);
+	pg_wchar	icu_upper = u_toupper(code);
+
+	/* Nothing to report when all three mappings agree. */
+	if (pg_lower == icu_lower && pg_title == icu_title && pg_upper == icu_upper)
+		return;
+
+	printf("case_test: FAILURE for codepoint 0x%06x\n", code);
+	printf("case_test: Postgres lower/title/upper:	0x%06x/0x%06x/0x%06x\n",
+		   pg_lower, pg_title, pg_upper);
+	printf("case_test: ICU lower/title/upper:		0x%06x/0x%06x/0x%06x\n",
+		   icu_lower, icu_title, icu_upper);
+	printf("\n");
+	exit(1);
+}
+
+/*
+ * Compare the simple case mappings with ICU for every code point from 0
+ * through 0x10ffff that unicode_category() reports as assigned.
+ *
+ * A code point that ICU's u_charType() reports as unassigned is counted as
+ * skipped rather than tested; the summary line attributes such skips to a
+ * Unicode version mismatch.  The number of code points tested is printed at
+ * the end.
+ */
+static void
+test_icu(void)
+{
+	int			tested = 0;
+	int			skipped = 0;
+	pg_wchar	code;
+
+	for (code = 0; code <= 0x10ffff; code++)
+	{
+		/* Unassigned on the Postgres side: nothing to compare. */
+		if (unicode_category(code) == PG_U_UNASSIGNED)
+			continue;
+
+		/* Assigned here but not in ICU: count it and move on. */
+		if ((uint8_t) u_charType(code) == PG_U_UNASSIGNED)
+		{
+			skipped++;
+			continue;
+		}
+
+		icu_test_simple(code);
+		tested++;
+	}
+
+	if (skipped > 0)
+		printf("case_test: skipped %d codepoints unassigned in ICU due to Unicode version mismatch\n",
+			   skipped);
+
+	printf("case_test: ICU simple mapping test: %d codepoints successful\n",
+		   tested);
+}
+#endif
+
+/*
+ * Print the Unicode version of the Postgres tables and, when built with ICU,
+ * the ICU Unicode version, then exhaustively compare the simple case
+ * mappings with the results from ICU.  Without ICU the comparison is
+ * skipped.  The command-line arguments are not examined.
+ */
+int
+main(int argc, char **argv)
+{
+	printf("case_test: Postgres Unicode version:\t%s\n", PG_UNICODE_VERSION);
+
+#ifndef USE_ICU
+	printf("case_test: ICU not available; skipping\n");
+#else
+	printf("case_test: ICU Unicode version:\t\t%s\n", U_UNICODE_VERSION);
+	test_icu();
+#endif
+
+	exit(0);
+}
@@ -0,0 +1,134 @@
+#!/usr/bin/perl
+#
+# Generate Unicode character case mappings. Does not include tailoring
+# or locale-specific mappings.
+#
+# Input: UnicodeData.txt
+# Output: unicode_case_table.h
+#
+# Both files are located in the directory given by --outdir (default: the
+# current directory).  Any other command-line arguments are not examined.
+#
+# Copyright (c) 2000-2023, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+use Getopt::Long;
+
+use FindBin;
+use lib "$FindBin::RealBin/../../tools/";
+
+my $output_path = '.';
+
+GetOptions('outdir:s' => \$output_path);
+
+my $output_table_file = "$output_path/unicode_case_table.h";
+
+my $FH;
+
+# Simple case mappings keyed by numeric code point.  Only code points with
+# at least one non-empty mapping field get an entry; within an entry, a
+# field that is empty in UnicodeData.txt maps the code point to itself.
+my %simple = ();
+
+open($FH, '<', "$output_path/UnicodeData.txt")
+  or die "Could not open $output_path/UnicodeData.txt: $!.";
+while (my $line = <$FH>)
+{
+	my @elts = split(';', $line);
+	my $code = hex($elts[0]);
+
+	# Fields 12..14 are read as the simple uppercase, lowercase and
+	# titlecase mappings.  Surrounding whitespace (including the line
+	# terminator on the last field) is stripped; an empty field yields 0.
+	my $simple_uppercase = hex($elts[12] =~ s/^\s+|\s+$//rg);
+	my $simple_lowercase = hex($elts[13] =~ s/^\s+|\s+$//rg);
+	my $simple_titlecase = hex($elts[14] =~ s/^\s+|\s+$//rg);
+
+	# Report the offending code point in hex, the notation used by
+	# UnicodeData.txt, so that it can be searched for directly.
+	my $hexcode = sprintf("0x%06x", $code);
+
+	die "codepoint $hexcode out of range" if $code > 0x10FFFF;
+	die "Simple_Lowercase $hexcode out of range"
+	  if $simple_lowercase > 0x10FFFF;
+	die "Simple_Titlecase $hexcode out of range"
+	  if $simple_titlecase > 0x10FFFF;
+	die "Simple_Uppercase $hexcode out of range"
+	  if $simple_uppercase > 0x10FFFF;
+
+	if ($simple_lowercase || $simple_titlecase || $simple_uppercase)
+	{
+		$simple{$code} = {
+			Simple_Lowercase => ($simple_lowercase || $code),
+			Simple_Titlecase => ($simple_titlecase || $code),
+			Simple_Uppercase => ($simple_uppercase || $code)
+		};
+	}
+}
+close $FH;
+
+# Start writing out the output files
+open my $OT, '>', $output_table_file
+  or die "Could not open output file $output_table_file: $!\n";
+
+# determine size of array given that codepoints < 0x80 are dense (one
+# entry each, whether or not they have a mapping) and the rest of the
+# entries are sparse
+my $num_sparse = grep { $_ >= 0x80 } keys %simple;
+my $num_simple = 0x80 + $num_sparse;
+
+print $OT <<"EOS";
+/*-------------------------------------------------------------------------
+ *
+ * unicode_case_table.h
+ *	  Case mapping and information table.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * src/include/common/unicode_case_table.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * File auto-generated by src/common/unicode/generate-unicode_case_table.pl,
+ * do not edit. There is deliberately not an #ifndef PG_UNICODE_CASE_TABLE_H
+ * here.
+ */
+
+#include "common/unicode_case.h"
+#include "mb/pg_wchar.h"
+
+typedef enum
+{
+	CaseLower = 0,
+	CaseTitle = 1,
+	CaseUpper = 2,
+	NCaseKind
+}			CaseKind;
+
+typedef struct
+{
+	pg_wchar	codepoint;		/* Unicode codepoint */
+	pg_wchar	simplemap[NCaseKind];
+}			pg_case_map;
+
+/*
+ * Case mapping table. Dense for codepoints < 0x80 (enabling fast lookup),
+ * sparse for higher codepoints (requiring scan or binary search).
+ */
+static const pg_case_map case_map[$num_simple] =
+{
+EOS
+
+print $OT "\t/* begin dense entries for codepoints < 0x80 */\n";
+for (my $code = 0; $code < 0x80; $code++)
+{
+	# Fetch the entry without autovivifying it; a codepoint that has no
+	# entry maps to itself in all three cases.
+	my $map = $simple{$code} // {};
+	my $lc = ($map->{Simple_Lowercase} || $code);
+	my $tc = ($map->{Simple_Titlecase} || $code);
+	my $uc = ($map->{Simple_Uppercase} || $code);
+	printf $OT
+	  "\t{0x%06x, {[CaseLower] = 0x%06x,[CaseTitle] = 0x%06x,[CaseUpper] = 0x%06x}},\n",
+	  $code, $lc, $tc, $uc;
+}
+print $OT "\n";
+
+print $OT "\t/* begin sparse entries for codepoints >= 0x80 */\n";
+
+# codepoints < 0x80 were already output above
+foreach my $code (sort { $a <=> $b } grep { $_ >= 0x80 } keys %simple)
+{
+	my $map = $simple{$code};
+	printf $OT
+	  "\t{0x%06x, {[CaseLower] = 0x%06x,[CaseTitle] = 0x%06x,[CaseUpper] = 0x%06x}},\n",
+	  $code, $map->{Simple_Lowercase}, $map->{Simple_Titlecase},
+	  $map->{Simple_Uppercase};
+}
+print $OT "};\n";
+
+# Buffered write errors (e.g. a full disk) only surface at close time, so
+# check it rather than silently leaving a truncated header behind.
+close($OT)
+  or die "Could not close output file $output_table_file: $!\n";
@@ -24,6 +24,16 @@ endforeach
 
 update_unicode_targets = []
 
+# Generate unicode_case_table.h with generate-unicode_case_table.pl.
+# NOTE(review): the script opens UnicodeData.txt inside --outdir and does not
+# read the trailing @INPUT@ argument; presumably the data file is placed in
+# the same build directory -- confirm.
+update_unicode_targets += \
+  custom_target('unicode_case_table.h',
+    input: [unicode_data['UnicodeData.txt']],
+    output: ['unicode_case_table.h'],
+    command: [
+      perl, files('generate-unicode_case_table.pl'),
+      '--outdir', '@OUTDIR@', '@INPUT@'],
+    build_by_default: false,
+  )
+
 update_unicode_targets += \
   custom_target('unicode_category_table.h',
     input: [unicode_data['UnicodeData.txt'], unicode_data['DerivedCoreProperties.txt'], unicode_data['PropList.txt']],
@@ -92,6 +102,17 @@ norm_test_table = custom_target('norm_test_table.h',
 
 inc = include_directories('.')
 
+# Test program that compares the simple case mappings against ICU.  It is
+# not built by default and is never installed.
+case_test = executable('case_test',
+  ['case_test.c'],
+  dependencies: [frontend_port_code, icu],
+  include_directories: inc,
+  link_with: [common_static, pgport_static],
+  build_by_default: false,
+  kwargs: default_bin_args + {
+    'install': false,
+  }
+)
+
 category_test = executable('category_test',
   ['category_test.c'],
   dependencies: [frontend_port_code, icu],
@@ -116,6 +137,16 @@ norm_test = executable('norm_test',
 
 update_unicode_dep = []
 
+# Run case_test after the generated tables; skipped for cross builds.  The
+# target is always considered stale, so the test reruns every time it is
+# requested.  Note that case_test's main() does not examine its arguments,
+# so UNICODE_VERSION is passed but currently unused by the program.
+if not meson.is_cross_build()
+  update_unicode_dep += custom_target('case_test.run',
+    output: 'case_test.run',
+    input: update_unicode_targets,
+    command: [case_test, UNICODE_VERSION],
+    build_by_default: false,
+    build_always_stale: true,
+  )
+endif
+
 if not meson.is_cross_build()
   update_unicode_dep += custom_target('category_test.run',
     output: 'category_test.run',