Skip to content

Commit 1b65847

Browse files
committed
Add support for Windows codepages 1253, 1254, 1255, and 1257 and clean
up a bunch of the support utilities. In src/backend/utils/mb/Unicode, remove the nearly duplicate copies of the UCS_to_XXX Perl script and replace them with one version that handles all generic files. Update the Makefile so that it knows about all the map files. This produces slight differences in some of the map files: the generated files use a uniform naming convention and do not map the null character. In src/backend/utils/mb/conversion_procs, create a master utf8<->win codepage function, like the ISO 8859 versions, instead of having a separate handler for each conversion. There is one externally visible change: the name of the win1258 to utf8 conversion. According to the documentation notes, it was named incorrectly, and this commit changes it to a standard name. Running the Unicode mapping Perl scripts has also shown some additional mapping changes in koi8r and iso8859-7.
1 parent a6d3b5b commit 1b65847

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

54 files changed

+1527
-1436
lines changed

doc/src/sgml/charset.sgml

Lines changed: 53 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.75 2005/11/04 23:13:59 petere Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.76 2006/02/18 16:15:21 petere Exp $ -->
22

33
<chapter id="charset">
44
<title>Localization</>
@@ -571,13 +571,41 @@ initdb --locale=sv_SE
571571
<entry>1</entry>
572572
<entry></entry>
573573
</row>
574+
<row>
575+
<entry><literal>WIN1253</literal></entry>
576+
<entry>Windows CP1253</entry>
577+
<entry>Greek</entry>
578+
<entry>1</entry>
579+
<entry></entry>
580+
</row>
581+
<row>
582+
<entry><literal>WIN1254</literal></entry>
583+
<entry>Windows CP1254</entry>
584+
<entry>Turkish</entry>
585+
<entry>1</entry>
586+
<entry></entry>
587+
</row>
588+
<row>
589+
<entry><literal>WIN1255</literal></entry>
590+
<entry>Windows CP1255</entry>
591+
<entry>Hebrew</entry>
592+
<entry>1</entry>
593+
<entry></entry>
594+
</row>
574595
<row>
575596
<entry><literal>WIN1256</literal></entry>
576597
<entry>Windows CP1256</entry>
577598
<entry>Arabic</entry>
578599
<entry>1</entry>
579600
<entry></entry>
580601
</row>
602+
<row>
603+
<entry><literal>WIN1257</literal></entry>
604+
<entry>Windows CP1257</entry>
605+
<entry>Baltic</entry>
606+
<entry>1</entry>
607+
<entry></entry>
608+
</row>
581609
<row>
582610
<entry><literal>WIN1258</literal></entry>
583611
<entry>Windows CP1258</entry>
@@ -952,12 +980,36 @@ $ <userinput>psql -l</userinput>
952980
<literal>UTF8</literal>
953981
</entry>
954982
</row>
983+
<row>
984+
<entry><literal>WIN1253</literal></entry>
985+
<entry><emphasis>WIN1253</emphasis>,
986+
<literal>UTF8</literal>
987+
</entry>
988+
</row>
989+
<row>
990+
<entry><literal>WIN1254</literal></entry>
991+
<entry><emphasis>WIN1254</emphasis>,
992+
<literal>UTF8</literal>
993+
</entry>
994+
</row>
995+
<row>
996+
<entry><literal>WIN1255</literal></entry>
997+
<entry><emphasis>WIN1255</emphasis>,
998+
<literal>UTF8</literal>
999+
</entry>
1000+
</row>
9551001
<row>
9561002
<entry><literal>WIN1256</literal></entry>
9571003
<entry><emphasis>WIN1256</emphasis>,
9581004
<literal>UTF8</literal>
9591005
</entry>
9601006
</row>
1007+
<row>
1008+
<entry><literal>WIN1257</literal></entry>
1009+
<entry><emphasis>WIN1257</emphasis>,
1010+
<literal>UTF8</literal>
1011+
</entry>
1012+
</row>
9611013
<row>
9621014
<entry><literal>WIN1258</literal></entry>
9631015
<entry><emphasis>WIN1258</emphasis>,

doc/src/sgml/func.sgml

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.306 2006/02/12 04:44:15 momjian Exp $
2+
$PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.307 2006/02/18 16:15:21 petere Exp $
33
PostgreSQL documentation
44
-->
55

@@ -2172,12 +2172,36 @@ PostgreSQL documentation
21722172
<entry><literal>WIN1252</literal></entry>
21732173
</row>
21742174

2175+
<row>
2176+
<entry><literal>utf8_to_windows_1253</literal></entry>
2177+
<entry><literal>UTF8</literal></entry>
2178+
<entry><literal>WIN1253</literal></entry>
2179+
</row>
2180+
2181+
<row>
2182+
<entry><literal>utf8_to_windows_1254</literal></entry>
2183+
<entry><literal>UTF8</literal></entry>
2184+
<entry><literal>WIN1254</literal></entry>
2185+
</row>
2186+
2187+
<row>
2188+
<entry><literal>utf8_to_windows_1255</literal></entry>
2189+
<entry><literal>UTF8</literal></entry>
2190+
<entry><literal>WIN1255</literal></entry>
2191+
</row>
2192+
21752193
<row>
21762194
<entry><literal>utf8_to_windows_1256</literal></entry>
21772195
<entry><literal>UTF8</literal></entry>
21782196
<entry><literal>WIN1256</literal></entry>
21792197
</row>
21802198

2199+
<row>
2200+
<entry><literal>utf8_to_windows_1257</literal></entry>
2201+
<entry><literal>UTF8</literal></entry>
2202+
<entry><literal>WIN1257</literal></entry>
2203+
</row>
2204+
21812205
<row>
21822206
<entry><literal>utf8_to_windows_866</literal></entry>
21832207
<entry><literal>UTF8</literal></entry>

src/backend/utils/mb/README

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,11 @@
1-
common.c: public functions for both the backend and the frontend.
2-
requires conv.c and wchar.c
1+
encnames.c: public functions for both the backend and the frontend.
32
conv.c: static functions and a public table for code conversion
43
wchar.c: mostly static functions and a public table for mb string and
54
multibyte conversion
6-
mbutilc.c: public functions for the backend only.
5+
mbutils.c: public functions for the backend only.
76
requires conv.c and wchar.c
87
wstrcmp.c: strcmp for mb
98
wstrncmp.c: strncmp for mb
10-
alt.c: a tool to generate KOI8 <--> CP866 conversion table
9+
win866.c: a tool to generate KOI8 <--> CP866 conversion table
1110
iso.c: a tool to generate KOI8 <--> ISO8859-5 conversion table
12-
win.c: a tool to generate KOI8 <--> CP1251 conversion table
13-
big5.c: conversion between BIG5 and Mule Internal Code(CNS 116643-1992
14-
plane 1 and plane 2).
15-
utftest.c: test driver for utf2wchar()
11+
win1251.c: a tool to generate KOI8 <--> CP1251 conversion table

src/backend/utils/mb/Unicode/Makefile

Lines changed: 58 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -4,63 +4,88 @@
44
#
55
# Copyright (c) 2001-2005, PostgreSQL Global Development Group
66
#
7-
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/Makefile,v 1.9 2005/03/07 04:30:52 momjian Exp $
7+
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/Makefile,v 1.10 2006/02/18 16:15:22 petere Exp $
88
#
99
#-------------------------------------------------------------------------
1010

1111
subdir = src/backend/utils/mb/Unicode
1212
top_builddir = ../../../../..
1313
include $(top_builddir)/src/Makefile.global
1414

15-
ISO8859MAPS=iso8859_2_to_utf8.map iso8859_3_to_utf8.map \
16-
iso8859_4_to_utf8.map iso8859_5_to_utf8.map \
17-
utf8_to_iso8859_2.map utf8_to_iso8859_3.map \
18-
utf8_to_iso8859_4.map utf8_to_iso8859_5.map
19-
20-
21-
CYRILLICMAPS=koi8r_to_utf8.map win1251_to_utf8.map win866_to_utf8.map\
22-
utf8_to_koi8r.map utf8_to_win1251.map utf8_to_win866.map
23-
24-
MAPS= $(ISO8859MAPS) $(CYRILLICMAPS)\
25-
big5_to_utf8.map euc_cn_to_utf8.map euc_jp_to_utf8.map \
26-
euc_kr_to_utf8.map euc_tw_to_utf8.map sjis_to_utf8.map \
27-
utf8_to_big5.map utf8_to_euc_cn.map utf8_to_euc_jp.map \
28-
utf8_to_euc_kr.map utf8_to_euc_tw.map utf8_to_iso8859_2.map \
29-
utf8_to_sjis.map gb18030_to_utf8.map utf8_to_gb18030.map
30-
31-
ISO8859TEXTS= 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT
32-
CYRILLICTEXTS=cp866.txt cp1251.txt koi8-r.txt
33-
34-
TEXTS=$(ISO8859TEXTS) $(CYRILLICTEXTS) \
35-
BIG5.TXT CNS11643.TXT GB2312.TXT \
36-
JIS0201.TXT JIS0208.TXT JIS0212.TXT \
37-
OLD5601.TXT SHIFTJIS.TXT ISO10646-GB18030.TXT
15+
ISO8859MAPS = iso8859_2_to_utf8.map utf8_to_iso8859_2.map \
16+
iso8859_3_to_utf8.map utf8_to_iso8859_3.map \
17+
iso8859_4_to_utf8.map utf8_to_iso8859_4.map \
18+
iso8859_5_to_utf8.map utf8_to_iso8859_5.map \
19+
iso8859_6_to_utf8.map utf8_to_iso8859_6.map \
20+
iso8859_7_to_utf8.map utf8_to_iso8859_7.map \
21+
iso8859_8_to_utf8.map utf8_to_iso8859_8.map \
22+
iso8859_9_to_utf8.map utf8_to_iso8859_9.map \
23+
iso8859_10_to_utf8.map utf8_to_iso8859_10.map \
24+
iso8859_13_to_utf8.map utf8_to_iso8859_13.map \
25+
iso8859_14_to_utf8.map utf8_to_iso8859_14.map \
26+
iso8859_15_to_utf8.map utf8_to_iso8859_15.map \
27+
iso8859_16_to_utf8.map utf8_to_iso8859_16.map
28+
29+
WINMAPS = win866_to_utf8.map utf8_to_win866.map \
30+
win874_to_utf8.map utf8_to_win874.map \
31+
win1250_to_utf8.map utf8_to_win1250.map \
32+
win1251_to_utf8.map utf8_to_win1251.map \
33+
win1252_to_utf8.map utf8_to_win1252.map \
34+
win1253_to_utf8.map utf8_to_win1253.map \
35+
win1254_to_utf8.map utf8_to_win1254.map \
36+
win1255_to_utf8.map utf8_to_win1255.map \
37+
win1256_to_utf8.map utf8_to_win1256.map \
38+
win1257_to_utf8.map utf8_to_win1257.map \
39+
win1258_to_utf8.map utf8_to_win1258.map
40+
41+
GENERICMAPS = $(ISO8859MAPS) $(WINMAPS) \
42+
big5_to_utf8.map utf8_to_big5.map \
43+
johab_to_utf8.map utf8_to_johab.map \
44+
uhc_to_utf8.map utf8_to_uhc.map \
45+
gbk_to_utf8.map utf8_to_gbk.map \
46+
koi8r_to_utf8.map utf8_to_koi8r.map
47+
48+
SPECIALMAPS = euc_cn_to_utf8.map utf8_to_euc_cn.map \
49+
euc_jp_to_utf8.map utf8_to_euc_jp.map \
50+
euc_kr_to_utf8.map utf8_to_euc_kr.map \
51+
euc_tw_to_utf8.map utf8_to_euc_tw.map \
52+
sjis_to_utf8.map utf8_to_sjis.map \
53+
gb18030_to_utf8.map utf8_to_gb18030.map
54+
55+
MAPS = $(GENERICMAPS) $(SPECIALMAPS)
56+
57+
ISO8859TEXTS = 8859-2.TXT 8859-3.TXT 8859-4.TXT 8859-5.TXT \
58+
8859-6.TXT 8859-7.TXT 8859-8.TXT 8859-9.TXT \
59+
8859-10.TXT 8859-13.TXT 8859-14.TXT 8859-15.TXT \
60+
8859-16.TXT
61+
62+
WINTEXTS = CP866.TXT CP874.TXT CP1250.TXT CP1251.TXT \
63+
CP1252.TXT CP1253.TXT CP1254.TXT CP1255.TXT \
64+
CP1256.TXT CP1257.TXT CP1258.TXT
65+
66+
GENERICTEXTS = $(ISO8859TEXTS) $(WINTEXTS) \
67+
KOI8-R.TXT CP936.TXT CP949.TXT JOHAB.TXT BIG5.TXT
3868

3969
all: $(MAPS)
4070

41-
$(ISO8859MAPS) : $(ISO8859TEXTS)
42-
./UCS_to_8859.pl
43-
$(CYRILLICMAPS) : $(CYRILLICTEXTS)
44-
./UCS_to_cyrillic.pl
71+
$(GENERICMAPS) : $(GENERICTEXTS)
72+
./UCS_to_most.pl
4573

4674
euc_jp_to_utf8.map utf8_to_euc_jp.map : JIS0201.TXT JIS0208.TXT JIS0212.TXT
4775
./UCS_to_EUC_JP.pl
4876

4977
euc_cn_to_utf8.map utf8_to_euc_cn.map : GB2312.TXT
5078
./UCS_to_EUC_CN.pl
5179

52-
euc_kr_to_utf8.map utf8_to_euc_kr.map : OLD5601.TXT
80+
euc_kr_to_utf8.map utf8_to_euc_kr.map : KSX1001.TXT
5381
./UCS_to_EUC_KR.pl
5482

5583
euc_tw_to_utf8.map utf8_to_euc_tw.map : CNS11643.TXT
5684
./UCS_to_EUC_TW.pl
5785

58-
sjis_to_utf8.map utf8_to_sjis.map : SHIFTJIS.TXT
86+
sjis_to_utf8.map utf8_to_sjis.map : CP932.TXT
5987
./UCS_to_SJIS.pl
6088

61-
big5_to_utf8.map utf8_to_big5.map : BIG5.TXT
62-
./UCS_to_BIG5.pl
63-
6489
gb18030_to_utf8.map utf8_to_gb18030.map : ISO10646-GB18030.TXT
6590
./UCS_to_GB18030.pl
6691
clean:

src/backend/utils/mb/Unicode/UCS_to_8859.pl

Lines changed: 0 additions & 110 deletions
This file was deleted.

0 commit comments

Comments
 (0)