Skip to content

Commit 65edb54

Browse files
committed
Attached are a patch to allow the charset encoding used by the JDBC
driver to be set, and a description of said patch. Please refer to the latter for more information. William -- William Webber william@peopleweb.net.au
1 parent 4f5cdad commit 65edb54

File tree

7 files changed

+328
-12
lines changed

7 files changed

+328
-12
lines changed

src/interfaces/jdbc/Makefile

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# Makefile for Java JDBC interface
55
#
66
# IDENTIFICATION
7-
# $Id: Makefile,v 1.23 2000/06/06 11:05:56 peter Exp $
7+
# $Id: Makefile,v 1.24 2000/09/12 04:58:46 momjian Exp $
88
#
99
#-------------------------------------------------------------------------
1010

@@ -226,7 +226,8 @@ EX2= example/blobtest.class
226226

227227
# These are really test classes not true examples
228228
TESTS= example/metadata.class \
229-
example/threadsafe.class
229+
example/threadsafe.class \
230+
example/Unicode.class
230231

231232
# Non functional/obsolete examples
232233
# example/datestyle.class \
@@ -266,6 +267,7 @@ tests: $(TESTS)
266267
@echo The following tests have been built:
267268
@echo " example.metadata Tests various metadata methods"
268269
@echo " example.threadsafe Tests the driver's thread safety"
270+
@echo " example.Unicode Tests unicode charset support"
269271
@echo ------------------------------------------------------------
270272
@echo
271273

@@ -276,6 +278,7 @@ example/psql.class: example/psql.java
276278
example/ImageViewer.class: example/ImageViewer.java
277279
example/threadsafe.class: example/threadsafe.java
278280
example/metadata.class: example/metadata.java
281+
example/Unicode.class: example/Unicode.java
279282

280283
#######################################################################
281284
#
Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
package example;
2+
3+
import java.io.*;
4+
import java.sql.*;
5+
import java.util.*;
6+
7+
/**
 * Test inserting and extracting Unicode-encoded strings.
 *
 * Synopsis:
 *   example.Unicode <url> <user> <password>
 * where <url> must specify an existing database to which <user> and
 * <password> give access and which has UNICODE as its encoding.
 * (To create a database with UNICODE encoding, you need to compile
 * postgres with "--enable-multibyte" and run createdb with the
 * flag "-E UNICODE".)
 *
 * The test creates the table test_unicode, stores one row per block of
 * 256 consecutive characters (surrogate blocks excluded), reads every
 * row back both by key and with LIKE, and finally drops the table.
 *
 * This test only produces output on error.
 *
 * @author William Webber <william@live.com.au>
 */
public class Unicode {

    /** Phase of the test: connecting and creating the test table. */
    private static final int CREATE = 0;

    /** Phase of the test: inserting the character blocks. */
    private static final int INSERT = 1;

    /** Phase of the test: selecting the blocks back by key. */
    private static final int SELECT = 2;

    /** Phase of the test: matching the blocks with LIKE. */
    private static final int LIKE = 3;

    /**
     * The url for the database to connect to.
     */
    private String url;

    /**
     * The user to connect as.
     */
    private String user;

    /**
     * The password to connect with.
     */
    private String password;

    /** Print the command-line synopsis to standard error. */
    private static void usage() {
        log("usage: example.Unicode <url> <user> <password>");
    }

    /** Print a message to standard error. */
    private static void log(String message) {
        System.err.println(message);
    }

    /** Print a message followed by the stack trace of its cause. */
    private static void log(String message, Exception e) {
        System.err.println(message);
        e.printStackTrace();
    }

    /**
     * Close a statement and a connection independently, so that a failure
     * to close the statement cannot leave the connection open.
     * Either argument may be <b>null</b>.
     */
    private static void close(Statement statement, Connection connection) {
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException sqle) {
                log("Exception closing statement", sqle);
            }
        }
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException sqle) {
                log("Exception closing connection", sqle);
            }
        }
    }

    /**
     * @param url the JDBC url of the database to test against
     * @param user the user to connect as
     * @param password the password to connect with
     */
    public Unicode(String url, String user, String password) {
        this.url = url;
        this.user = user;
        this.password = password;
    }

    /**
     * Establish and return a connection to the database.
     * The connection is requested with the "charSet" property set
     * to "utf-8".
     *
     * @exception SQLException if the connection cannot be established
     * @exception ClassNotFoundException if the driver class is not on
     *            the class path
     */
    private Connection getConnection() throws SQLException,
                                              ClassNotFoundException {
        Class.forName("org.postgresql.Driver");
        Properties info = new Properties();
        info.setProperty("user", user);
        info.setProperty("password", password);
        info.setProperty("charSet", "utf-8");
        return DriverManager.getConnection(url, info);
    }

    /**
     * Get string representing a block of 256 consecutive unicode characters.
     * We exclude the null character, "'", and "\", so that the result
     * can be embedded directly in a single-quoted SQL literal.
     *
     * @param blockNum the block number, from 0 to 255; block n covers
     *        the characters n * 256 to n * 256 + 255
     * @return the characters of the block, in ascending order
     * @exception IllegalArgumentException if blockNum is out of range
     */
    private String getSqlSafeUnicodeBlock(int blockNum) {
        if (blockNum < 0 || blockNum > 255)
            throw new IllegalArgumentException("blockNum must be from 0 to "
                                               + "255: " + blockNum);
        StringBuffer sb = new StringBuffer(256);
        int blockFirst = blockNum * 256;
        int blockLast = blockFirst + 256;
        for (int i = blockFirst; i < blockLast; i++) {
            char c = (char) i;
            if (c == '\0' || c == '\'' || c == '\\')
                continue;
            sb.append(c);
        }
        return sb.toString();
    }

    /**
     * Is the block a block of valid unicode values.
     * d800 to db7f is the "high surrogate" range,
     * db80 to dbff is the "high private use surrogate" range and
     * dc00 to dfff is the "low surrogate" range.
     * A block taken from any of these ranges consists solely of
     * unpaired surrogates, which are not characters in their own right
     * and cannot be converted to utf-8, so none of them can be expected
     * to survive a round trip through the database.
     *
     * @param blockNum the block number, from 0 to 255
     * @return <b>false</b> for the surrogate blocks 0xd8 to 0xdf,
     *         <b>true</b> otherwise
     */
    private boolean isValidUnicodeBlock(int blockNum) {
        return blockNum < 0xd8 || blockNum > 0xdf;
    }

    /**
     * Report incorrect block retrieval.
     * The message names the first position at which the retrieved string
     * differs from the one sent, or states that the retrieved string is
     * too short, too long, or NULL.
     *
     * @param blockNum the number of the block that was sent
     * @param block the string that was sent
     * @param retrieved the string that came back; may be <b>null</b>
     */
    private void reportRetrievalError(int blockNum, String block,
                                      String retrieved) {
        String message = "Block " + blockNum + " returned incorrectly: ";
        if (retrieved == null) {
            log(message + "retrieved as NULL");
            return;
        }
        int common = Math.min(block.length(), retrieved.length());
        int i = 0;
        while (i < common && retrieved.charAt(i) == block.charAt(i))
            i++;
        if (i < common) {
            message +=
                "first changed character at position " + i + ", sent as 0x"
                + Integer.toHexString((int) block.charAt(i))
                + ", retrieved as 0x"
                + Integer.toHexString((int) retrieved.charAt(i));
        } else if (retrieved.length() < block.length()) {
            message += "too short";
        } else {
            message += "too long";
        }
        log(message);
    }

    /**
     * Drop the test table over a fresh connection.
     * If the backend gets what it regards as garbage on a connection,
     * that connection may become unusable, so the connection used for
     * the test itself is not reused here.
     */
    private void dropTestTable() {
        final String failure = "*** ERROR: unable to delete test table "
                               + "test_unicode; must be deleted manually";
        Connection connection = null;
        Statement statement = null;
        try {
            connection = getConnection();
            statement = connection.createStatement();
            statement.executeUpdate("DROP TABLE test_unicode;");
        } catch (SQLException sqle) {
            log(failure, sqle);
        } catch (ClassNotFoundException cnfe) {
            log(failure, cnfe);
        } finally {
            close(statement, connection);
        }
    }

    /**
     * Do the testing.
     * Problems are reported on standard error; nothing is printed when
     * every block round-trips correctly.
     */
    public void runTest() {
        Connection connection = null;
        Statement statement = null;
        int blockNum = 0;
        // Tracks the current phase so that the exception handler can say
        // what was being attempted when the failure occurred.
        int mode = CREATE;
        try {
            connection = getConnection();
            statement = connection.createStatement();
            statement.executeUpdate("CREATE TABLE test_unicode "
                                    + "( blockNum INT PRIMARY KEY, "
                                    + "block TEXT );");
            mode = INSERT;
            for (blockNum = 0; blockNum < 256; blockNum++) {
                if (isValidUnicodeBlock(blockNum)) {
                    String block = getSqlSafeUnicodeBlock(blockNum);
                    statement.executeUpdate
                        ("INSERT INTO test_unicode VALUES ( " + blockNum
                         + ", '" + block + "');");
                }
            }
            mode = SELECT;
            for (blockNum = 0; blockNum < 256; blockNum++) {
                if (isValidUnicodeBlock(blockNum)) {
                    String block = getSqlSafeUnicodeBlock(blockNum);
                    ResultSet rs = statement.executeQuery
                        ("SELECT block FROM test_unicode WHERE blockNum = "
                         + blockNum + ";");
                    try {
                        if (!rs.next())
                            log("Could not retrieve block " + blockNum);
                        else {
                            String retrieved = rs.getString(1);
                            // block is never null, so this is also safe
                            // when the column comes back as NULL.
                            if (!block.equals(retrieved)) {
                                reportRetrievalError(blockNum, block,
                                                     retrieved);
                            }
                        }
                    } finally {
                        rs.close();
                    }
                }
            }
            mode = LIKE;
            for (blockNum = 0; blockNum < 256; blockNum++) {
                if (isValidUnicodeBlock(blockNum)) {
                    String block = getSqlSafeUnicodeBlock(blockNum);
                    String likeString = "%" +
                        block.substring(2, block.length() - 3) + "%" ;
                    ResultSet rs = statement.executeQuery
                        ("SELECT blockNum FROM test_unicode WHERE block LIKE '"
                         + likeString + "';");
                    try {
                        if (!rs.next())
                            log("Could not get block " + blockNum
                                + " using LIKE");
                    } finally {
                        rs.close();
                    }
                }
            }
        } catch (SQLException sqle) {
            switch (mode) {
            case CREATE:
                log("Exception connecting or creating table test_unicode",
                    sqle);
                break;
            case INSERT:
                log("Exception inserting block " + blockNum, sqle);
                break;
            case SELECT:
                log("Exception selecting block " + blockNum, sqle);
                break;
            case LIKE:
                log("Exception doing LIKE on block " + blockNum, sqle);
                break;
            default:
                log("Exception", sqle);
                break;
            }
        } catch (ClassNotFoundException cnfe) {
            // No connection was opened and no table was created.
            log("Unable to load driver", cnfe);
            return;
        } finally {
            close(statement, connection);
        }
        // The table only exists once the CREATE phase has completed.
        if (mode > CREATE) {
            dropTestTable();
        }
    }

    /**
     * Run the test against the database named on the command line.
     *
     * @param args the url, user and password, in that order
     */
    public static void main(String [] args) {
        if (args.length != 3) {
            usage();
            System.exit(1);
        }
        new Unicode(args[0], args[1], args[2]).runTest();
    }
}

src/interfaces/jdbc/org/postgresql/Connection.java

Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import org.postgresql.util.*;
1111

1212
/**
13-
* $Id: Connection.java,v 1.4 2000/06/06 11:05:59 peter Exp $
13+
* $Id: Connection.java,v 1.5 2000/09/12 04:58:47 momjian Exp $
1414
*
1515
* This abstract class is used by org.postgresql.Driver to open either the JDBC1 or
1616
* JDBC2 versions of the Connection class.
@@ -30,6 +30,14 @@ public abstract class Connection
3030
private String PG_PASSWORD;
3131
private String PG_DATABASE;
3232
private boolean PG_STATUS;
33+
34+
/**
35+
* The encoding to use for this connection.
36+
* If <b>null</b>, the encoding has not been specified by the
37+
* user, and the default encoding for the platform should be
38+
* used.
39+
*/
40+
private String encoding;
3341

3442
public boolean CONNECTION_OK = true;
3543
public boolean CONNECTION_BAD = false;
@@ -111,6 +119,8 @@ protected void openConnection(String host, int port, Properties info, String dat
111119
PG_PORT = port;
112120
PG_HOST = new String(host);
113121
PG_STATUS = CONNECTION_BAD;
122+
123+
encoding = info.getProperty("charSet"); // could be null
114124

115125
// Now make the initial connection
116126
try
@@ -154,7 +164,8 @@ protected void openConnection(String host, int port, Properties info, String dat
154164
// The most common one to be thrown here is:
155165
// "User authentication failed"
156166
//
157-
throw new SQLException(pg_stream.ReceiveString(4096));
167+
throw new SQLException(pg_stream.ReceiveString
168+
(4096, getEncoding()));
158169

159170
case 'R':
160171
// Get the type of request
@@ -224,7 +235,8 @@ protected void openConnection(String host, int port, Properties info, String dat
224235
break;
225236
case 'E':
226237
case 'N':
227-
throw new SQLException(pg_stream.ReceiveString(4096));
238+
throw new SQLException(pg_stream.ReceiveString
239+
(4096, getEncoding()));
228240
default:
229241
throw new PSQLException("postgresql.con.setup");
230242
}
@@ -313,7 +325,7 @@ public java.sql.ResultSet ExecSQL(String sql) throws SQLException
313325

314326
Field[] fields = null;
315327
Vector tuples = new Vector();
316-
byte[] buf = new byte[sql.length()];
328+
byte[] buf = null;
317329
int fqp = 0;
318330
boolean hfr = false;
319331
String recv_status = null, msg;
@@ -325,6 +337,18 @@ public java.sql.ResultSet ExecSQL(String sql) throws SQLException
325337
// larger than 8K. Peter June 6 2000
326338
//if (sql.length() > 8192)
327339
//throw new PSQLException("postgresql.con.toolong",sql);
340+
341+
if (getEncoding() == null)
342+
buf = sql.getBytes();
343+
else {
344+
try {
345+
buf = sql.getBytes(getEncoding());
346+
} catch (UnsupportedEncodingException unse) {
347+
throw new PSQLException("postgresql.con.encoding",
348+
unse);
349+
}
350+
}
351+
328352
try
329353
{
330354
pg_stream.SendChar('Q');
@@ -513,6 +537,15 @@ public String getUserName() throws SQLException
513537
{
514538
return PG_USER;
515539
}
540+
541+
/**
542+
* Get the character encoding to use for this connection.
543+
* @return the encoding to use, or <b>null</b> for the
544+
* default encoding.
545+
*/
546+
public String getEncoding() throws SQLException {
547+
return encoding;
548+
}
516549

517550
/**
518551
* This returns the Fastpath API for the current connection.

src/interfaces/jdbc/org/postgresql/Driver.java

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,15 @@ public Driver() throws SQLException
9090
* <p>The java.util.Properties argument can be used to pass arbitrary
9191
* string tag/value pairs as connection arguments. Normally, at least
9292
* "user" and "password" properties should be included in the
93-
* properties.
93+
* properties. In addition, the "charSet" property can be used to
94+
* set a character set encoding (e.g. "utf-8") other than the platform
95+
* default (typically Latin1). This is necessary in particular if storing
96+
* multibyte characters in the database. For a list of supported
97+
* character encodings, see
98+
* http://java.sun.com/products/jdk/1.2/docs/guide/internat/encoding.doc.html
99+
* Note that you will probably want to have set up the Postgres database
100+
* itself to use the same encoding, with the "-E <encoding>" argument
101+
* to createdb.
94102
*
95103
* Our protocol takes the forms:
96104
* <PRE>

0 commit comments

Comments
 (0)