Skip to content

Commit

Permalink
Fix export of HTML when using UTF-8 (Issue #526)
Browse files: browse the repository at this point in the history
  • Loading branch information
michaelrsweet committed Nov 21, 2024
1 parent 8d1c067 commit 4c61bca
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 7 deletions.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
management and fix potential double-free bugs.
- Updated configure script to look for zlib with pkg-config (Issue #519)
- Updated markdown support code to mmd.
- Fixed export of UTF-8 HTML (Issue #526)
- Fixed handling of whitespace-only nodes (Issue #528)
- Fixed handling of tabs in PRE nodes (Issue #529)
- Fixed case sensitivity of link targets (Issue #530)
Expand Down
48 changes: 41 additions & 7 deletions htmldoc/iso8859.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,13 @@
* ISO-8859-1 conversion routines for HTMLDOC, an HTML document
* processing program.
*
* Copyright 2011-2019 by Michael R Sweet.
* Copyright 2011-2024 by Michael R Sweet.
* Copyright 1997-2010 by Easy Software Products. All rights reserved.
*
* This program is free software. Distribution and use rights are outlined in
* the file "COPYING".
*/

/*
* Include necessary headers.
*/

#include <stdio.h>
#include <stdlib.h>

Expand Down Expand Up @@ -441,11 +437,49 @@ iso8859(uchar value) /* I - ISO-8859-1 equivalent */

if (iso8859_names[value] == NULL)
{
buf[0] = value;
buf[1] = '\0';
if (value < 127)
{
// ASCII...
buf[0] = value;
buf[1] = '\0';
}
else if (_htmlUTF8)
{
// UTF-8...
int unich = _htmlUnicode[value]; // Unicode character

if (unich < 0x400)
{
buf[0] = 0xc0 | (unich >> 6);
buf[1] = 0x80 | (unich & 0x3f);
buf[2] = '\0';
}
else if (unich < 0x10000)
{
buf[0] = 0xe0 | (unich >> 12);
buf[1] = 0x80 | ((unich >> 6) & 0x3f);
buf[2] = 0x80 | (unich & 0x3f);
buf[3] = '\0';
}
else
{
buf[0] = 0xf0 | (unich >> 18);
buf[1] = 0x80 | ((unich >> 12) & 0x3f);
buf[2] = 0x80 | ((unich >> 6) & 0x3f);
buf[3] = 0x80 | (unich & 0x3f);
buf[4] = '\0';
}
}
else
{
// Character-set neutral way to map to Unicode...
snprintf((char *)buf, sizeof(buf), "&#%d;", _htmlUnicode[value]);
}
}
else
{
snprintf((char *)buf, sizeof(buf), "&%s;", iso8859_names[value]->name);
}

return (buf);
}
Expand Down

0 comments on commit 4c61bca

Please sign in to comment.