Skip to content

Use non-BOM encodings #2370

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/embed_tests/TestPyType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ public void CanCreateHeapType()
const string name = "nÁmæ";
const string docStr = "dÁcæ";

using var doc = new StrPtr(docStr, Encoding.UTF8);
using var doc = new StrPtr(docStr, Encodings.UTF8);
var spec = new TypeSpec(
name: name,
basicSize: Util.ReadInt32(Runtime.Runtime.PyBaseObjectType, TypeOffset.tp_basicsize),
Expand Down
6 changes: 3 additions & 3 deletions src/runtime/Loader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ public unsafe static int Initialize(IntPtr data, int size)
{
try
{
var dllPath = Encoding.UTF8.GetString((byte*)data.ToPointer(), size);
var dllPath = Encodings.UTF8.GetString((byte*)data.ToPointer(), size);

if (!string.IsNullOrEmpty(dllPath))
{
Expand All @@ -33,15 +33,15 @@ public unsafe static int Initialize(IntPtr data, int size)
);
return 1;
}

return 0;
}

public unsafe static int Shutdown(IntPtr data, int size)
{
try
{
var command = Encoding.UTF8.GetString((byte*)data.ToPointer(), size);
var command = Encodings.UTF8.GetString((byte*)data.ToPointer(), size);

if (command == "full_shutdown")
{
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/Native/CustomMarshaler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ public int GetNativeDataSize()
internal class UcsMarshaler : MarshalerBase
{
internal static readonly int _UCS = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? 2 : 4;
internal static readonly Encoding PyEncoding = _UCS == 2 ? Encoding.Unicode : Encoding.UTF32;
internal static readonly Encoding PyEncoding = _UCS == 2 ? Encodings.UTF16 : Encodings.UTF32;
private static readonly MarshalerBase Instance = new UcsMarshaler();

public override IntPtr MarshalManagedToNative(object managedObj)
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/Native/NativeTypeSpec.cs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ public NativeTypeSpec(TypeSpec spec)
{
if (spec is null) throw new ArgumentNullException(nameof(spec));

this.Name = new StrPtr(spec.Name, Encoding.UTF8);
this.Name = new StrPtr(spec.Name, Encodings.UTF8);
this.BasicSize = spec.BasicSize;
this.ItemSize = spec.ItemSize;
this.Flags = (int)spec.Flags;
Expand Down
2 changes: 1 addition & 1 deletion src/runtime/PythonTypes/PyType.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ public string Name
{
RawPointer = Util.ReadIntPtr(this, TypeOffset.tp_name),
};
return namePtr.ToString(System.Text.Encoding.UTF8)!;
return namePtr.ToString(Encodings.UTF8)!;
}
}

Expand Down
46 changes: 24 additions & 22 deletions src/runtime/Runtime.cs
Original file line number Diff line number Diff line change
Expand Up @@ -795,13 +795,13 @@ public static int Py_Main(int argc, string[] argv)

internal static int PyRun_SimpleString(string code)
{
using var codePtr = new StrPtr(code, Encoding.UTF8);
using var codePtr = new StrPtr(code, Encodings.UTF8);
return Delegates.PyRun_SimpleStringFlags(codePtr, Utf8String);
}

internal static NewReference PyRun_String(string code, RunFlagType st, BorrowedReference globals, BorrowedReference locals)
{
using var codePtr = new StrPtr(code, Encoding.UTF8);
using var codePtr = new StrPtr(code, Encodings.UTF8);
return Delegates.PyRun_StringFlags(codePtr, st, globals, locals, Utf8String);
}

Expand All @@ -813,14 +813,14 @@ internal static NewReference PyRun_String(string code, RunFlagType st, BorrowedR
/// </summary>
internal static NewReference Py_CompileString(string str, string file, int start)
{
using var strPtr = new StrPtr(str, Encoding.UTF8);
using var strPtr = new StrPtr(str, Encodings.UTF8);
using var fileObj = new PyString(file);
return Delegates.Py_CompileStringObject(strPtr, fileObj, start, Utf8String, -1);
}

internal static NewReference PyImport_ExecCodeModule(string name, BorrowedReference code)
{
using var namePtr = new StrPtr(name, Encoding.UTF8);
using var namePtr = new StrPtr(name, Encodings.UTF8);
return Delegates.PyImport_ExecCodeModule(namePtr, code);
}

Expand Down Expand Up @@ -867,13 +867,13 @@ internal static bool PyObject_IsIterable(BorrowedReference ob)

internal static int PyObject_HasAttrString(BorrowedReference pointer, string name)
{
using var namePtr = new StrPtr(name, Encoding.UTF8);
using var namePtr = new StrPtr(name, Encodings.UTF8);
return Delegates.PyObject_HasAttrString(pointer, namePtr);
}

internal static NewReference PyObject_GetAttrString(BorrowedReference pointer, string name)
{
using var namePtr = new StrPtr(name, Encoding.UTF8);
using var namePtr = new StrPtr(name, Encodings.UTF8);
return Delegates.PyObject_GetAttrString(pointer, namePtr);
}

Expand All @@ -884,12 +884,12 @@ internal static NewReference PyObject_GetAttrString(BorrowedReference pointer, S
internal static int PyObject_DelAttr(BorrowedReference @object, BorrowedReference name) => Delegates.PyObject_SetAttr(@object, name, null);
internal static int PyObject_DelAttrString(BorrowedReference @object, string name)
{
using var namePtr = new StrPtr(name, Encoding.UTF8);
using var namePtr = new StrPtr(name, Encodings.UTF8);
return Delegates.PyObject_SetAttrString(@object, namePtr, null);
}
internal static int PyObject_SetAttrString(BorrowedReference @object, string name, BorrowedReference value)
{
using var namePtr = new StrPtr(name, Encoding.UTF8);
using var namePtr = new StrPtr(name, Encodings.UTF8);
return Delegates.PyObject_SetAttrString(@object, namePtr, value);
}

Expand Down Expand Up @@ -1071,7 +1071,7 @@ internal static bool PyBool_CheckExact(BorrowedReference ob)

internal static NewReference PyLong_FromString(string value, int radix)
{
using var valPtr = new StrPtr(value, Encoding.UTF8);
using var valPtr = new StrPtr(value, Encodings.UTF8);
return Delegates.PyLong_FromString(valPtr, IntPtr.Zero, radix);
}

Expand Down Expand Up @@ -1252,12 +1252,14 @@ internal static bool PyString_CheckExact(BorrowedReference ob)

internal static NewReference PyString_FromString(string value)
{
int byteorder = BitConverter.IsLittleEndian ? -1 : 1;
int* byteorderPtr = &byteorder;
fixed(char* ptr = value)
return Delegates.PyUnicode_DecodeUTF16(
(IntPtr)ptr,
value.Length * sizeof(Char),
IntPtr.Zero,
IntPtr.Zero
(IntPtr)byteorderPtr
);
}

Expand All @@ -1272,7 +1274,7 @@ internal static NewReference EmptyPyBytes()
internal static NewReference PyByteArray_FromStringAndSize(IntPtr strPtr, nint len) => Delegates.PyByteArray_FromStringAndSize(strPtr, len);
internal static NewReference PyByteArray_FromStringAndSize(string s)
{
using var ptr = new StrPtr(s, Encoding.UTF8);
using var ptr = new StrPtr(s, Encodings.UTF8);
return PyByteArray_FromStringAndSize(ptr.RawPointer, checked((nint)ptr.ByteCount));
}

Expand Down Expand Up @@ -1300,7 +1302,7 @@ internal static IntPtr PyBytes_AsString(BorrowedReference ob)

internal static NewReference PyUnicode_InternFromString(string s)
{
using var ptr = new StrPtr(s, Encoding.UTF8);
using var ptr = new StrPtr(s, Encodings.UTF8);
return Delegates.PyUnicode_InternFromString(ptr);
}

Expand Down Expand Up @@ -1375,7 +1377,7 @@ internal static bool PyDict_Check(BorrowedReference ob)

internal static BorrowedReference PyDict_GetItemString(BorrowedReference pointer, string key)
{
using var keyStr = new StrPtr(key, Encoding.UTF8);
using var keyStr = new StrPtr(key, Encodings.UTF8);
return Delegates.PyDict_GetItemString(pointer, keyStr);
}

Expand All @@ -1391,7 +1393,7 @@ internal static BorrowedReference PyDict_GetItemString(BorrowedReference pointer
/// </summary>
internal static int PyDict_SetItemString(BorrowedReference dict, string key, BorrowedReference value)
{
using var keyPtr = new StrPtr(key, Encoding.UTF8);
using var keyPtr = new StrPtr(key, Encodings.UTF8);
return Delegates.PyDict_SetItemString(dict, keyPtr, value);
}

Expand All @@ -1400,7 +1402,7 @@ internal static int PyDict_SetItemString(BorrowedReference dict, string key, Bor

internal static int PyDict_DelItemString(BorrowedReference pointer, string key)
{
using var keyPtr = new StrPtr(key, Encoding.UTF8);
using var keyPtr = new StrPtr(key, Encodings.UTF8);
return Delegates.PyDict_DelItemString(pointer, keyPtr);
}

Expand Down Expand Up @@ -1515,7 +1517,7 @@ internal static bool PyIter_Check(BorrowedReference ob)

internal static NewReference PyModule_New(string name)
{
using var namePtr = new StrPtr(name, Encoding.UTF8);
using var namePtr = new StrPtr(name, Encodings.UTF8);
return Delegates.PyModule_New(namePtr);
}

Expand All @@ -1529,7 +1531,7 @@ internal static NewReference PyModule_New(string name)
/// <returns>Return -1 on error, 0 on success.</returns>
internal static int PyModule_AddObject(BorrowedReference module, string name, StolenReference value)
{
using var namePtr = new StrPtr(name, Encoding.UTF8);
using var namePtr = new StrPtr(name, Encodings.UTF8);
IntPtr valueAddr = value.DangerousGetAddressOrNull();
int res = Delegates.PyModule_AddObject(module, namePtr, valueAddr);
// We can't just exit here because the reference is stolen only on success.
Expand All @@ -1547,7 +1549,7 @@ internal static int PyModule_AddObject(BorrowedReference module, string name, St

internal static NewReference PyImport_ImportModule(string name)
{
using var namePtr = new StrPtr(name, Encoding.UTF8);
using var namePtr = new StrPtr(name, Encodings.UTF8);
return Delegates.PyImport_ImportModule(namePtr);
}

Expand All @@ -1556,7 +1558,7 @@ internal static NewReference PyImport_ImportModule(string name)

internal static BorrowedReference PyImport_AddModule(string name)
{
using var namePtr = new StrPtr(name, Encoding.UTF8);
using var namePtr = new StrPtr(name, Encodings.UTF8);
return Delegates.PyImport_AddModule(namePtr);
}

Expand Down Expand Up @@ -1584,13 +1586,13 @@ internal static void PySys_SetArgvEx(int argc, string[] argv, int updatepath)

internal static BorrowedReference PySys_GetObject(string name)
{
using var namePtr = new StrPtr(name, Encoding.UTF8);
using var namePtr = new StrPtr(name, Encodings.UTF8);
return Delegates.PySys_GetObject(namePtr);
}

internal static int PySys_SetObject(string name, BorrowedReference ob)
{
using var namePtr = new StrPtr(name, Encoding.UTF8);
using var namePtr = new StrPtr(name, Encodings.UTF8);
return Delegates.PySys_SetObject(namePtr, ob);
}

Expand Down Expand Up @@ -1689,7 +1691,7 @@ internal static IntPtr PyMem_Malloc(long size)

internal static void PyErr_SetString(BorrowedReference ob, string message)
{
using var msgPtr = new StrPtr(message, Encoding.UTF8);
using var msgPtr = new StrPtr(message, Encodings.UTF8);
Delegates.PyErr_SetString(ob, msgPtr);
}

Expand Down
10 changes: 10 additions & 0 deletions src/runtime/Util/Encodings.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
using System;
using System.Text;

namespace Python.Runtime;

/// <summary>
/// Preconfigured <see cref="System.Text.Encoding"/> instances that never emit
/// a byte order mark (BOM) and that throw on invalid input instead of
/// silently substituting replacement characters.
/// </summary>
/// <remarks>
/// The fields are <c>readonly</c> so the shared instances cannot be swapped
/// out at runtime by other code in the assembly.
/// </remarks>
internal static class Encodings {
    /// <summary>UTF-8 with no BOM preamble; invalid byte sequences throw.</summary>
    public static readonly System.Text.Encoding UTF8 = new UTF8Encoding(
        encoderShouldEmitUTF8Identifier: false,
        throwOnInvalidBytes: true);

    /// <summary>
    /// UTF-16 in the byte order of the current machine (big-endian only when
    /// <see cref="BitConverter.IsLittleEndian"/> is false), with no BOM
    /// preamble; invalid sequences throw.
    /// </summary>
    public static readonly System.Text.Encoding UTF16 = new UnicodeEncoding(
        bigEndian: !BitConverter.IsLittleEndian,
        byteOrderMark: false,
        throwOnInvalidBytes: true);

    /// <summary>
    /// UTF-32 in the byte order of the current machine (big-endian only when
    /// <see cref="BitConverter.IsLittleEndian"/> is false), with no BOM
    /// preamble; invalid sequences throw.
    /// </summary>
    public static readonly System.Text.Encoding UTF32 = new UTF32Encoding(
        bigEndian: !BitConverter.IsLittleEndian,
        byteOrderMark: false,
        throwOnInvalidCharacters: true);
}
3 changes: 3 additions & 0 deletions tests/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,9 @@ def test_string_conversion():
ob.StringField = System.String(u'\uffff\uffff')
assert ob.StringField == u'\uffff\uffff'

ob.StringField = System.String("\ufeffbom")
assert ob.StringField == "\ufeffbom"

ob.StringField = None
assert ob.StringField is None

Expand Down