Skip to content

Commit 4c46c6d

Browse files
committed
Use non-BOM encodings
1 parent 6a8a97d commit 4c46c6d

File tree

8 files changed

+41
-28
lines changed

8 files changed

+41
-28
lines changed

src/embed_tests/TestPyType.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ public void CanCreateHeapType()
2828
const string name = "nÁmæ";
2929
const string docStr = "dÁcæ";
3030

31-
using var doc = new StrPtr(docStr, Encoding.UTF8);
31+
using var doc = new StrPtr(docStr, Encodings.UTF8);
3232
var spec = new TypeSpec(
3333
name: name,
3434
basicSize: Util.ReadInt32(Runtime.Runtime.PyBaseObjectType, TypeOffset.tp_basicsize),

src/runtime/Loader.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ public unsafe static int Initialize(IntPtr data, int size)
1212
{
1313
try
1414
{
15-
var dllPath = Encoding.UTF8.GetString((byte*)data.ToPointer(), size);
15+
var dllPath = Encodings.UTF8.GetString((byte*)data.ToPointer(), size);
1616

1717
if (!string.IsNullOrEmpty(dllPath))
1818
{
@@ -33,15 +33,15 @@ public unsafe static int Initialize(IntPtr data, int size)
3333
);
3434
return 1;
3535
}
36-
36+
3737
return 0;
3838
}
3939

4040
public unsafe static int Shutdown(IntPtr data, int size)
4141
{
4242
try
4343
{
44-
var command = Encoding.UTF8.GetString((byte*)data.ToPointer(), size);
44+
var command = Encodings.UTF8.GetString((byte*)data.ToPointer(), size);
4545

4646
if (command == "full_shutdown")
4747
{

src/runtime/Native/CustomMarshaler.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ public int GetNativeDataSize()
4242
internal class UcsMarshaler : MarshalerBase
4343
{
4444
internal static readonly int _UCS = RuntimeInformation.IsOSPlatform(OSPlatform.Windows) ? 2 : 4;
45-
internal static readonly Encoding PyEncoding = _UCS == 2 ? Encoding.Unicode : Encoding.UTF32;
45+
internal static readonly Encoding PyEncoding = _UCS == 2 ? Encodings.UTF16 : Encodings.UTF32;
4646
private static readonly MarshalerBase Instance = new UcsMarshaler();
4747

4848
public override IntPtr MarshalManagedToNative(object managedObj)

src/runtime/Native/NativeTypeSpec.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ public NativeTypeSpec(TypeSpec spec)
1717
{
1818
if (spec is null) throw new ArgumentNullException(nameof(spec));
1919

20-
this.Name = new StrPtr(spec.Name, Encoding.UTF8);
20+
this.Name = new StrPtr(spec.Name, Encodings.UTF8);
2121
this.BasicSize = spec.BasicSize;
2222
this.ItemSize = spec.ItemSize;
2323
this.Flags = (int)spec.Flags;

src/runtime/PythonTypes/PyType.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ public string Name
5353
{
5454
RawPointer = Util.ReadIntPtr(this, TypeOffset.tp_name),
5555
};
56-
return namePtr.ToString(System.Text.Encoding.UTF8)!;
56+
return namePtr.ToString(Encodings.UTF8)!;
5757
}
5858
}
5959

src/runtime/Runtime.cs

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -795,13 +795,13 @@ public static int Py_Main(int argc, string[] argv)
795795

796796
internal static int PyRun_SimpleString(string code)
797797
{
798-
using var codePtr = new StrPtr(code, Encoding.UTF8);
798+
using var codePtr = new StrPtr(code, Encodings.UTF8);
799799
return Delegates.PyRun_SimpleStringFlags(codePtr, Utf8String);
800800
}
801801

802802
internal static NewReference PyRun_String(string code, RunFlagType st, BorrowedReference globals, BorrowedReference locals)
803803
{
804-
using var codePtr = new StrPtr(code, Encoding.UTF8);
804+
using var codePtr = new StrPtr(code, Encodings.UTF8);
805805
return Delegates.PyRun_StringFlags(codePtr, st, globals, locals, Utf8String);
806806
}
807807

@@ -813,14 +813,14 @@ internal static NewReference PyRun_String(string code, RunFlagType st, BorrowedR
813813
/// </summary>
814814
internal static NewReference Py_CompileString(string str, string file, int start)
815815
{
816-
using var strPtr = new StrPtr(str, Encoding.UTF8);
816+
using var strPtr = new StrPtr(str, Encodings.UTF8);
817817
using var fileObj = new PyString(file);
818818
return Delegates.Py_CompileStringObject(strPtr, fileObj, start, Utf8String, -1);
819819
}
820820

821821
internal static NewReference PyImport_ExecCodeModule(string name, BorrowedReference code)
822822
{
823-
using var namePtr = new StrPtr(name, Encoding.UTF8);
823+
using var namePtr = new StrPtr(name, Encodings.UTF8);
824824
return Delegates.PyImport_ExecCodeModule(namePtr, code);
825825
}
826826

@@ -867,13 +867,13 @@ internal static bool PyObject_IsIterable(BorrowedReference ob)
867867

868868
internal static int PyObject_HasAttrString(BorrowedReference pointer, string name)
869869
{
870-
using var namePtr = new StrPtr(name, Encoding.UTF8);
870+
using var namePtr = new StrPtr(name, Encodings.UTF8);
871871
return Delegates.PyObject_HasAttrString(pointer, namePtr);
872872
}
873873

874874
internal static NewReference PyObject_GetAttrString(BorrowedReference pointer, string name)
875875
{
876-
using var namePtr = new StrPtr(name, Encoding.UTF8);
876+
using var namePtr = new StrPtr(name, Encodings.UTF8);
877877
return Delegates.PyObject_GetAttrString(pointer, namePtr);
878878
}
879879

@@ -884,12 +884,12 @@ internal static NewReference PyObject_GetAttrString(BorrowedReference pointer, S
884884
internal static int PyObject_DelAttr(BorrowedReference @object, BorrowedReference name) => Delegates.PyObject_SetAttr(@object, name, null);
885885
internal static int PyObject_DelAttrString(BorrowedReference @object, string name)
886886
{
887-
using var namePtr = new StrPtr(name, Encoding.UTF8);
887+
using var namePtr = new StrPtr(name, Encodings.UTF8);
888888
return Delegates.PyObject_SetAttrString(@object, namePtr, null);
889889
}
890890
internal static int PyObject_SetAttrString(BorrowedReference @object, string name, BorrowedReference value)
891891
{
892-
using var namePtr = new StrPtr(name, Encoding.UTF8);
892+
using var namePtr = new StrPtr(name, Encodings.UTF8);
893893
return Delegates.PyObject_SetAttrString(@object, namePtr, value);
894894
}
895895

@@ -1071,7 +1071,7 @@ internal static bool PyBool_CheckExact(BorrowedReference ob)
10711071

10721072
internal static NewReference PyLong_FromString(string value, int radix)
10731073
{
1074-
using var valPtr = new StrPtr(value, Encoding.UTF8);
1074+
using var valPtr = new StrPtr(value, Encodings.UTF8);
10751075
return Delegates.PyLong_FromString(valPtr, IntPtr.Zero, radix);
10761076
}
10771077

@@ -1272,7 +1272,7 @@ internal static NewReference EmptyPyBytes()
12721272
internal static NewReference PyByteArray_FromStringAndSize(IntPtr strPtr, nint len) => Delegates.PyByteArray_FromStringAndSize(strPtr, len);
12731273
internal static NewReference PyByteArray_FromStringAndSize(string s)
12741274
{
1275-
using var ptr = new StrPtr(s, Encoding.UTF8);
1275+
using var ptr = new StrPtr(s, Encodings.UTF8);
12761276
return PyByteArray_FromStringAndSize(ptr.RawPointer, checked((nint)ptr.ByteCount));
12771277
}
12781278

@@ -1300,7 +1300,7 @@ internal static IntPtr PyBytes_AsString(BorrowedReference ob)
13001300

13011301
internal static NewReference PyUnicode_InternFromString(string s)
13021302
{
1303-
using var ptr = new StrPtr(s, Encoding.UTF8);
1303+
using var ptr = new StrPtr(s, Encodings.UTF8);
13041304
return Delegates.PyUnicode_InternFromString(ptr);
13051305
}
13061306

@@ -1375,7 +1375,7 @@ internal static bool PyDict_Check(BorrowedReference ob)
13751375

13761376
internal static BorrowedReference PyDict_GetItemString(BorrowedReference pointer, string key)
13771377
{
1378-
using var keyStr = new StrPtr(key, Encoding.UTF8);
1378+
using var keyStr = new StrPtr(key, Encodings.UTF8);
13791379
return Delegates.PyDict_GetItemString(pointer, keyStr);
13801380
}
13811381

@@ -1391,7 +1391,7 @@ internal static BorrowedReference PyDict_GetItemString(BorrowedReference pointer
13911391
/// </summary>
13921392
internal static int PyDict_SetItemString(BorrowedReference dict, string key, BorrowedReference value)
13931393
{
1394-
using var keyPtr = new StrPtr(key, Encoding.UTF8);
1394+
using var keyPtr = new StrPtr(key, Encodings.UTF8);
13951395
return Delegates.PyDict_SetItemString(dict, keyPtr, value);
13961396
}
13971397

@@ -1400,7 +1400,7 @@ internal static int PyDict_SetItemString(BorrowedReference dict, string key, Bor
14001400

14011401
internal static int PyDict_DelItemString(BorrowedReference pointer, string key)
14021402
{
1403-
using var keyPtr = new StrPtr(key, Encoding.UTF8);
1403+
using var keyPtr = new StrPtr(key, Encodings.UTF8);
14041404
return Delegates.PyDict_DelItemString(pointer, keyPtr);
14051405
}
14061406

@@ -1515,7 +1515,7 @@ internal static bool PyIter_Check(BorrowedReference ob)
15151515

15161516
internal static NewReference PyModule_New(string name)
15171517
{
1518-
using var namePtr = new StrPtr(name, Encoding.UTF8);
1518+
using var namePtr = new StrPtr(name, Encodings.UTF8);
15191519
return Delegates.PyModule_New(namePtr);
15201520
}
15211521

@@ -1529,7 +1529,7 @@ internal static NewReference PyModule_New(string name)
15291529
/// <returns>Return -1 on error, 0 on success.</returns>
15301530
internal static int PyModule_AddObject(BorrowedReference module, string name, StolenReference value)
15311531
{
1532-
using var namePtr = new StrPtr(name, Encoding.UTF8);
1532+
using var namePtr = new StrPtr(name, Encodings.UTF8);
15331533
IntPtr valueAddr = value.DangerousGetAddressOrNull();
15341534
int res = Delegates.PyModule_AddObject(module, namePtr, valueAddr);
15351535
// We can't just exit here because the reference is stolen only on success.
@@ -1547,7 +1547,7 @@ internal static int PyModule_AddObject(BorrowedReference module, string name, St
15471547

15481548
internal static NewReference PyImport_ImportModule(string name)
15491549
{
1550-
using var namePtr = new StrPtr(name, Encoding.UTF8);
1550+
using var namePtr = new StrPtr(name, Encodings.UTF8);
15511551
return Delegates.PyImport_ImportModule(namePtr);
15521552
}
15531553

@@ -1556,7 +1556,7 @@ internal static NewReference PyImport_ImportModule(string name)
15561556

15571557
internal static BorrowedReference PyImport_AddModule(string name)
15581558
{
1559-
using var namePtr = new StrPtr(name, Encoding.UTF8);
1559+
using var namePtr = new StrPtr(name, Encodings.UTF8);
15601560
return Delegates.PyImport_AddModule(namePtr);
15611561
}
15621562

@@ -1584,13 +1584,13 @@ internal static void PySys_SetArgvEx(int argc, string[] argv, int updatepath)
15841584

15851585
internal static BorrowedReference PySys_GetObject(string name)
15861586
{
1587-
using var namePtr = new StrPtr(name, Encoding.UTF8);
1587+
using var namePtr = new StrPtr(name, Encodings.UTF8);
15881588
return Delegates.PySys_GetObject(namePtr);
15891589
}
15901590

15911591
internal static int PySys_SetObject(string name, BorrowedReference ob)
15921592
{
1593-
using var namePtr = new StrPtr(name, Encoding.UTF8);
1593+
using var namePtr = new StrPtr(name, Encodings.UTF8);
15941594
return Delegates.PySys_SetObject(namePtr, ob);
15951595
}
15961596

@@ -1689,7 +1689,7 @@ internal static IntPtr PyMem_Malloc(long size)
16891689

16901690
internal static void PyErr_SetString(BorrowedReference ob, string message)
16911691
{
1692-
using var msgPtr = new StrPtr(message, Encoding.UTF8);
1692+
using var msgPtr = new StrPtr(message, Encodings.UTF8);
16931693
Delegates.PyErr_SetString(ob, msgPtr);
16941694
}
16951695

src/runtime/Util/Encodings.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
using System;
2+
using System.Text;
3+
4+
namespace Python.Runtime;
5+
6+
/// <summary>
/// Shared <see cref="Encoding"/> instances for converting between managed
/// strings and raw byte buffers.
/// </summary>
/// <remarks>
/// Every instance is constructed with its byte-order-mark option switched off
/// and its throw-on-invalid-input option switched on. The UTF-16 and UTF-32
/// instances take their byte order from <see cref="BitConverter.IsLittleEndian"/>.
/// The fields are <c>readonly</c> so the shared instances cannot be swapped
/// out at runtime by other code.
/// </remarks>
static class Encodings {
    /// <summary>UTF-8 without a byte order mark; invalid bytes raise an exception.</summary>
    public static readonly Encoding UTF8 = new UTF8Encoding(
        encoderShouldEmitUTF8Identifier: false,
        throwOnInvalidBytes: true);

    /// <summary>UTF-16 in the platform byte order, without a byte order mark; invalid bytes raise an exception.</summary>
    public static readonly Encoding UTF16 = new UnicodeEncoding(
        bigEndian: !BitConverter.IsLittleEndian,
        byteOrderMark: false,
        throwOnInvalidBytes: true);

    /// <summary>UTF-32 in the platform byte order, without a byte order mark; invalid input raises an exception.</summary>
    public static readonly Encoding UTF32 = new UTF32Encoding(
        bigEndian: !BitConverter.IsLittleEndian,
        byteOrderMark: false,
        throwOnInvalidCharacters: true);
}

tests/test_conversion.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,6 +510,9 @@ def test_string_conversion():
510510
ob.StringField = System.String(u'\uffff\uffff')
511511
assert ob.StringField == u'\uffff\uffff'
512512

513+
ob.StringField = System.String("\ufeffbom")
514+
assert ob.StringField == "\ufeffbom"
515+
513516
ob.StringField = None
514517
assert ob.StringField is None
515518

0 commit comments

Comments
 (0)