Skip to content

1838 Use Python buffer protocol when converting Python objects to .NET arrays #2372

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions src/embed_tests/TestConverter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Collections.Generic;
using System.Linq;
using System.Numerics;
using System.Text;

using NUnit.Framework;

Expand Down Expand Up @@ -209,6 +210,108 @@ class PyGetListImpl(test.GetListImpl):
List<string> result = inst.GetList();
CollectionAssert.AreEqual(new[] { "testing" }, result);
}

[Test]
public void TestConvertNumpyFloat32ArrayToManaged()
{
var testValue = new float[] { 0, 1, 2, 3 };
var nparr = np.arange(4, dtype: np.float32);

object convertedValue;
var converted = Converter.ToManaged(nparr, typeof(float[]), out convertedValue, false);

Assert.IsTrue(converted);
Assert.AreEqual(testValue, convertedValue);
}

[Test]
public void TestConvertNumpyFloat64_2DArrayToManaged()
{
var testValue = new double[,] {{ 0, 1, 2, 3,}, { 4, 5, 6, 7 }, { 8, 9, 10, 11 }};
var shape = new PyTuple(new[] {new PyInt(3), new PyInt(4)});
var nparr = np.arange(12, dtype: np.float64).reshape(shape);

object convertedValue;
var converted = Converter.ToManaged(nparr, typeof(double[,]), out convertedValue, false);

Assert.IsTrue(converted);
Assert.AreEqual(testValue, convertedValue);
}

[Test]
public void TestConvertBytearrayToManaged()
{
var testValue = Encoding.ASCII.GetBytes("test");
using var str = PythonEngine.Eval("'test'.encode('ascii')");

object convertedValue;
var converted = Converter.ToManaged(str, typeof(byte[]), out convertedValue, false);

Assert.IsTrue(converted);
Assert.AreEqual(testValue, convertedValue);
}

[Test]
[TestCaseSource(typeof(Arrays))]
public void TestConvertArrayToManaged(string arrayType, Type t, object expected)
{
object convertedValue;
var arr = array.array(arrayType.ToPython(), expected.ToPython());
var converted = Converter.ToManaged(arr, t, out convertedValue, false);

Assert.IsTrue(converted);
Assert.AreEqual(expected, convertedValue);
}

public class Arrays : System.Collections.IEnumerable
{
public System.Collections.IEnumerator GetEnumerator()
{
yield return new object[] { "b", typeof(byte[]), new byte[] { 0, 1, 2, 3, 4 } };
yield return new object[] { "B", typeof(byte[]), new byte[] { 0, 1, 2, 3, 4 } };
yield return new object[] { "u", typeof(char[]), new char[] { 'a', 'b', 'c', 'd', 'e' } };
yield return new object[] { "h", typeof(short[]), new short[] { -2, -1, 0, 1, 2, 3, 4 } };
yield return new object[] { "H", typeof(ushort[]), new ushort[] { 0, 1, 2, 3, 4 } };
yield return new object[] { "i", typeof(int[]), new int[] { -2, -1, 0, 1, 2, 3, 4 } };
yield return new object[] { "I", typeof(uint[]), new uint[] { 0, 1, 2, 3, 4 } };
yield return new object[] { "q", typeof(long[]), new long[] { -2, -1, 0, 1, 2, 3, 4 } };
yield return new object[] { "q", typeof(ulong[]), new ulong[] { 0, 1, 2, 3, 4 } };
yield return new object[] { "f", typeof(float[]), new float[] { -2, -1, 0, 1, 2, 3, 4 } };
yield return new object[] { "d", typeof(double[]), new double[] { -2, -1, 0, 1, 2, 3, 4 } };
}
};

dynamic np
{
get
{
try
{
return Py.Import("numpy");
}
catch (PythonException)
{
Assert.Inconclusive("Numpy or dependency not installed");
return null;
}
}
}

dynamic array
{
get
{
try
{
return Py.Import("array");
}
catch (PythonException)
{
Assert.Inconclusive("Could not import array");
return null;
}
}
}
}

public interface IGetList
Expand Down
38 changes: 38 additions & 0 deletions src/runtime/Converter.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Reflection;
using System.Runtime.InteropServices;
using System.Security;
Expand Down Expand Up @@ -880,12 +881,49 @@ private static void SetConversionError(BorrowedReference value, Type target)
/// Convert a Python value to a correctly typed managed array instance.
/// The Python value must support the Python iterator protocol or and the
/// items in the sequence must be convertible to the target array type.
/// If the Python value supports the buffer protocol the underlying buffer
/// will be copied directly.
/// </summary>
private static bool ToArray(BorrowedReference value, Type obType, out object? result, bool setError)
{
Type elementType = obType.GetElementType();
result = null;

// structs can also be blittable but for the standard usecases this suffices
var isBlittable = elementType.IsPrimitive && elementType != typeof(char) && elementType != typeof(bool);
if (isBlittable && Runtime.PyObject_GetBuffer(value, out var view, (int)PyBUF.CONTIG_RO) == 0)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both directions need a check that source and target element types match.

Copy link
Author

@chickenservice chickenservice May 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Would it be ok to match the typecode of the .NET type to the corresponding format string of the buffer, i.e. roughly like so:

var compatible = false;
var format = buf.format;
switch (tc)
{
    case TypeCode.Boolean:
        compatible = format == "?";
        break;
    case TypeCode.Byte:
        compatible = format == "b";
        break;
        //etc...
    default:
        break;

This would require #2371 to be fixed so I can actually retrieve the format field.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure it is safe to assume getting buffer has the same semantics as using iterator for an arbitrary type that supports GetBuffer.

Imho, this could be made the default behavior for converting a fixed set of standard types, but for the others it might be better to expose some kind of explicit call instead of conversion. E.g. pyObjectInst.ReadBuffer(destinationArray)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this caught by the primitive blitable check?

Copy link
Author

@chickenservice chickenservice May 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could move the whole thing to after the if (IterObject.IsNull()) check to make sure it's an iterable? Other than that as @filmor said I already allow only primitive blittable types as array element types.

Copy link
Member

@lostmsu lostmsu May 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This issue is not about element types, but about container types. Does Python guarantee, that GetBuffer()[0], GetBuffer()[1], etc is the same sequence as .iter[0], .iter[1], etc for every type that implements GetBuffer?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought that was the point of the buffer protocol :). I'll read up more on it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chickenservice the concern is about breaking changes, especially within Python.NET 3.x, more especially within Python.NET 3.0.x.

If there's a difference in result, we should delay the change of behavior to 4.x, and in 3.x only add new functions.

IMHO, we can only immediately apply the speedups for the types that we know have the same behavior.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lostmsu that makes sense. The tests cover bytearray, array.array and ndarray. From my experience these behave according to what you described and are the canonical implementations anyway. Any concerns? Are there other types you have in mind? Should we limit ndim == 1 for now?

Copy link
Member

@lostmsu lostmsu May 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the tests do not cover multidim or the behavior in multidim is different, we should limit ndim to 1.

But again, I strongly suggest to add an explicit new method to PyObject that does the right thing with buffers, and only works with buffer protocol.

Copy link
Author

@chickenservice chickenservice May 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I misunderstood your suggestion, to clarify:

For upcoming minor/patch release expose this as a dedicated method and remove it from ToArray completely as breaking changes cannot be ruled out completely.

For the next major release reevaluate whether this can/should be done implicitly in ToArray and accept breaking changes.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chickenservice yep, that's exactly right.

{
GCHandle ptr;
try
{
var shape = new IntPtr[view.ndim];
Marshal.Copy(view.shape, shape, 0, view.ndim);
Array arr = Array.CreateInstance(elementType, shape.Select(x => (long)x).ToArray());
ptr = GCHandle.Alloc(arr, GCHandleType.Pinned);
var addr = ptr.AddrOfPinnedObject();
unsafe
{
Buffer.MemoryCopy((void*)view.buf, (void*)addr, arr.Length * Marshal.SizeOf(elementType), view.len);
}

result = arr;
return true;
}
finally
{
if (ptr.IsAllocated)
{
ptr.Free();
}

Runtime.PyBuffer_Release(ref view);
}
}
else
{
Exceptions.Clear();
}

using var IterObject = Runtime.PyObject_GetIter(value);
if (IterObject.IsNull())
{
Expand Down
Loading