Skip to content

Autocast #1235

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 51 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
51d1d95
Autocast
haytham2597 Feb 11, 2024
29b4900
Added some features
haytham2597 Feb 17, 2024
defd582
Fix mistake gitignore
haytham2597 Feb 18, 2024
d532402
AMP
haytham2597 Feb 18, 2024
0b839db
Add Print Modules Still in progress
haytham2597 Feb 19, 2024
98cabfa
Add some printing module
haytham2597 Feb 19, 2024
669b4fa
Fix some dotnet build. Need fix tests
haytham2597 Feb 20, 2024
3940414
Fast tensor accessor for ToArray()
haytham2597 Jun 30, 2024
3469d7a
Update local
haytham2597 Jun 30, 2024
5062339
fix local build dotnet
haytham2597 Jun 30, 2024
3a467af
Fast ToArray() TensorAccessor
haytham2597 Jul 2, 2024
18c7528
Fast tensor accesor
haytham2597 Jul 2, 2024
728c9fb
fix accesor for every types
haytham2597 Jul 9, 2024
a9a611a
GradScaler
haytham2597 Jul 12, 2024
4a406ec
Trying fix build for azure
haytham2597 Jul 14, 2024
280c8d5
Range sequential
haytham2597 Jul 17, 2024
3c42a87
AMPManager
haytham2597 Jul 19, 2024
7cd7f9c
Amp
haytham2597 Jul 20, 2024
1293483
update
haytham2597 Jul 20, 2024
0c2769a
fix azure devops?
haytham2597 Jul 21, 2024
eafdd1e
fix test?
haytham2597 Jul 21, 2024
c0883d9
fix mac test?
haytham2597 Jul 21, 2024
9ac78bd
AMP Problem outscope
haytham2597 Jul 24, 2024
d6a0c28
gradscale, device cuda properties, etc.
haytham2597 Sep 3, 2024
21ce055
some gradscaler. Need grad_scale and found_inf attr in optimizer
haytham2597 Sep 3, 2024
e9f34c8
Merge branch 'main' of https://github.com/dotnet/TorchSharp
haytham2597 Sep 3, 2024
c70b523
update v2.4.0
haytham2597 Sep 3, 2024
36b79b9
some advance
haytham2597 Sep 5, 2024
376f4fb
Improve autocastmode
haytham2597 Sep 8, 2024
9f4a48b
Some Autocast f16, f32
haytham2597 Oct 18, 2024
f84392b
fix test jit, it is literally close
haytham2597 Oct 18, 2024
197c1e4
Test and some improve on autocast
haytham2597 Oct 19, 2024
061ec44
cross between tensors, improve grad scaler and add normalize #1382
haytham2597 Oct 21, 2024
851a09e
GELU approximate #1368
haytham2597 Oct 21, 2024
16aba79
Device Properties #462
haytham2597 Oct 21, 2024
441bbdd
tensor backward function signature #1376
haytham2597 Oct 21, 2024
194a1f0
Half, Bfloat16
haytham2597 Oct 21, 2024
63da9c2
some fix THSCuda
haytham2597 Oct 25, 2024
ce679e2
fast copy tensor accessor
haytham2597 Oct 25, 2024
958a187
rollback sln
haytham2597 Oct 25, 2024
abe9990
Merge branch 'main' into fast_tensor_accesor
NiklasGustafsson Oct 25, 2024
0b20f13
Numel
haytham2597 Oct 25, 2024
7df8e46
Merge branch 'fast_tensor_accesor' of https://github.com/haytham2597/…
haytham2597 Oct 25, 2024
1aa1f25
original sln and fix some issue
haytham2597 Oct 26, 2024
572bc3e
some
haytham2597 Oct 28, 2024
2c33985
Test and fix some error
haytham2597 Nov 1, 2024
5a6240c
trying fix comp THSCuda
haytham2597 Nov 4, 2024
0d7a585
updage
haytham2597 Feb 14, 2025
e524239
custom libtorch fullpatch
haytham2597 Feb 15, 2025
8f35385
some update
haytham2597 Mar 26, 2025
05c7efb
imprv
haytham2597 Mar 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
AMPManager
  • Loading branch information
haytham2597 committed Jul 19, 2024
commit 3c42a87bf4770d04fda2f67fc7ce1bca826b5598
89 changes: 89 additions & 0 deletions src/TorchSharp/Amp/AMPManager.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
using Google.Protobuf.WellKnownTypes;
using TorchSharp.PInvoke;
using TorchSharp.Utils;

namespace TorchSharp.Amp
{
    /// <summary>
    /// Process-wide coordinator for automatic mixed precision (AMP).
    /// Tracks the original dtype of every native tensor handle that is cast to the
    /// autocast "fast" dtype, so the casts can be reverted when the AMP scope ends.
    /// </summary>
    public class AMPManager : IDisposable
    {
        // Maps a native tensor handle to the dtype it had BEFORE autocasting,
        // so Revert() can restore it. Must be initialized here: the private
        // constructor does not create it, and Add()/Revert() dereference it.
        public UnorderedMap<IntPtr, torch.ScalarType> TensorPtrs = new UnorderedMap<IntPtr, torch.ScalarType>();

        private readonly AutocastMode autocastMode = AutocastMode.GetInstance();

        // Guards lazy creation of the singleton (addresses the old "Make Singleton THREADSAFE" TODO).
        private static readonly object InstanceLock = new object();
        private static AMPManager Instance;

        private bool disposedValue;

        private AMPManager() { }

        /// <summary>True when autocasting is currently enabled on the underlying <see cref="AutocastMode"/>.</summary>
        public bool IsEnabled => autocastMode.Enabled;

        /// <summary>
        /// Returns the process-wide singleton, creating it on first use. Thread-safe.
        /// </summary>
        public static AMPManager GetInstance()
        {
            // Double-checked locking: lock-free read on the fast path,
            // lock taken only for first-time creation.
            if (Instance == null) {
                lock (InstanceLock) {
                    Instance ??= new AMPManager();
                }
            }
            return Instance;
        }

        // Casts the tensor behind 'ptr' to 'type' via the native layer.
        // NOTE(review): THSTensor_to_type returns a tensor handle that is only
        // checked for failure and then discarded — confirm the conversion is
        // observable through 'ptr'; otherwise the cast is lost and the returned
        // native handle leaks. TODO confirm against THSTensor.cpp.
        private void To(IntPtr ptr, torch.ScalarType type)
        {
            var res = NativeMethods.THSTensor_to_type(ptr, (sbyte)type);
            if (res == IntPtr.Zero)
                torch.CheckForErrors();
        }

        /// <summary>
        /// Restores every registered tensor to its recorded original dtype and
        /// empties the registry.
        /// </summary>
        private void Revert()
        {
            // foreach disposes the enumerator, matching the original explicit 'using'.
            foreach (var entry in TensorPtrs)
                To(entry.Key, entry.Value);
            TensorPtrs.Clear();
        }

        /// <summary>
        /// Registers a native tensor handle with the autocast scope. While autocast
        /// is enabled the tensor is cast to the fast dtype and its original dtype is
        /// remembered; while disabled, a previously-cast tensor is restored instead.
        /// </summary>
        /// <param name="ptr">Native handle of the tensor to (un)cast.</param>
        public void Add(IntPtr ptr)
        {
            if (!autocastMode.Enabled) {
                // Autocast is off: if we cast this tensor earlier, put its dtype back.
                if (TensorPtrs.ContainsKey(ptr))
                    To(ptr, TensorPtrs[ptr]);
                return;
            }

            // Record the original dtype only once. The previous unconditional write
            // meant a second Add() for the same handle stored the already-cast fast
            // dtype, permanently losing the true original.
            if (!TensorPtrs.ContainsKey(ptr))
                TensorPtrs[ptr] = (torch.ScalarType)NativeMethods.THSTensor_type(ptr);
            To(ptr, autocastMode.GetFastType()); //TODO: Set scalar autocast
        }

        /// <summary>
        /// Intended to open an AMP scope. Currently returns null (which a C# 'using'
        /// statement tolerates); TODO: return a real scope object that reverts on dispose.
        /// </summary>
        public IDisposable Enter()
        {
            return null;
        }

        protected virtual void Dispose(bool disposing)
        {
            if (disposedValue)
                return;
            if (disposing) {
                // Managed state (TensorPtrs, autocastMode) may only be touched on an
                // explicit Dispose(): during finalization those objects may already
                // have been finalized themselves.
                Revert();
                autocastMode.Dispose();
            }
            disposedValue = true;
        }

        ~AMPManager()
        {
            Dispose(false);
        }

        public void Dispose()
        {
            // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
            Dispose(disposing: true);
            GC.SuppressFinalize(this);
        }
    }
}
29 changes: 0 additions & 29 deletions src/TorchSharp/Amp/AutocastDisposeManager.cs

This file was deleted.

23 changes: 0 additions & 23 deletions src/TorchSharp/Amp/AutocastDisposeScope.cs

This file was deleted.

11 changes: 0 additions & 11 deletions src/TorchSharp/Amp/AutocastManager.cs

This file was deleted.

97 changes: 67 additions & 30 deletions src/TorchSharp/Amp/AutocastMode.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Security.Cryptography;
using System.Text;
using System.Threading.Tasks;

Expand All @@ -17,22 +18,33 @@ public static torch.Tensor AutoCast(this torch.Tensor input)
public sealed class AutocastMode : IDisposable
{
//NEED "Register" all tensor in scope for uncasting outer-scope
private bool Enabled, Prev;
internal bool Enabled, Prev;
//private torch.ScalarType Dtype = torch.ScalarType.Float32;
private torch.ScalarType fast_dtype = torch.ScalarType.Float32;
private torch.Device Device = new torch.Device(DeviceType.CUDA);
internal torch.ScalarType fast_dtype = torch.ScalarType.Float32;
public torch.Device Device = new torch.Device(DeviceType.CUDA);
private static AutocastMode instance;
bool disposedValue;

/*public static AutocastMode GetInstance(torch.Device dev, torch.ScalarType? dtype = null, bool enabled = true, bool? cache_enabled = null)
{
if(instance ==null)
instance = new AutocastMode(dev, dtype, enabled, cache_enabled);
return instance;
}*/
{
if(instance ==null)
instance = new AutocastMode(dev, dtype, enabled, cache_enabled);
return instance;
}*/
public static AutocastMode GetInstance()
{
return instance ??= new AutocastMode(torch.CUDA, cache_enabled:true);
}

public torch.ScalarType GetFastType()
{
var ft = torch.ScalarType.Float32;
if (Device.type == DeviceType.CUDA)
ft = torch.get_autocast_gpu_dtype();
if (Device.type == DeviceType.CPU)
ft = torch.get_autocast_cpu_dtype();
return ft;
}
private AutocastMode(torch.Device dev, torch.ScalarType? dtype = null, bool enabled=true, bool? cache_enabled = null)
{
//var la = torch.tensor(9);
Expand Down Expand Up @@ -78,32 +90,57 @@ internal torch.Tensor CastTensor(torch.Tensor tensor)
return tensor;
return tensor.to(fast_dtype, tensor.device);
}
/*public IDisposable Enter()
{

return this;
}*/
public void Dispose()
private void Dispose(bool disposing)
{
this.Enabled = false;
if (Device.type == DeviceType.CUDA) {
if(torch.autocast_decrement_nesting() == 0)
torch.clear_autocast_cache();
torch.set_autocast_gpu_dtype(this.fast_dtype);
//torch.set_autocast_enabled(this.Prev);
torch.set_autocast_enabled(false);
torch.set_autocast_cache_enabled(false);
}
if (!disposedValue) {
if (disposing) {

if (Device.type == DeviceType.CPU) {
if (torch.autocast_decrement_nesting() == 0)
torch.clear_autocast_cache();
//torch.set_autocast_enabled(this.Prev);
torch.set_autocast_cpu_dtype(this.fast_dtype);
torch.set_autocast_enabled(false);
torch.set_autocast_cache_enabled(false);
this.Enabled = false;
if (Device.type == DeviceType.CUDA) {
if (torch.autocast_decrement_nesting() == 0)
torch.clear_autocast_cache();
torch.set_autocast_gpu_dtype(this.fast_dtype);
//torch.set_autocast_enabled(this.Prev);
torch.set_autocast_enabled(false);
torch.set_autocast_cache_enabled(false);
}

if (Device.type == DeviceType.CPU) {
if (torch.autocast_decrement_nesting() == 0)
torch.clear_autocast_cache();
//torch.set_autocast_enabled(this.Prev);
torch.set_autocast_cpu_dtype(this.fast_dtype);
torch.set_autocast_enabled(false);
torch.set_autocast_cache_enabled(false);
}
//throw new NotImplementedException();
// TODO: dispose managed state (managed objects)
}

// TODO: free unmanaged resources (unmanaged objects) and override finalizer
// TODO: set large fields to null
disposedValue = true;
}
//throw new NotImplementedException();
}

// // TODO: override finalizer only if 'Dispose(bool disposing)' has code to free unmanaged resources
// ~AutocastMode()
// {
// // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
// Dispose(disposing: false);
// }

public void Dispose()
{
// Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
Dispose(disposing: true);
GC.SuppressFinalize(this);
}
/*public IDisposable Enter()
{

return this;
}*/
}
}
7 changes: 2 additions & 5 deletions src/TorchSharp/Amp/GradScaler.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ public class GradScaler
private bool Enabled;
private torch.Tensor _scale, _growth_tracker;
private float InitScale, GrowthFactor, BackoffFactor, GrowthInterval, InitGrowthTracker;

private Dictionary<int, Dictionary<string, object>> _per_optimizer_states = new Dictionary<int, Dictionary<string, object>>();
//https://github.com/pytorch/pytorch/blob/main/torch/amp/grad_scaler.py
public GradScaler(torch.Device dev, float init_scale = 2.0e16f, float growth_factor = 2.0f,
Expand Down Expand Up @@ -54,9 +53,9 @@ public torch.Tensor scale(torch.Tensor output)
}
private class MultiDeviceReplicator
{
private torch.Tensor master;
private readonly torch.Tensor master;

internal Dictionary<torch.Device, torch.Tensor> per_device_tensors = new Dictionary<torch.Device, torch.Tensor>();
internal readonly Dictionary<torch.Device, torch.Tensor> per_device_tensors = new Dictionary<torch.Device, torch.Tensor>();
public MultiDeviceReplicator(torch.Tensor master_tensor)
{
master = master_tensor;
Expand Down Expand Up @@ -155,8 +154,6 @@ public void unscale(torch.optim.Optimizer optimizer)
return;

check_scale_growth_tracker(nameof(unscale));


}
}
}
28 changes: 26 additions & 2 deletions src/TorchSharp/NN/Convolution/Conv1D.cs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ namespace Modules
{
public abstract class Convolution : torch.nn.Module<Tensor, Tensor>
{
internal long _dimension, _in_channel, _out_channel, _kernel,_stride, _padding,_dilation,_groups;
internal PaddingModes _paddingModes;
internal (long, long)? _kernels, _strides, _paddings, _dilations;
internal bool _bias;
protected Convolution(IntPtr handle, IntPtr boxedHandle, long input_channels) : base(handle, boxedHandle)
{
this.input_channels = input_channels;
Expand Down Expand Up @@ -113,7 +117,17 @@ public static Conv1d Conv1d(long in_channels, long out_channels, long kernelSize
{
var res = THSNN_Conv1d_ctor(in_channels, out_channels, kernelSize, stride, padding, dilation, (long)padding_mode, groups, bias, out var boxedHandle);
if (res == IntPtr.Zero) { torch.CheckForErrors(); }
return new Conv1d(res, boxedHandle, in_channels).MoveModule<Conv1d>(device, dtype);
return new Conv1d(res, boxedHandle, in_channels) {
_in_channel = in_channels,
_out_channel = out_channels,
_kernel = kernelSize,
_stride = stride,
_padding = padding,
_dilation = dilation,
_paddingModes = padding_mode,
_groups = groups,
_bias = bias
}.MoveModule<Conv1d>(device, dtype);
}

/// <summary>
Expand All @@ -135,7 +149,17 @@ public static Conv1d Conv1d(long in_channels, long out_channels, long kernelSize
{
var res = THSNN_Conv1d_ctor(in_channels, out_channels, kernelSize, stride, padding == Padding.Valid ? 0 : -1, dilation, (long)padding_mode, groups, bias, out var boxedHandle);
if (res == IntPtr.Zero) { torch.CheckForErrors(); }
return new Conv1d(res, boxedHandle, in_channels).MoveModule<Conv1d>(device, dtype);
return new Conv1d(res, boxedHandle, in_channels) {
_in_channel = in_channels,
_out_channel = out_channels,
_kernel = kernelSize,
_stride = stride,
_padding = (long)padding,
_dilation = dilation,
_paddingModes = padding_mode,
_groups = groups,
_bias = bias
}.MoveModule<Conv1d>(device, dtype);
}

public static partial class functional
Expand Down
Loading