Skip to content

Commit 8011216

Browse files
authored
Merge pull request shimat#621 from shimat/textdetector
TextDetector
2 parents 618d2ec + 8f6cf46 commit 8011216

33 files changed

+2227
-298
lines changed

appveyor.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,8 @@ test_script:
171171
- cmd: cd %APPVEYOR_BUILD_FOLDER%
172172
- cmd: cd test
173173
- cmd: cd OpenCvSharp.Tests
174-
- cmd: dotnet test -c Release -f net461 #--no-build
175-
- cmd: dotnet test -c Release -f netcoreapp2.0 #--no-build
174+
- cmd: dotnet test -c Release -f net472 #--no-build
175+
- cmd: dotnet test -c Release -f netcoreapp2.1 #--no-build
176176
- cmd: cd %APPVEYOR_BUILD_FOLDER%
177177

178178
artifacts:
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
using System;
2+
3+
namespace OpenCvSharp
4+
{
5+
/// <summary>
6+
/// An abstract class providing interface for text detection algorithms
7+
/// </summary>
8+
public abstract class TextDetector : DisposableCvObject
{
    /// <summary>
    /// Detects text in an image and reports one bounding box and one confidence value per detection.
    /// </summary>
    /// <param name="inputImage">The image to process. Must not be null or disposed.</param>
    /// <param name="bbox">Receives the detected word bounding boxes, copied out of the native vector.</param>
    /// <param name="confidence">Receives the classifier confidence values, copied out of the native vector.</param>
    /// <exception cref="ArgumentNullException"><paramref name="inputImage"/> is null.</exception>
    public virtual void Detect(InputArray inputImage, out Rect[] bbox, out float[] confidence)
    {
        // Fail fast in managed code instead of handing a released native pointer to OpenCV.
        ThrowIfDisposed();
        if (inputImage == null)
            throw new ArgumentNullException(nameof(inputImage));
        inputImage.ThrowIfDisposed();

        using (var bboxVec = new VectorOfRect())
        using (var confidenceVec = new VectorOfFloat())
        {
            NativeMethods.text_TextDetector_detect(ptr, inputImage.CvPtr, bboxVec.CvPtr, confidenceVec.CvPtr);

            // Copy the results to managed arrays before the native vectors are disposed.
            bbox = bboxVec.ToArray();
            confidence = confidenceVec.ToArray();
        }

        // Keep the wrappers reachable until the native call has finished with their raw pointers.
        GC.KeepAlive(this);
        GC.KeepAlive(inputImage);
    }
}
34+
}
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
using System;
2+
using System.Collections.Generic;
3+
using OpenCvSharp.Util;
4+
5+
namespace OpenCvSharp
6+
{
7+
// ReSharper disable InconsistentNaming
8+
9+
/// <summary>
10+
/// TextDetectorCNN class provides the functionality of text bounding box detection.
11+
/// </summary>
12+
/// <remarks>
13+
/// This class finds the bounding boxes of text words in a given input image.
14+
/// This class uses OpenCV dnn module to load pre-trained model described in @cite LiaoSBWL17.
15+
/// The original repository with the modified SSD Caffe version: https://github.com/MhLiao/TextBoxes.
16+
/// The model can be downloaded from [DropBox](https://www.dropbox.com/s/g8pjzv2de9gty8g/TextBoxes_icdar13.caffemodel?dl=0).
17+
/// The modified .prototxt file with the model description can be found in `opencv_contrib/modules/text/samples/textbox.prototxt`.
18+
/// </remarks>
19+
public class TextDetectorCNN : TextDetector
{
    /// <summary>
    /// Wrapper around the native smart pointer returned by the create functions.
    /// It is disposed together with this object in <see cref="DisposeManaged"/>.
    /// </summary>
    private Ptr objectPtr;

    /// <summary>
    /// Creates an instance of the TextDetectorCNN class using the provided parameters.
    /// </summary>
    /// <param name="modelArchFilename">the relative or absolute path to the prototxt file describing the classifiers architecture.</param>
    /// <param name="modelWeightsFilename">the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.</param>
    /// <param name="detectionSizes">a list of sizes for multiscale detection. The values `[(300,300),(700,500),(700,300),(700,700),(1600,1600)]` are recommended in @cite LiaoSBWL17 to achieve the best quality.</param>
    /// <returns>A new detector wrapping the native instance.</returns>
    /// <exception cref="ArgumentException"><paramref name="modelArchFilename"/> or <paramref name="modelWeightsFilename"/> is null or empty.</exception>
    /// <exception cref="ArgumentNullException"><paramref name="detectionSizes"/> is null.</exception>
    public static TextDetectorCNN Create(
        string modelArchFilename, string modelWeightsFilename, IEnumerable<Size> detectionSizes)
    {
        // Each guard reports the parameter that actually failed validation.
        if (string.IsNullOrEmpty(modelArchFilename))
            throw new ArgumentException("empty string", nameof(modelArchFilename));
        if (string.IsNullOrEmpty(modelWeightsFilename))
            throw new ArgumentException("empty string", nameof(modelWeightsFilename));
        if (detectionSizes == null)
            throw new ArgumentNullException(nameof(detectionSizes));

        // Materialize once so the array and the length passed to native code always agree.
        var detectionSizesArray = EnumerableEx.ToArray(detectionSizes);
        var ptr = NativeMethods.text_TextDetectorCNN_create1(
            modelArchFilename, modelWeightsFilename, detectionSizesArray, detectionSizesArray.Length);
        GC.KeepAlive(detectionSizes);
        return new TextDetectorCNN(ptr);
    }

    /// <summary>
    /// Creates an instance of the TextDetectorCNN class using the provided parameters.
    /// No detection sizes are passed to the native create function by this overload.
    /// </summary>
    /// <param name="modelArchFilename">the relative or absolute path to the prototxt file describing the classifiers architecture.</param>
    /// <param name="modelWeightsFilename">the relative or absolute path to the file containing the pretrained weights of the model in caffe-binary form.</param>
    /// <returns>A new detector wrapping the native instance.</returns>
    /// <exception cref="ArgumentException"><paramref name="modelArchFilename"/> or <paramref name="modelWeightsFilename"/> is null or empty.</exception>
    public static TextDetectorCNN Create(
        string modelArchFilename, string modelWeightsFilename)
    {
        // Same validation as the three-argument overload, so bad paths never reach the marshaller.
        if (string.IsNullOrEmpty(modelArchFilename))
            throw new ArgumentException("empty string", nameof(modelArchFilename));
        if (string.IsNullOrEmpty(modelWeightsFilename))
            throw new ArgumentException("empty string", nameof(modelWeightsFilename));

        var ptr = NativeMethods.text_TextDetectorCNN_create2(modelArchFilename, modelWeightsFilename);
        return new TextDetectorCNN(ptr);
    }

    /// <summary>
    /// Wraps a native smart pointer and caches the raw object pointer obtained from it.
    /// </summary>
    /// <param name="ptr">Pointer returned by one of the native create functions.</param>
    internal TextDetectorCNN(IntPtr ptr)
    {
        this.objectPtr = new Ptr(ptr);
        this.ptr = objectPtr.Get();
    }

    /// <summary>
    /// Releases managed resources
    /// </summary>
    protected override void DisposeManaged()
    {
        // Disposing the smart-pointer wrapper is what releases the native detector.
        objectPtr?.Dispose();
        objectPtr = null;
        base.DisposeManaged();
    }

    /// <summary>
    /// Detects text in an image and reports one bounding box and one confidence value per detection.
    /// </summary>
    /// <param name="inputImage">The image to process. Must not be null or disposed.</param>
    /// <param name="bbox">Receives the detected word bounding boxes, copied out of the native vector.</param>
    /// <param name="confidence">Receives the classifier confidence values, copied out of the native vector.</param>
    /// <exception cref="ArgumentNullException"><paramref name="inputImage"/> is null.</exception>
    public override void Detect(InputArray inputImage, out Rect[] bbox, out float[] confidence)
    {
        // Fail fast in managed code instead of handing a released native pointer to OpenCV.
        ThrowIfDisposed();
        if (inputImage == null)
            throw new ArgumentNullException(nameof(inputImage));
        inputImage.ThrowIfDisposed();

        using (var bboxVec = new VectorOfRect())
        using (var confidenceVec = new VectorOfFloat())
        {
            NativeMethods.text_TextDetectorCNN_detect(ptr, inputImage.CvPtr, bboxVec.CvPtr, confidenceVec.CvPtr);

            // Copy the results to managed arrays before the native vectors are disposed.
            bbox = bboxVec.ToArray();
            confidence = confidenceVec.ToArray();
        }

        // Keep the wrappers reachable until the native call has finished with their raw pointers.
        GC.KeepAlive(this);
        GC.KeepAlive(inputImage);
    }

    /// <summary>
    /// Smart-pointer wrapper for the native TextDetectorCNN instance.
    /// </summary>
    internal class Ptr : OpenCvSharp.Ptr
    {
        public Ptr(IntPtr ptr) : base(ptr)
        {
        }

        /// <summary>
        /// Returns the raw object pointer held by the native smart pointer.
        /// </summary>
        public override IntPtr Get()
        {
            var res = NativeMethods.text_Ptr_TextDetectorCNN_get(ptr);
            GC.KeepAlive(this);
            return res;
        }

        /// <summary>
        /// Deletes the native smart pointer, then runs the base cleanup.
        /// </summary>
        protected override void DisposeUnmanaged()
        {
            NativeMethods.text_Ptr_TextDetectorCNN_delete(ptr);
            base.DisposeUnmanaged();
        }
    }
}
123+
}
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
using System;
2+
using System.Runtime.InteropServices;
3+
4+
#pragma warning disable 1591
5+
6+
namespace OpenCvSharp
7+
{
8+
static partial class NativeMethods
{
    // ---- Detection -------------------------------------------------------
    // The managed callers pass the detector's native pointer, the input image's
    // CvPtr, and the CvPtr of a VectorOfRect / VectorOfFloat that receive the results.

    [DllImport(DllExtern, ExactSpelling = true, CallingConvention = CallingConvention.Cdecl)]
    public static extern void text_TextDetector_detect(
        IntPtr obj, IntPtr inputImage, IntPtr Bbox, IntPtr confidence);

    [DllImport(DllExtern, ExactSpelling = true, CallingConvention = CallingConvention.Cdecl)]
    public static extern void text_TextDetectorCNN_detect(
        IntPtr obj, IntPtr inputImage, IntPtr Bbox, IntPtr confidence);

    // ---- Construction ----------------------------------------------------
    // Both functions return a pointer that the managed side wraps in TextDetectorCNN.Ptr.

    // Variant taking an explicit array of detection sizes together with its length.
    [DllImport(DllExtern, ExactSpelling = true, CallingConvention = CallingConvention.Cdecl)]
    public static extern IntPtr text_TextDetectorCNN_create1(
        [MarshalAs(UnmanagedType.LPStr)] string modelArchFilename,
        [MarshalAs(UnmanagedType.LPStr)] string modelWeightsFilename,
        [MarshalAs(UnmanagedType.LPArray)] Size[] detectionSizes,
        int detectionSizesLength);

    // Variant taking only the two model file paths.
    [DllImport(DllExtern, ExactSpelling = true, CallingConvention = CallingConvention.Cdecl)]
    public static extern IntPtr text_TextDetectorCNN_create2(
        [MarshalAs(UnmanagedType.LPStr)] string modelArchFilename,
        [MarshalAs(UnmanagedType.LPStr)] string modelWeightsFilename);

    // ---- Smart-pointer management ----------------------------------------

    // Called from TextDetectorCNN.Ptr.DisposeUnmanaged.
    [DllImport(DllExtern, ExactSpelling = true, CallingConvention = CallingConvention.Cdecl)]
    public static extern void text_Ptr_TextDetectorCNN_delete(IntPtr obj);

    // Called from TextDetectorCNN.Ptr.Get to obtain the raw object pointer.
    [DllImport(DllExtern, ExactSpelling = true, CallingConvention = CallingConvention.Cdecl)]
    public static extern IntPtr text_Ptr_TextDetectorCNN_get(IntPtr obj);
}
33+
}

src/OpenCvSharpExtern/OpenCvSharpExtern.vcxproj

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,7 @@
326326
<ClInclude Include="shape_ShapeDistanceExtractor.h" />
327327
<ClInclude Include="std_string.h" />
328328
<ClInclude Include="text.h" />
329+
<ClInclude Include="text_TextDetector.h" />
329330
<ClInclude Include="tracking.h" />
330331
<ClInclude Include="tracking_MultiTracker.h" />
331332
<ClInclude Include="videoio.h" />

src/OpenCvSharpExtern/OpenCvSharpExtern.vcxproj.filters

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -339,9 +339,6 @@
339339
<ClInclude Include="std_string.h">
340340
<Filter>Header Files</Filter>
341341
</ClInclude>
342-
<ClInclude Include="text.h">
343-
<Filter>Header Files</Filter>
344-
</ClInclude>
345342
<ClInclude Include="img_hash.h">
346343
<Filter>Header Files</Filter>
347344
</ClInclude>
@@ -381,6 +378,12 @@
381378
<ClInclude Include="objdetect_QRCodeDetector.h">
382379
<Filter>Header Files\objdetect</Filter>
383380
</ClInclude>
381+
<ClInclude Include="text_TextDetector.h">
382+
<Filter>Header Files\text</Filter>
383+
</ClInclude>
384+
<ClInclude Include="text.h">
385+
<Filter>Header Files\text</Filter>
386+
</ClInclude>
384387
</ItemGroup>
385388
<ItemGroup>
386389
<Filter Include="Source Files">
@@ -445,6 +448,9 @@
445448
<Filter Include="Header Files\tracking">
446449
<UniqueIdentifier>{b2a89203-38db-4ab1-a520-3252d788596d}</UniqueIdentifier>
447450
</Filter>
451+
<Filter Include="Header Files\text">
452+
<UniqueIdentifier>{94192d8f-b2fa-46be-ba1c-0fcd09bf1393}</UniqueIdentifier>
453+
</Filter>
448454
</ItemGroup>
449455
<ItemGroup>
450456
<None Include="packages.config" />

0 commit comments

Comments
 (0)