Merge pull request opencv#10215 from dkurt:dnn_js

vpisarev · vpisarev · commit 7e680bd9ff27 · 2017-12-11T12:47:52.000Z
diff --git a/3rdparty/protobuf/src/google/protobuf/stubs/port.h b/3rdparty/protobuf/src/google/protobuf/stubs/port.h
@@ -224,8 +224,7 @@ static const uint64 kuint64max = GOOGLE_ULONGLONG(0xFFFFFFFFFFFFFFFF);
 
 #if defined(__clang__) && defined(__has_cpp_attribute) \
     && !defined(GOOGLE_PROTOBUF_OS_APPLE)
-# if defined(GOOGLE_PROTOBUF_OS_NACL) || defined(EMSCRIPTEN) || \
-     __has_cpp_attribute(clang::fallthrough)
+# if defined(GOOGLE_PROTOBUF_OS_NACL) || __has_cpp_attribute(clang::fallthrough)
 #  define GOOGLE_FALLTHROUGH_INTENDED [[clang::fallthrough]]
 # endif
 #endif
diff --git a/doc/tutorials/dnn/dnn_javascript/dnn_javascript.markdown b/doc/tutorials/dnn/dnn_javascript/dnn_javascript.markdown
@@ -0,0 +1,44 @@
+# How to run deep networks in browser {#tutorial_dnn_javascript}
+
+## Introduction
+This tutorial will show us how to run deep learning models using OpenCV.js right
+in a browser. The tutorial refers to a sample pipeline of face detection and face
+recognition models.
+
+## Face detection
+The face detection network takes a BGR image as input and produces a set of bounding
+boxes that might contain faces. All that we need to do is select the boxes with a
+strong confidence.
+
+## Face recognition
+The network is called OpenFace (project https://github.com/cmusatyalab/openface).
+The face recognition model receives an RGB face image of size `96x96`. Then it returns
+a `128`-dimensional unit vector that represents the input face as a point on the unit
+multidimensional sphere. So the difference between two faces is the angle between the two
+output vectors.
+
+## Sample
+The whole sample is an HTML page that has JavaScript code to use OpenCV.js functionality.
+You can see this page embedded below. Press the `Start` button to begin the demo.
+Press `Add a person` to name a person that is recognized as an unknown one.
+Next we'll discuss the main parts of the code.
+
+@htmlinclude js_face_recognition.html
+
+-# Run face detection network to detect faces on input image.
+@snippet dnn/js_face_recognition.html Run face detection model
+You may play with input blob sizes to balance detection quality and efficiency.
+The bigger the input blob, the smaller the faces that may be detected.
+
+-# Run the face recognition network to get a `128`-dimensional unit feature vector for the input face image.
+@snippet dnn/js_face_recognition.html Get 128 floating points feature vector
+
+-# Perform a recognition.
+@snippet dnn/js_face_recognition.html Recognize
+Match a new feature vector with the registered ones. Return the name of the best-matched person.
+
+-# The main loop.
+@snippet dnn/js_face_recognition.html Define frames processing
+The main loop of our application receives frames from a camera and recognizes
+every detected face in each frame. We start this function once, when OpenCV.js has been
+initialized and the deep learning models have been downloaded.
diff --git a/doc/tutorials/dnn/table_of_content_dnn.markdown b/doc/tutorials/dnn/table_of_content_dnn.markdown
@@ -25,10 +25,26 @@ Deep Neural Networks (dnn module) {#tutorial_table_of_content_dnn}
 
     In this tutorial we describe the ways to schedule your networks using Halide backend in OpenCV deep learning module.
 
+-   @subpage tutorial_dnn_android
+
+    *Compatibility:* \> OpenCV 3.3
+
+    *Author:* Dmitry Kurtaev
+
+    This tutorial will show you how to run a deep learning model using OpenCV on an Android device.
+
 -   @subpage tutorial_dnn_yolo
 
     *Compatibility:* \> OpenCV 3.3.1
 
     *Author:* Alessandro de Oliveira Faria
 
     In this tutorial you will learn how to use opencv_dnn module using yolo_object_detection with device capture, video file or image.
+
+-   @subpage tutorial_dnn_javascript
+
+    *Compatibility:* \> OpenCV 3.3.1
+
+    *Author:* Dmitry Kurtaev
+
+    In this tutorial we'll run deep learning models in a browser using OpenCV.js.
diff --git a/modules/dnn/CMakeLists.txt b/modules/dnn/CMakeLists.txt
@@ -15,7 +15,7 @@ set(the_description "Deep neural network module. It allows to load models from d
 
 ocv_add_dispatched_file("layers/layers_common" AVX AVX2)
 
-ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java)
+ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java js)
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-shadow -Wno-parentheses -Wmaybe-uninitialized -Wsign-promo
                                      -Wmissing-declarations -Wmissing-prototypes
 )
diff --git a/modules/dnn/include/opencv2/dnn/all_layers.hpp b/modules/dnn/include/opencv2/dnn/all_layers.hpp
@@ -221,11 +221,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     class CV_EXPORTS LRNLayer : public Layer
     {
     public:
-        enum Type
-        {
-            CHANNEL_NRM,
-            SPATIAL_NRM
-        };
         int type;
 
         int size;
@@ -238,14 +233,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     class CV_EXPORTS PoolingLayer : public Layer
     {
     public:
-        enum Type
-        {
-            MAX,
-            AVE,
-            STOCHASTIC,
-            ROI
-        };
-
         int type;
         Size kernel, stride, pad;
         bool globalPooling;
@@ -474,13 +461,6 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
     class CV_EXPORTS EltwiseLayer : public Layer
     {
     public:
-        enum EltwiseOp
-        {
-            PROD = 0,
-            SUM = 1,
-            MAX = 2,
-        };
-
         static Ptr<EltwiseLayer> create(const LayerParams &params);
     };
 
diff --git a/modules/dnn/include/opencv2/dnn/dnn.hpp b/modules/dnn/include/opencv2/dnn/dnn.hpp
@@ -423,8 +423,8 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
          *  @param outputBlobs contains all output blobs for each layer specified in @p outBlobNames.
          *  @param outBlobNames names for layers which outputs are needed to get
          */
-        CV_WRAP void forward(std::vector<std::vector<Mat> >& outputBlobs,
-                             const std::vector<String>& outBlobNames);
+        void forward(std::vector<std::vector<Mat> >& outputBlobs,
+                     const std::vector<String>& outBlobNames);
 
         //TODO:
         /** @brief Optimized forward.
@@ -467,7 +467,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
          *  @note If updating blob is not empty then @p blob must have the same shape,
          *  because network reshaping is not implemented yet.
          */
-        CV_WRAP void setInput(const Mat &blob, const String& name = "");
+        CV_WRAP void setInput(InputArray blob, const String& name = "");
 
         /** @brief Sets the new value for the learned param of the layer.
          *  @param layer name or id of the layer.
@@ -733,7 +733,7 @@ CV__DNN_EXPERIMENTAL_NS_BEGIN
      *  If @p crop is false, direct resize without cropping and preserving aspect ratio is performed.
      *  @returns 4-dimansional Mat with NCHW dimensions order.
      */
-    CV_EXPORTS_W Mat blobFromImage(const Mat& image, double scalefactor=1.0, const Size& size = Size(),
+    CV_EXPORTS_W Mat blobFromImage(InputArray image, double scalefactor=1.0, const Size& size = Size(),
                                    const Scalar& mean = Scalar(), bool swapRB=true, bool crop=true);
     /** @brief Creates 4-dimensional blob from series of images. Optionally resizes and
      *  crops @p images from center, subtract @p mean values, scales values by @p scalefactor,
diff --git a/modules/dnn/src/dnn.cpp b/modules/dnn/src/dnn.cpp
@@ -84,11 +84,11 @@ static String toString(const T &v)
     return ss.str();
 }
 
-Mat blobFromImage(const Mat& image, double scalefactor, const Size& size,
+Mat blobFromImage(InputArray image, double scalefactor, const Size& size,
                   const Scalar& mean, bool swapRB, bool crop)
 {
     CV_TRACE_FUNCTION();
-    std::vector<Mat> images(1, image);
+    std::vector<Mat> images(1, image.getMat());
     return blobFromImages(images, scalefactor, size, mean, swapRB, crop);
 }
 
@@ -1910,7 +1910,7 @@ void Net::setInputsNames(const std::vector<String> &inputBlobNames)
     impl->netInputLayer->setNames(inputBlobNames);
 }
 
-void Net::setInput(const Mat &blob_, const String& name)
+void Net::setInput(InputArray blob, const String& name)
 {
     CV_TRACE_FUNCTION();
     CV_TRACE_ARG_VALUE(name, "name", name.c_str());
@@ -1930,6 +1930,7 @@ void Net::setInput(const Mat &blob_, const String& name)
         ld.umat_outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );
     ld.outputBlobsWrappers.resize(ld.outputBlobs.size());
     MatShape prevShape = shape(ld.outputBlobs[pin.oid]);
+    Mat blob_ = blob.getMat();
     bool oldShape = prevShape == shape(blob_);
     if (oldShape)
     {
diff --git a/modules/dnn/src/layers/eltwise_layer.cpp b/modules/dnn/src/layers/eltwise_layer.cpp
@@ -52,22 +52,27 @@ namespace dnn
 class EltwiseLayerImpl : public EltwiseLayer
 {
 public:
-    EltwiseOp op;
+    enum EltwiseOp
+    {
+        PROD = 0,
+        SUM = 1,
+        MAX = 2,
+    } op;
     std::vector<float> coeffs;
 
     EltwiseLayerImpl(const LayerParams& params)
     {
         setParamsFrom(params);
-        op = EltwiseLayer::SUM;
+        op = SUM;
         if (params.has("operation"))
         {
             String operation = params.get<String>("operation").toLowerCase();
             if (operation == "prod")
-                op = EltwiseLayer::PROD;
+                op = PROD;
             else if (operation == "sum")
-                op = EltwiseLayer::SUM;
+                op = SUM;
             else if (operation == "max")
-                op = EltwiseLayer::MAX;
+                op = MAX;
             else
                 CV_Error(cv::Error::StsBadArg, "Unknown operaticon type \"" + operation + "\"");
         }
@@ -122,7 +127,7 @@ class EltwiseLayerImpl : public EltwiseLayer
         int channels;
         size_t planeSize;
 
-        EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(EltwiseLayer::PROD), nstripes(0), activ(0), channels(0), planeSize(0)  {}
+        EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0)  {}
 
         static void run(const Mat** srcs, int nsrcs, Mat& dst,
                         const std::vector<float>& coeffs, EltwiseOp op,
@@ -150,7 +155,7 @@ class EltwiseLayerImpl : public EltwiseLayer
             CV_Assert(dst.total() == dst.size[0] * p.channels * p.planeSize);
 
             bool simpleCoeffs = true;
-            if( op == EltwiseLayer::SUM && !coeffs.empty() )
+            if( op == SUM && !coeffs.empty() )
             {
                 CV_Assert( coeffs.size() == (size_t)nsrcs );
 
@@ -192,7 +197,7 @@ class EltwiseLayerImpl : public EltwiseLayer
                     const float* srcptr0 = srcs[0]->ptr<float>() + globalDelta;
                     float* dstptr = dstptr0 + globalDelta;
 
-                    if( op == EltwiseLayer::PROD )
+                    if( op == PROD )
                     {
                         for( k = 1; k < n; k++ )
                         {
@@ -204,7 +209,7 @@ class EltwiseLayerImpl : public EltwiseLayer
                             srcptr0 = (const float*)dstptr;
                         }
                     }
-                    else if( op == EltwiseLayer::MAX )
+                    else if( op == MAX )
                     {
                         for( k = 1; k < n; k++ )
                         {
diff --git a/modules/dnn/src/layers/lrn_layer.cpp b/modules/dnn/src/layers/lrn_layer.cpp
@@ -67,9 +67,9 @@ class LRNLayerImpl : public LRNLayer
         type = -1;
         String nrmType = params.get<String>("norm_region", "ACROSS_CHANNELS");
         if (nrmType == "ACROSS_CHANNELS")
-            type = LRNLayer::CHANNEL_NRM;
+            type = CHANNEL_NRM;
         else if (nrmType == "WITHIN_CHANNEL")
-            type = LRNLayer::SPATIAL_NRM;
+            type = SPATIAL_NRM;
         else
             CV_Error(Error::StsBadArg, "Unknown region type \"" + nrmType + "\"");
 
@@ -397,6 +397,13 @@ class LRNLayerImpl : public LRNLayer
         }
         return flops;
     }
+
+private:
+    enum Type
+    {
+        CHANNEL_NRM,
+        SPATIAL_NRM
+    };
 };
 
 Ptr<LRNLayer> LRNLayer::create(const LayerParams& params)
diff --git a/modules/dnn/src/layers/pooling_layer.cpp b/modules/dnn/src/layers/pooling_layer.cpp
@@ -63,27 +63,27 @@ class PoolingLayerImpl : public PoolingLayer
 public:
     PoolingLayerImpl(const LayerParams& params)
     {
-        type = PoolingLayer::MAX;
+        type = MAX;
         computeMaxIdx = true;
         globalPooling = false;
 
         if (params.has("pool"))
         {
             String pool = params.get<String>("pool").toLowerCase();
             if (pool == "max")
-                type = PoolingLayer::MAX;
+                type = MAX;
             else if (pool == "ave")
-                type = PoolingLayer::AVE;
+                type = AVE;
             else if (pool == "stochastic")
-                type = PoolingLayer::STOCHASTIC;
+                type = STOCHASTIC;
             else
                 CV_Error(Error::StsBadArg, "Unknown pooling type \"" + pool + "\"");
             getPoolingKernelParams(params, kernel.height, kernel.width, globalPooling,
                                    pad.height, pad.width, stride.height, stride.width, padMode);
         }
         else if (params.has("pooled_w") || params.has("pooled_h") || params.has("spatial_scale"))
         {
-            type = PoolingLayer::ROI;
+            type = ROI;
         }
         setParamsFrom(params);
         ceilMode = params.get<bool>("ceil_mode", true);
@@ -115,8 +115,7 @@ class PoolingLayerImpl : public PoolingLayer
     {
         return backendId == DNN_BACKEND_DEFAULT ||
                backendId == DNN_BACKEND_HALIDE && haveHalide() &&
-               (type == PoolingLayer::MAX ||
-                type == PoolingLayer::AVE && !pad.width && !pad.height);
+               (type == MAX || type == AVE && !pad.width && !pad.height);
     }
 
 #ifdef HAVE_OPENCL
@@ -200,9 +199,9 @@ class PoolingLayerImpl : public PoolingLayer
 
     virtual Ptr<BackendNode> initHalide(const std::vector<Ptr<BackendWrapper> > &inputs)
     {
-        if (type == PoolingLayer::MAX)
+        if (type == MAX)
             return initMaxPoolingHalide(inputs);
-        else if (type == PoolingLayer::AVE)
+        else if (type == AVE)
             return initAvePoolingHalide(inputs);
         else
             return Ptr<BackendNode>();
@@ -221,7 +220,7 @@ class PoolingLayerImpl : public PoolingLayer
         float spatialScale;
 
         PoolingInvoker() : src(0), rois(0), dst(0), mask(0), nstripes(0),
-                           computeMaxIdx(0), poolingType(PoolingLayer::MAX), spatialScale(0) {}
+                           computeMaxIdx(0), poolingType(MAX), spatialScale(0) {}
 
         static void run(const Mat& src, const Mat& rois, Mat& dst, Mat& mask, Size kernel,
                         Size stride, Size pad, int poolingType, float spatialScale,
@@ -698,7 +697,7 @@ class PoolingLayerImpl : public PoolingLayer
             out.height = 1;
             out.width = 1;
         }
-        else if (type == PoolingLayer::ROI)
+        else if (type == ROI)
         {
             out.height = pooledSize.height;
             out.width = pooledSize.width;
@@ -757,6 +756,14 @@ class PoolingLayerImpl : public PoolingLayer
         }
         return flops;
     }
+private:
+    enum Type
+    {
+        MAX,
+        AVE,
+        STOCHASTIC,
+        ROI
+    };
 };
 
 Ptr<PoolingLayer> PoolingLayer::create(const LayerParams& params)
diff --git a/modules/js/src/core_bindings.cpp b/modules/js/src/core_bindings.cpp
diff --git a/modules/js/src/embindgen.py b/modules/js/src/embindgen.py
diff --git a/platforms/js/build_js.py b/platforms/js/build_js.py
diff --git a/samples/dnn/js_face_recognition.html b/samples/dnn/js_face_recognition.html

Original file line number	Diff line number	Diff line change
`@@ -15,7 +15,7 @@ set(the_description "Deep neural network module. It allows to load models from d`
`15`	`15`
`16`	`16`	`ocv_add_dispatched_file("layers/layers_common" AVX AVX2)`
`17`	`17`
`18`		`-ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java)`
	`18`	`+ocv_add_module(dnn opencv_core opencv_imgproc WRAP python matlab java js)`
`19`	`19`	`ocv_warnings_disable(CMAKE_CXX_FLAGS -Wno-shadow -Wno-parentheses -Wmaybe-uninitialized -Wsign-promo`
`20`	`20`	`-Wmissing-declarations -Wmissing-prototypes`
`21`	`21`	`)`
Original file line number	Diff line number	Diff line change
`@@ -84,11 +84,11 @@ static String toString(const T &v)`
`84`	`84`	`return ss.str();`
`85`	`85`	`}`
`86`	`86`
`87`		`-Mat blobFromImage(const Mat& image, double scalefactor, const Size& size,`
	`87`	`+Mat blobFromImage(InputArray image, double scalefactor, const Size& size,`
`88`	`88`	`const Scalar& mean, bool swapRB, bool crop)`
`89`	`89`	`{`
`90`	`90`	`CV_TRACE_FUNCTION();`
`91`		`- std::vector<Mat> images(1, image);`
	`91`	`+ std::vector<Mat> images(1, image.getMat());`
`92`	`92`	`return blobFromImages(images, scalefactor, size, mean, swapRB, crop);`
`93`	`93`	`}`
`94`	`94`
`@@ -1910,7 +1910,7 @@ void Net::setInputsNames(const std::vector<String> &inputBlobNames)`
`1910`	`1910`	`impl->netInputLayer->setNames(inputBlobNames);`
`1911`	`1911`	`}`
`1912`	`1912`
`1913`		`-void Net::setInput(const Mat &blob_, const String& name)`
	`1913`	`+void Net::setInput(InputArray blob, const String& name)`
`1914`	`1914`	`{`
`1915`	`1915`	`CV_TRACE_FUNCTION();`
`1916`	`1916`	`CV_TRACE_ARG_VALUE(name, "name", name.c_str());`
`@@ -1930,6 +1930,7 @@ void Net::setInput(const Mat &blob_, const String& name)`
`1930`	`1930`	`ld.umat_outputBlobs.resize( std::max(pin.oid+1, (int)ld.requiredOutputs.size()) );`
`1931`	`1931`	`ld.outputBlobsWrappers.resize(ld.outputBlobs.size());`
`1932`	`1932`	`MatShape prevShape = shape(ld.outputBlobs[pin.oid]);`
	`1933`	`+ Mat blob_ = blob.getMat();`
`1933`	`1934`	`bool oldShape = prevShape == shape(blob_);`
`1934`	`1935`	`if (oldShape)`
`1935`	`1936`	`{`
Original file line number	Diff line number	Diff line change
`@@ -52,22 +52,27 @@ namespace dnn`
`52`	`52`	`class EltwiseLayerImpl : public EltwiseLayer`
`53`	`53`	`{`
`54`	`54`	`public:`
`55`		`- EltwiseOp op;`
	`55`	`+ enum EltwiseOp`
	`56`	`+ {`
	`57`	`+ PROD = 0,`
	`58`	`+ SUM = 1,`
	`59`	`+ MAX = 2,`
	`60`	`+ } op;`
`56`	`61`	`std::vector<float> coeffs;`
`57`	`62`
`58`	`63`	`EltwiseLayerImpl(const LayerParams& params)`
`59`	`64`	`{`
`60`	`65`	`setParamsFrom(params);`
`61`		`- op = EltwiseLayer::SUM;`
	`66`	`+ op = SUM;`
`62`	`67`	`if (params.has("operation"))`
`63`	`68`	`{`
`64`	`69`	`String operation = params.get<String>("operation").toLowerCase();`
`65`	`70`	`if (operation == "prod")`
`66`		`- op = EltwiseLayer::PROD;`
	`71`	`+ op = PROD;`
`67`	`72`	`else if (operation == "sum")`
`68`		`- op = EltwiseLayer::SUM;`
	`73`	`+ op = SUM;`
`69`	`74`	`else if (operation == "max")`
`70`		`- op = EltwiseLayer::MAX;`
	`75`	`+ op = MAX;`
`71`	`76`	`else`
`72`	`77`	`CV_Error(cv::Error::StsBadArg, "Unknown operaticon type \"" + operation + "\"");`
`73`	`78`	`}`
`@@ -122,7 +127,7 @@ class EltwiseLayerImpl : public EltwiseLayer`
`122`	`127`	`int channels;`
`123`	`128`	`size_t planeSize;`
`124`	`129`
`125`		`- EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(EltwiseLayer::PROD), nstripes(0), activ(0), channels(0), planeSize(0) {}`
	`130`	`+ EltwiseInvoker() : srcs(0), nsrcs(0), dst(0), coeffs(0), op(PROD), nstripes(0), activ(0), channels(0), planeSize(0) {}`
`126`	`131`
`127`	`132`	`static void run(const Mat** srcs, int nsrcs, Mat& dst,`
`128`	`133`	`const std::vector<float>& coeffs, EltwiseOp op,`
`@@ -150,7 +155,7 @@ class EltwiseLayerImpl : public EltwiseLayer`
`150`	`155`	`CV_Assert(dst.total() == dst.size[0] * p.channels * p.planeSize);`
`151`	`156`
`152`	`157`	`bool simpleCoeffs = true;`
`153`		`- if( op == EltwiseLayer::SUM && !coeffs.empty() )`
	`158`	`+ if( op == SUM && !coeffs.empty() )`
`154`	`159`	`{`
`155`	`160`	`CV_Assert( coeffs.size() == (size_t)nsrcs );`
`156`	`161`
`@@ -192,7 +197,7 @@ class EltwiseLayerImpl : public EltwiseLayer`
`192`	`197`	`const float* srcptr0 = srcs[0]->ptr<float>() + globalDelta;`
`193`	`198`	`float* dstptr = dstptr0 + globalDelta;`
`194`	`199`
`195`		`- if( op == EltwiseLayer::PROD )`
	`200`	`+ if( op == PROD )`
`196`	`201`	`{`
`197`	`202`	`for( k = 1; k < n; k++ )`
`198`	`203`	`{`
`@@ -204,7 +209,7 @@ class EltwiseLayerImpl : public EltwiseLayer`
`204`	`209`	`srcptr0 = (const float*)dstptr;`
`205`	`210`	`}`
`206`	`211`	`}`
`207`		`- else if( op == EltwiseLayer::MAX )`
	`212`	`+ else if( op == MAX )`
`208`	`213`	`{`
`209`	`214`	`for( k = 1; k < n; k++ )`
`210`	`215`	`{`