Commit fe58b58

Merge pull request opencv#9778 from dkurt:dnn_colorization
2 parents 87595a6 + e268606

File tree: 6 files changed, +110 -15 lines


modules/dnn/src/caffe/caffe_importer.cpp

Lines changed: 9 additions & 2 deletions
@@ -293,14 +293,13 @@ class CaffeImporter : public Importer
         addedBlobs.reserve(layersSize + 1);
 
         //setup input layer names
+        std::vector<String> netInputs(net.input_size());
         {
-            std::vector<String> netInputs(net.input_size());
             for (int inNum = 0; inNum < net.input_size(); inNum++)
             {
                 addedBlobs.push_back(BlobNote(net.input(inNum), 0, inNum));
                 netInputs[inNum] = net.input(inNum);
             }
-            dstNet.setInputsNames(netInputs);
         }
 
         for (int li = 0; li < layersSize; li++)
@@ -317,6 +316,13 @@ class CaffeImporter : public Importer
             if (repetitions)
                 name += String("_") + toString(repetitions);
 
+            if (type == "Input")
+            {
+                addedBlobs.push_back(BlobNote(name, 0, netInputs.size()));
+                netInputs.push_back(name);
+                continue;
+            }
+
             int id = dstNet.addLayer(name, type, layerParams);
 
             for (int inNum = 0; inNum < layer.bottom_size(); inNum++)
@@ -325,6 +331,7 @@ class CaffeImporter : public Importer
             for (int outNum = 0; outNum < layer.top_size(); outNum++)
                 addOutput(layer, id, outNum);
         }
+        dstNet.setInputsNames(netInputs);
 
         addedBlobs.clear();
     }
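In short, the importer now treats a layer of type "Input" as a network input: its name is recorded in netInputs, the layer itself is skipped rather than added, and setInputsNames() is called once after the whole layer list has been walked. A rough Python sketch of what this enables; the file names 'model.prototxt'/'model.caffemodel' and the layer name 'data' are placeholders, not part of this patch:

# Hedged sketch: a prototxt that declares its input as a layer of type "Input"
# (instead of top-level "input:" fields) is now importable; the Input layer's
# name becomes a network input. File and layer names below are placeholders.
import numpy as np
import cv2 as cv

net = cv.dnn.readNetFromCaffe('model.prototxt', 'model.caffemodel')
blob = np.zeros((1, 3, 224, 224), np.float32)  # dummy NCHW input
net.setInput(blob, 'data')  # 'data' is the assumed name of the Input layer
out = net.forward()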

modules/dnn/src/init.cpp

Lines changed: 1 addition & 0 deletions
@@ -106,6 +106,7 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool, MaxUnpoolLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Dropout, BlankLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Identity, BlankLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(Silence, BlankLayer);
 
     CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);
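Caffe's Silence layer exists only to discard its inputs, so registering it as a pass-through BlankLayer, like Dropout and Identity above, lets prototxts that still contain it import cleanly without a dedicated implementation.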

modules/dnn/src/layers/convolution_layer.cpp

Lines changed: 9 additions & 11 deletions
@@ -311,15 +311,15 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
                      Size kernel, Size pad, Size stride, Size dilation,
                      const ActivationLayer* activ, int ngroups, int nstripes )
     {
-        CV_Assert( input.dims == 4 && output.dims == 4 &&
-                   input.size[0] == output.size[0] &&
-                   weights.rows == output.size[1] &&
-                   weights.cols == (input.size[1]/ngroups)*kernel.width*kernel.height &&
-                   input.type() == output.type() &&
-                   input.type() == weights.type() &&
-                   input.type() == CV_32F &&
-                   input.isContinuous() &&
-                   output.isContinuous() &&
+        CV_Assert( input.dims == 4 && output.dims == 4,
+                   input.size[0] == output.size[0],
+                   weights.rows == output.size[1],
+                   weights.cols == (input.size[1]/ngroups)*kernel.width*kernel.height,
+                   input.type() == output.type(),
+                   input.type() == weights.type(),
+                   input.type() == CV_32F,
+                   input.isContinuous(),
+                   output.isContinuous(),
                    biasvec.size() == (size_t)output.size[1]+2);
         ParallelConv p;
 
@@ -1237,15 +1237,13 @@ static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, const Laye
                  l->pad.width, l->stride.height, l->stride.width, l->dilation.height,
                  l->dilation.width, l->padMode);
 
-    bool bias = params.get<bool>("bias_term", true);
     l->numOutput = params.get<int>("num_output");
     int ngroups = params.get<int>("group", 1);
 
    l->adjustPad.height = params.get<int>("adj_h", 0);
    l->adjustPad.width = params.get<int>("adj_w", 0);
 
    CV_Assert(l->numOutput % ngroups == 0);
-    CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1));
    CV_Assert(l->adjustPad.width < l->stride.width &&
              l->adjustPad.height < l->stride.height);
 }

modules/dnn/src/layers/scale_layer.cpp

Lines changed: 1 addition & 2 deletions
@@ -33,6 +33,7 @@ class ScaleLayerImpl : public ScaleLayer
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const
     {
+        CV_Assert(blobs.size() == 1 + hasBias);
         Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
         return true;
     }
@@ -48,8 +49,6 @@ class ScaleLayerImpl : public ScaleLayer
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        CV_Assert(blobs.size() == 1 + hasBias);
-
         for (size_t ii = 0; ii < outputs.size(); ii++)
         {
             Mat &inpBlob = *inputs[ii];

modules/dnn/test/test_caffe_importer.cpp

Lines changed: 23 additions & 0 deletions
@@ -211,4 +211,27 @@ TEST(Reproducibility_GoogLeNet_fp16, Accuracy)
     normAssert(out, ref, "", l1, lInf);
 }
 
+// https://github.com/richzhang/colorization
+TEST(Reproducibility_Colorization, Accuracy)
+{
+    const float l1 = 1e-5;
+    const float lInf = 3e-3;
+
+    Mat inp = blobFromNPY(_tf("colorization_inp.npy"));
+    Mat ref = blobFromNPY(_tf("colorization_out.npy"));
+    Mat kernel = blobFromNPY(_tf("colorization_pts_in_hull.npy"));
+
+    const string proto = findDataFile("dnn/colorization_deploy_v2.prototxt", false);
+    const string model = findDataFile("dnn/colorization_release_v2.caffemodel", false);
+    Net net = readNetFromCaffe(proto, model);
+
+    net.getLayer(net.getLayerId("class8_ab"))->blobs.push_back(kernel);
+    net.getLayer(net.getLayerId("conv8_313_rh"))->blobs.push_back(Mat(1, 313, CV_32F, 2.606));
+
+    net.setInput(inp);
+    Mat out = net.forward();
+
+    normAssert(out, ref, "", l1, lInf);
+}
+
 }

samples/dnn/colorization.py

Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
+# Script is based on https://github.com/richzhang/colorization/colorize.py
+import numpy as np
+import argparse
+import cv2 as cv
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='iColor: deep interactive colorization')
+    parser.add_argument('--input', help='Path to image or video. Skip to capture frames from camera')
+    parser.add_argument('--prototxt', help='Path to colorization_deploy_v2.prototxt', default='./models/colorization_release_v2.prototxt')
+    parser.add_argument('--caffemodel', help='Path to colorization_release_v2.caffemodel', default='./models/colorization_release_v2.caffemodel')
+    parser.add_argument('--kernel', help='Path to pts_in_hull.npy', default='./resources/pts_in_hull.npy')
+
+    args = parser.parse_args()
+    return args
+
+if __name__ == '__main__':
+    W_in = 224
+    H_in = 224
+    imshowSize = (640, 480)
+
+    args = parse_args()
+
+    # Select desired model
+    net = cv.dnn.readNetFromCaffe(args.prototxt, args.caffemodel)
+
+    pts_in_hull = np.load(args.kernel) # load cluster centers
+
+    # populate cluster centers as 1x1 convolution kernel
+    pts_in_hull = pts_in_hull.transpose().reshape(2, 313, 1, 1)
+    net.getLayer(long(net.getLayerId('class8_ab'))).blobs = [pts_in_hull.astype(np.float32)]
+    net.getLayer(long(net.getLayerId('conv8_313_rh'))).blobs = [np.full([1, 313], 2.606, np.float32)]
+
+    if args.input:
+        cap = cv.VideoCapture(args.input)
+    else:
+        cap = cv.VideoCapture(0)
+
+    while cv.waitKey(1) < 0:
+        hasFrame, frame = cap.read()
+        if not hasFrame:
+            cv.waitKey()
+            break
+
+        img_rgb = (frame[:,:,[2, 1, 0]] * 1.0 / 255).astype(np.float32)
+
+        img_lab = cv.cvtColor(img_rgb, cv.COLOR_RGB2Lab)
+        img_l = img_lab[:,:,0] # pull out L channel
+        (H_orig,W_orig) = img_rgb.shape[:2] # original image size
+
+        # resize image to network input size
+        img_rs = cv.resize(img_rgb, (W_in, H_in)) # resize image to network input size
+        img_lab_rs = cv.cvtColor(img_rs, cv.COLOR_RGB2Lab)
+        img_l_rs = img_lab_rs[:,:,0]
+        img_l_rs -= 50 # subtract 50 for mean-centering
+
+        net.setInput(cv.dnn.blobFromImage(img_l_rs))
+        ab_dec = net.forward('class8_ab')[0,:,:,:].transpose((1,2,0)) # this is our result
+
+        (H_out,W_out) = ab_dec.shape[:2]
+        ab_dec_us = cv.resize(ab_dec, (W_orig, H_orig))
+        img_lab_out = np.concatenate((img_l[:,:,np.newaxis],ab_dec_us),axis=2) # concatenate with original image L
+        img_bgr_out = np.clip(cv.cvtColor(img_lab_out, cv.COLOR_Lab2BGR), 0, 1)
+
+        frame = cv.resize(frame, imshowSize)
+        cv.imshow('origin', frame)
+        cv.imshow('gray', cv.cvtColor(frame, cv.COLOR_RGB2GRAY))
+        cv.imshow('colorized', cv.resize(img_bgr_out, imshowSize))
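A plausible invocation of the sample (paths are illustrative; the caffemodel and pts_in_hull.npy come from the richzhang/colorization repository referenced in the script's header): python colorization.py --input image.jpg --prototxt ./models/colorization_deploy_v2.prototxt --caffemodel ./models/colorization_release_v2.caffemodel --kernel ./resources/pts_in_hull.npy. If --input is omitted, the script falls back to capturing frames from the default camera.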
