Commit fe58b58

Merge pull request opencv#9778 from dkurt:dnn_colorization
2 parents 87595a6 + e268606

File tree: 6 files changed, +110 -15 lines


modules/dnn/src/caffe/caffe_importer.cpp

Lines changed: 9 additions & 2 deletions
@@ -293,14 +293,13 @@ class CaffeImporter : public Importer
         addedBlobs.reserve(layersSize + 1);
 
         //setup input layer names
+        std::vector<String> netInputs(net.input_size());
         {
-            std::vector<String> netInputs(net.input_size());
             for (int inNum = 0; inNum < net.input_size(); inNum++)
             {
                 addedBlobs.push_back(BlobNote(net.input(inNum), 0, inNum));
                 netInputs[inNum] = net.input(inNum);
             }
-            dstNet.setInputsNames(netInputs);
         }
 
         for (int li = 0; li < layersSize; li++)
@@ -317,6 +316,13 @@ class CaffeImporter : public Importer
             if (repetitions)
                 name += String("_") + toString(repetitions);
 
+            if (type == "Input")
+            {
+                addedBlobs.push_back(BlobNote(name, 0, netInputs.size()));
+                netInputs.push_back(name);
+                continue;
+            }
+
             int id = dstNet.addLayer(name, type, layerParams);
 
             for (int inNum = 0; inNum < layer.bottom_size(); inNum++)
@@ -325,6 +331,7 @@ class CaffeImporter : public Importer
             for (int outNum = 0; outNum < layer.top_size(); outNum++)
                 addOutput(layer, id, outNum);
         }
+        dstNet.setInputsNames(netInputs);
 
         addedBlobs.clear();
     }
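In short, the importer now treats a layer of type "Input" as a network input: its name is recorded in netInputs, the layer itself is skipped rather than added, and setInputsNames() is called once after the whole layer list has been walked. A rough Python sketch of what this enables; the file names 'model.prototxt'/'model.caffemodel' and the layer name 'data' are placeholders, not part of this patch:

# Hedged sketch: a prototxt that declares its input as a layer of type "Input"
# (instead of top-level "input:" fields) is now importable; the Input layer's
# name becomes a network input. File and layer names below are placeholders.
import numpy as np
import cv2 as cv

net = cv.dnn.readNetFromCaffe('model.prototxt', 'model.caffemodel')
blob = np.zeros((1, 3, 224, 224), np.float32)  # dummy NCHW input
net.setInput(blob, 'data')  # 'data' is the assumed name of the Input layer
out = net.forward()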

modules/dnn/src/init.cpp

Lines changed: 1 addition & 0 deletions
@@ -106,6 +106,7 @@ void initializeLayerFactory()
     CV_DNN_REGISTER_LAYER_CLASS(MaxUnpool, MaxUnpoolLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Dropout, BlankLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Identity, BlankLayer);
+    CV_DNN_REGISTER_LAYER_CLASS(Silence, BlankLayer);
 
     CV_DNN_REGISTER_LAYER_CLASS(Crop, CropLayer);
     CV_DNN_REGISTER_LAYER_CLASS(Eltwise, EltwiseLayer);
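Caffe's Silence layer exists only to discard its inputs, so registering it as a pass-through BlankLayer, like Dropout and Identity above, lets prototxts that still contain it import cleanly without a dedicated implementation.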

modules/dnn/src/layers/convolution_layer.cpp

Lines changed: 9 additions & 11 deletions
@@ -311,15 +311,15 @@ class ConvolutionLayerImpl : public BaseConvolutionLayerImpl
                      Size kernel, Size pad, Size stride, Size dilation,
                      const ActivationLayer* activ, int ngroups, int nstripes )
     {
-        CV_Assert( input.dims == 4 && output.dims == 4 &&
-                   input.size[0] == output.size[0] &&
-                   weights.rows == output.size[1] &&
-                   weights.cols == (input.size[1]/ngroups)*kernel.width*kernel.height &&
-                   input.type() == output.type() &&
-                   input.type() == weights.type() &&
-                   input.type() == CV_32F &&
-                   input.isContinuous() &&
-                   output.isContinuous() &&
+        CV_Assert( input.dims == 4 && output.dims == 4,
+                   input.size[0] == output.size[0],
+                   weights.rows == output.size[1],
+                   weights.cols == (input.size[1]/ngroups)*kernel.width*kernel.height,
+                   input.type() == output.type(),
+                   input.type() == weights.type(),
+                   input.type() == CV_32F,
+                   input.isContinuous(),
+                   output.isContinuous(),
                    biasvec.size() == (size_t)output.size[1]+2);
         ParallelConv p;
 
@@ -1237,15 +1237,13 @@ static void initConvDeconvLayerFromCaffe(Ptr<BaseConvolutionLayer> l, const Laye
                  l->pad.width, l->stride.height, l->stride.width, l->dilation.height,
                  l->dilation.width, l->padMode);
 
-    bool bias = params.get<bool>("bias_term", true);
     l->numOutput = params.get<int>("num_output");
     int ngroups = params.get<int>("group", 1);
 
    l->adjustPad.height = params.get<int>("adj_h", 0);
    l->adjustPad.width = params.get<int>("adj_w", 0);
 
    CV_Assert(l->numOutput % ngroups == 0);
-    CV_Assert((bias && l->blobs.size() == 2) || (!bias && l->blobs.size() == 1));
    CV_Assert(l->adjustPad.width < l->stride.width &&
              l->adjustPad.height < l->stride.height);
 }

modules/dnn/src/layers/scale_layer.cpp

Lines changed: 1 addition & 2 deletions
@@ -33,6 +33,7 @@ class ScaleLayerImpl : public ScaleLayer
                          std::vector<MatShape> &outputs,
                          std::vector<MatShape> &internals) const
     {
+        CV_Assert(blobs.size() == 1 + hasBias);
         Layer::getMemoryShapes(inputs, requiredOutputs, outputs, internals);
         return true;
     }
@@ -48,8 +49,6 @@ class ScaleLayerImpl : public ScaleLayer
         CV_TRACE_FUNCTION();
         CV_TRACE_ARG_VALUE(name, "name", name.c_str());
 
-        CV_Assert(blobs.size() == 1 + hasBias);
-
         for (size_t ii = 0; ii < outputs.size(); ii++)
         {
             Mat &inpBlob = *inputs[ii];

modules/dnn/test/test_caffe_importer.cpp

Lines changed: 23 additions & 0 deletions
@@ -211,4 +211,27 @@ TEST(Reproducibility_GoogLeNet_fp16, Accuracy)
     normAssert(out, ref, "", l1, lInf);
 }
 
+// https://github.com/richzhang/colorization
+TEST(Reproducibility_Colorization, Accuracy)
+{
+    const float l1 = 1e-5;
+    const float lInf = 3e-3;
+
+    Mat inp = blobFromNPY(_tf("colorization_inp.npy"));
+    Mat ref = blobFromNPY(_tf("colorization_out.npy"));
+    Mat kernel = blobFromNPY(_tf("colorization_pts_in_hull.npy"));
+
+    const string proto = findDataFile("dnn/colorization_deploy_v2.prototxt", false);
+    const string model = findDataFile("dnn/colorization_release_v2.caffemodel", false);
+    Net net = readNetFromCaffe(proto, model);
+
+    net.getLayer(net.getLayerId("class8_ab"))->blobs.push_back(kernel);
+    net.getLayer(net.getLayerId("conv8_313_rh"))->blobs.push_back(Mat(1, 313, CV_32F, 2.606));
+
+    net.setInput(inp);
+    Mat out = net.forward();
+
+    normAssert(out, ref, "", l1, lInf);
+}
+
 }

samples/dnn/colorization.py

Lines changed: 67 additions & 0 deletions
@@ -0,0 +1,67 @@
+# Script is based on https://github.com/richzhang/colorization/colorize.py
+import numpy as np
+import argparse
+import cv2 as cv
+
+def parse_args():
+    parser = argparse.ArgumentParser(description='iColor: deep interactive colorization')
+    parser.add_argument('--input', help='Path to image or video. Skip to capture frames from camera')
+    parser.add_argument('--prototxt', help='Path to colorization_deploy_v2.prototxt', default='./models/colorization_release_v2.prototxt')
+    parser.add_argument('--caffemodel', help='Path to colorization_release_v2.caffemodel', default='./models/colorization_release_v2.caffemodel')
+    parser.add_argument('--kernel', help='Path to pts_in_hull.npy', default='./resources/pts_in_hull.npy')
+
+    args = parser.parse_args()
+    return args
+
+if __name__ == '__main__':
+    W_in = 224
+    H_in = 224
+    imshowSize = (640, 480)
+
+    args = parse_args()
+
+    # Select desired model
+    net = cv.dnn.readNetFromCaffe(args.prototxt, args.caffemodel)
+
+    pts_in_hull = np.load(args.kernel) # load cluster centers
+
+    # populate cluster centers as 1x1 convolution kernel
+    pts_in_hull = pts_in_hull.transpose().reshape(2, 313, 1, 1)
+    net.getLayer(long(net.getLayerId('class8_ab'))).blobs = [pts_in_hull.astype(np.float32)]
+    net.getLayer(long(net.getLayerId('conv8_313_rh'))).blobs = [np.full([1, 313], 2.606, np.float32)]
+
+    if args.input:
+        cap = cv.VideoCapture(args.input)
+    else:
+        cap = cv.VideoCapture(0)
+
+    while cv.waitKey(1) < 0:
+        hasFrame, frame = cap.read()
+        if not hasFrame:
+            cv.waitKey()
+            break
+
+        img_rgb = (frame[:,:,[2, 1, 0]] * 1.0 / 255).astype(np.float32)
+
+        img_lab = cv.cvtColor(img_rgb, cv.COLOR_RGB2Lab)
+        img_l = img_lab[:,:,0] # pull out L channel
+        (H_orig,W_orig) = img_rgb.shape[:2] # original image size
+
+        # resize image to network input size
+        img_rs = cv.resize(img_rgb, (W_in, H_in)) # resize image to network input size
+        img_lab_rs = cv.cvtColor(img_rs, cv.COLOR_RGB2Lab)
+        img_l_rs = img_lab_rs[:,:,0]
+        img_l_rs -= 50 # subtract 50 for mean-centering
+
+        net.setInput(cv.dnn.blobFromImage(img_l_rs))
+        ab_dec = net.forward('class8_ab')[0,:,:,:].transpose((1,2,0)) # this is our result
+
+        (H_out,W_out) = ab_dec.shape[:2]
+        ab_dec_us = cv.resize(ab_dec, (W_orig, H_orig))
+        img_lab_out = np.concatenate((img_l[:,:,np.newaxis],ab_dec_us),axis=2) # concatenate with original image L
+        img_bgr_out = np.clip(cv.cvtColor(img_lab_out, cv.COLOR_Lab2BGR), 0, 1)
+
+        frame = cv.resize(frame, imshowSize)
+        cv.imshow('origin', frame)
+        cv.imshow('gray', cv.cvtColor(frame, cv.COLOR_RGB2GRAY))
+        cv.imshow('colorized', cv.resize(img_bgr_out, imshowSize))
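A plausible invocation of the sample (paths are illustrative; the caffemodel and pts_in_hull.npy come from the richzhang/colorization repository referenced in the script's header): python colorization.py --input image.jpg --prototxt ./models/colorization_deploy_v2.prototxt --caffemodel ./models/colorization_release_v2.caffemodel --kernel ./resources/pts_in_hull.npy. If --input is omitted, the script falls back to capturing frames from the default camera.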
