Skip to content

Commit 9087778

Browse files
committed
Merge pull request opencv#8895 from lewisjb:python-docstrings
2 parents ace0701 + 078b4cc commit 9087778

File tree

2 files changed

+73
-28
lines changed

2 files changed

+73
-28
lines changed

modules/python/src2/gen2.py

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,9 @@ def __init__(self, classname, name, decl, isconstructor):
405405
self.name = self.wname = name
406406
self.isconstructor = isconstructor
407407

408-
self.rettype = decl[4] if len(decl) >=5 else handle_ptr(decl[1])
408+
self.docstring = decl[5]
409+
410+
self.rettype = decl[4] or handle_ptr(decl[1])
409411
if self.rettype == "void":
410412
self.rettype = ""
411413
self.args = []
@@ -494,7 +496,7 @@ def init_pyproto(self):
494496
else:
495497
outstr = "None"
496498

497-
self.py_docstring = "%s(%s) -> %s" % (self.wname, argstr, outstr)
499+
self.py_prototype = "%s(%s) -> %s" % (self.wname, argstr, outstr)
498500
self.py_noptargs = noptargs
499501
self.py_arglist = arglist
500502
for aname, argno in arglist:
@@ -536,28 +538,49 @@ def get_wrapper_prototype(self):
536538
return "static PyObject* %s(PyObject* %s, PyObject* args, PyObject* kw)" % (full_fname, self_arg)
537539

538540
def get_tab_entry(self):
541+
prototype_list = []
539542
docstring_list = []
543+
540544
have_empty_constructor = False
541545
for v in self.variants:
542-
s = v.py_docstring
546+
s = v.py_prototype
543547
if (not v.py_arglist) and self.isconstructor:
544548
have_empty_constructor = True
545-
if s not in docstring_list:
546-
docstring_list.append(s)
549+
if s not in prototype_list:
550+
prototype_list.append(s)
551+
docstring_list.append(v.docstring)
552+
547553
# if there are just 2 constructors: default one and some other,
548554
# we simplify the notation.
549555
# Instead of ClassName(args ...) -> object or ClassName() -> object
550556
# we write ClassName([args ...]) -> object
551557
if have_empty_constructor and len(self.variants) == 2:
552558
idx = self.variants[1].py_arglist != []
553-
s = self.variants[idx].py_docstring
559+
s = self.variants[idx].py_prototype
554560
p1 = s.find("(")
555561
p2 = s.rfind(")")
556-
docstring_list = [s[:p1+1] + "[" + s[p1+1:p2] + "]" + s[p2:]]
562+
prototype_list = [s[:p1+1] + "[" + s[p1+1:p2] + "]" + s[p2:]]
563+
564+
# The final docstring will be: each prototype, followed by
565+
# its relevant doxygen comment
566+
full_docstring = ""
567+
for prototype, body in zip(prototype_list, docstring_list):
568+
full_docstring += Template("$prototype\n$docstring\n\n\n\n").substitute(
569+
prototype=prototype,
570+
docstring='\n'.join(
571+
['. ' + line
572+
for line in body.split('\n')]
573+
)
574+
)
575+
576+
# Escape backslashes, newlines, and double quotes
577+
full_docstring = full_docstring.strip().replace("\\", "\\\\").replace('\n', '\\n').replace("\"", "\\\"")
578+
# Convert non-ASCII chars to XML character references, but keep the result as a string instead of bytes
579+
full_docstring = full_docstring.encode('ascii', errors='xmlcharrefreplace').decode()
557580

558581
return Template(' {"$py_funcname", (PyCFunction)$wrap_funcname, METH_VARARGS | METH_KEYWORDS, "$py_docstring"},\n'
559582
).substitute(py_funcname = self.variants[0].wname, wrap_funcname=self.get_wrapper_name(),
560-
py_docstring = " or ".join(docstring_list))
583+
py_docstring = full_docstring)
561584

562585
def gen_code(self, all_classes):
563586
proto = self.get_wrapper_prototype()

modules/python/src2/hdr_parser.py

Lines changed: 42 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,13 @@
2121
]
2222

2323
"""
24-
Each declaration is [funcname, return_value_type /* in C, not in Python */, <list_of_modifiers>, <list_of_arguments>],
24+
Each declaration is [funcname, return_value_type /* in C, not in Python */, <list_of_modifiers>, <list_of_arguments>, original_return_type, docstring],
2525
where each element of <list_of_arguments> is 4-element list itself:
2626
[argtype, argname, default_value /* or "" if none */, <list_of_modifiers>]
2727
where the list of modifiers is yet another nested list of strings
2828
(currently recognized are "/O" for output argument, "/S" for static (i.e. class) methods
2929
and "/A value" for the plain C arrays with counters)
30+
original_return_type is None if it is the same as return_value_type
3031
"""
3132

3233
class CppHeaderParser(object):
@@ -226,7 +227,7 @@ def parse_enum(self, decl_str):
226227
else:
227228
prev_val_delta = 0
228229
prev_val = val = pv[1].strip()
229-
decl.append(["const " + self.get_dotted_name(pv[0].strip()), val, [], []])
230+
decl.append(["const " + self.get_dotted_name(pv[0].strip()), val, [], [], None, ""])
230231
return decl
231232

232233
def parse_class_decl(self, decl_str):
@@ -256,7 +257,7 @@ def parse_class_decl(self, decl_str):
256257
bases = ll[2:]
257258
return classname, bases, modlist
258259

259-
def parse_func_decl_no_wrap(self, decl_str, static_method = False):
260+
def parse_func_decl_no_wrap(self, decl_str, static_method=False, docstring=""):
260261
decl_str = (decl_str or "").strip()
261262
virtual_method = False
262263
explicit_method = False
@@ -299,7 +300,7 @@ def parse_func_decl_no_wrap(self, decl_str, static_method = False):
299300
apos = fdecl.find("(", apos+1)
300301

301302
fname = "cv." + fname.replace("::", ".")
302-
decl = [fname, rettype, [], []]
303+
decl = [fname, rettype, [], [], None, docstring]
303304

304305
# inline constructor implementation
305306
implmatch = re.match(r"(\(.*?\))\s*:\s*(\w+\(.*?\),?\s*)+", fdecl[apos:])
@@ -370,7 +371,7 @@ def parse_func_decl_no_wrap(self, decl_str, static_method = False):
370371
print(decl_str)
371372
return decl
372373

373-
def parse_func_decl(self, decl_str, use_umat=False):
374+
def parse_func_decl(self, decl_str, use_umat=False, docstring=""):
374375
"""
375376
Parses the function or method declaration in the form:
376377
[([CV_EXPORTS] <rettype>) | CVAPI(rettype)]
@@ -379,7 +380,7 @@ def parse_func_decl(self, decl_str, use_umat=False):
379380
[const] {; | <function_body>}
380381
381382
Returns the function declaration entry:
382-
[<func name>, <return value C-type>, <list of modifiers>, <list of arguments>] (see above)
383+
[<func name>, <return value C-type>, <list of modifiers>, <list of arguments>, <original return type>, <docstring>] (see above)
383384
"""
384385

385386
if self.wrap_mode:
@@ -484,7 +485,7 @@ def parse_func_decl(self, decl_str, use_umat=False):
484485
funcname = self.get_dotted_name(funcname)
485486

486487
if not self.wrap_mode:
487-
decl = self.parse_func_decl_no_wrap(decl_str, static_method)
488+
decl = self.parse_func_decl_no_wrap(decl_str, static_method, docstring)
488489
decl[0] = funcname
489490
return decl
490491

@@ -574,10 +575,7 @@ def parse_func_decl(self, decl_str, use_umat=False):
574575
if static_method:
575576
func_modlist.append("/S")
576577

577-
if original_type is None:
578-
return [funcname, rettype, func_modlist, args]
579-
else:
580-
return [funcname, rettype, func_modlist, args, original_type]
578+
return [funcname, rettype, func_modlist, args, original_type, docstring]
581579

582580
def get_dotted_name(self, name):
583581
"""
@@ -612,7 +610,7 @@ class A {
612610
n = "cv.Algorithm"
613611
return n
614612

615-
def parse_stmt(self, stmt, end_token, use_umat=False):
613+
def parse_stmt(self, stmt, end_token, use_umat=False, docstring=""):
616614
"""
617615
parses the statement (ending with ';' or '}') or a block head (ending with '{')
618616
@@ -659,7 +657,7 @@ def parse_stmt(self, stmt, end_token, use_umat=False):
659657
exit(1)
660658
if classname.startswith("_Ipl"):
661659
classname = classname[1:]
662-
decl = [stmt_type + " " + self.get_dotted_name(classname), "", modlist, []]
660+
decl = [stmt_type + " " + self.get_dotted_name(classname), "", modlist, [], None, docstring]
663661
if bases:
664662
decl[1] = ": " + ", ".join([self.get_dotted_name(b).replace(".","::") for b in bases])
665663
return stmt_type, classname, True, decl
@@ -674,7 +672,7 @@ def parse_stmt(self, stmt, end_token, use_umat=False):
674672
exit(1)
675673
decl = []
676674
if ("CV_EXPORTS_W" in stmt) or ("CV_EXPORTS_AS" in stmt) or (not self.wrap_mode):# and ("CV_EXPORTS" in stmt)):
677-
decl = [stmt_type + " " + self.get_dotted_name(classname), "", modlist, []]
675+
decl = [stmt_type + " " + self.get_dotted_name(classname), "", modlist, [], None, docstring]
678676
if bases:
679677
decl[1] = ": " + ", ".join([self.get_dotted_name(b).replace(".","::") for b in bases])
680678
return stmt_type, classname, True, decl
@@ -704,7 +702,7 @@ def parse_stmt(self, stmt, end_token, use_umat=False):
704702
# since we filtered off the other places where '(' can normally occur:
705703
# - code blocks
706704
# - function pointer typedef's
707-
decl = self.parse_func_decl(stmt, use_umat=use_umat)
705+
decl = self.parse_func_decl(stmt, use_umat=use_umat, docstring=docstring)
708706
# we return parse_flag == False to prevent the parser from looking inside function/method bodies
709707
# (except for tracking the nested blocks)
710708
return stmt_type, "", False, decl
@@ -759,11 +757,13 @@ def parse(self, hname, wmode=True):
759757
SCAN = 0 # outside of a comment or preprocessor directive
760758
COMMENT = 1 # inside a multi-line comment
761759
DIRECTIVE = 2 # inside a multi-line preprocessor directive
760+
DOCSTRING = 3 # inside a multi-line docstring
762761

763762
state = SCAN
764763

765764
self.block_stack = [["file", hname, True, True, None]]
766765
block_head = ""
766+
docstring = ""
767767
self.lineno = 0
768768
self.wrap_mode = wmode
769769

@@ -789,6 +789,15 @@ def parse(self, hname, wmode=True):
789789
l = l[pos+2:]
790790
state = SCAN
791791

792+
if state == DOCSTRING:
793+
pos = l.find("*/")
794+
if pos < 0:
795+
docstring += l + "\n"
796+
continue
797+
docstring += l[:pos] + "\n"
798+
l = l[pos+2:]
799+
state = SCAN
800+
792801
if state != SCAN:
793802
print("Error at %d: invlid state = %d" % (self.lineno, state))
794803
sys.exit(-1)
@@ -806,11 +815,20 @@ def parse(self, hname, wmode=True):
806815

807816
if token == "/*":
808817
block_head += " " + l[:pos]
809-
pos = l.find("*/", pos+2)
810-
if pos < 0:
818+
end_pos = l.find("*/", pos+2)
819+
if len(l) > pos + 2 and l[pos+2] == "*":
820+
# '/**', it's a docstring
821+
if end_pos < 0:
822+
state = DOCSTRING
823+
docstring = l[pos+3:] + "\n"
824+
break
825+
else:
826+
docstring = l[pos+3:end_pos]
827+
828+
elif end_pos < 0:
811829
state = COMMENT
812830
break
813-
l = l[pos+2:]
831+
l = l[end_pos+2:]
814832
continue
815833

816834
if token == "\"":
@@ -840,7 +858,8 @@ def parse(self, hname, wmode=True):
840858
if stack_top[self.PROCESS_FLAG]:
841859
# even if stack_top[PUBLIC_SECTION] is False, we still try to process the statement,
842860
# since it can start with "public:"
843-
stmt_type, name, parse_flag, decl = self.parse_stmt(stmt, token)
861+
docstring = docstring.strip()
862+
stmt_type, name, parse_flag, decl = self.parse_stmt(stmt, token, docstring=docstring)
844863
if decl:
845864
if stmt_type == "enum":
846865
for d in decl:
@@ -854,8 +873,9 @@ def parse(self, hname, wmode=True):
854873
args = decl[3]
855874
has_mat = len(list(filter(lambda x: x[0] in {"Mat", "vector_Mat"}, args))) > 0
856875
if has_mat:
857-
_, _, _, umat_decl = self.parse_stmt(stmt, token, use_umat=True)
876+
_, _, _, umat_decl = self.parse_stmt(stmt, token, use_umat=True, docstring=docstring)
858877
decls.append(umat_decl)
878+
docstring = ""
859879
if stmt_type == "namespace":
860880
chunks = [block[1] for block in self.block_stack if block[0] == 'namespace'] + [name]
861881
self.namespaces.add('.'.join(chunks))
@@ -887,6 +907,8 @@ def print_decls(self, decls):
887907
"""
888908
for d in decls:
889909
print(d[0], d[1], ";".join(d[2]))
910+
# Uncomment below line to see docstrings
911+
# print('"""\n' + d[5] + '\n"""')
890912
for a in d[3]:
891913
print(" ", a[0], a[1], a[2], end="")
892914
if a[3]:

0 commit comments

Comments
 (0)