From 3d9f636fe4635a3e2c0d4ea24d438f055b138d49 Mon Sep 17 00:00:00 2001 From: Sebastien Binet Date: Fri, 25 Mar 2022 15:35:09 +0100 Subject: [PATCH 1/3] py: add handling of format 'n' to ParseTupleAndKeywords --- py/args.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/py/args.go b/py/args.go index 5209a3d3..e3bfc36b 100644 --- a/py/args.go +++ b/py/args.go @@ -476,7 +476,7 @@ func ParseTupleAndKeywords(args Tuple, kwargs StringDict, format string, kwlist return ExceptionNewf(TypeError, "%s() argument %d must be str, not %s", name, i+1, arg.Type().Name) } *result = arg - case 'i': + case 'i', 'n': if _, ok := arg.(Int); !ok { return ExceptionNewf(TypeError, "%s() argument %d must be int, not %s", name, i+1, arg.Type().Name) } From 8ed73c5e5d8026bfbc58bab863206142b929d88b Mon Sep 17 00:00:00 2001 From: Sebastien Binet Date: Fri, 25 Mar 2022 15:35:25 +0100 Subject: [PATCH 2/3] py: refactor str.split into String.Split --- py/string.go | 84 +++++++++++++++++++++++++++------------------------- 1 file changed, 44 insertions(+), 40 deletions(-) diff --git a/py/string.go b/py/string.go index 0f9ddc28..e3b25dd5 100644 --- a/py/string.go +++ b/py/string.go @@ -122,37 +122,8 @@ func fieldsN(s string, n int) []string { } func init() { - StringType.Dict["split"] = MustNewMethod("split", func(self Object, args Tuple) (Object, error) { - selfStr := self.(String) - var value Object = None - zeroRemove := true - if len(args) > 0 { - if _, ok := args[0].(NoneType); !ok { - value = args[0] - zeroRemove = false - } - } - var maxSplit int = -2 - if len(args) > 1 { - if m, ok := args[1].(Int); ok { - maxSplit = int(m) - } - } - var valArray []string - if valStr, ok := value.(String); ok { - valArray = strings.SplitN(string(selfStr), string(valStr), maxSplit+1) - } else if _, ok := value.(NoneType); ok { - valArray = fieldsN(string(selfStr), maxSplit) - } else { - return nil, ExceptionNewf(TypeError, "Can't convert '%s' object to str implicitly", value.Type()) - } - o := List{} - for _, j := range valArray { - if len(j) > 0 || !zeroRemove { - o.Items = append(o.Items, String(j)) - } - } - return &o, nil + StringType.Dict["split"] = MustNewMethod("split", func(self Object, args Tuple, kwargs StringDict) (Object, error) { + return self.(String).Split(args, kwargs) }, 0, "split(sub) -> split string with sub.") StringType.Dict["startswith"] = MustNewMethod("startswith", func(self Object, args Tuple) (Object, error) { @@ -597,13 +568,46 @@ func (s String) M__contains__(item Object) (Object, error) { return NewBool(strings.Contains(string(s), string(needle))), nil } +func (s String) Split(args Tuple, kwargs StringDict) (Object, error) { + var ( + pyval Object = None + pymax Object = Int(-2) + pyfmt = "|Oi:split" + kwlst = []string{"sep", "maxsplit"} + ) + err := ParseTupleAndKeywords(args, kwargs, pyfmt, kwlst, &pyval, &pymax) + if err != nil { + return nil, err + } + + var ( + max = pymax.(Int) + vs []string + ) + switch v := pyval.(type) { + case String: + vs = strings.SplitN(string(s), string(v), int(max)+1) + case NoneType: + vs = fieldsN(string(s), int(max)) + default: + return nil, ExceptionNewf(TypeError, "Can't convert '%s' object to str implicitly", pyval.Type()) + } + o := List{} + for _, j := range vs { + o.Items = append(o.Items, String(j)) + } + return &o, nil +} + // Check stringerface is satisfied -var _ richComparison = String("") -var _ sequenceArithmetic = String("") -var _ I__mod__ = String("") -var _ I__rmod__ = String("") -var _ I__imod__ = String("") -var _ I__len__ = String("") -var _ I__bool__ = String("") -var _ I__getitem__ = String("") -var _ I__contains__ = String("") +var ( + _ richComparison = String("") + _ sequenceArithmetic = String("") + _ I__mod__ = String("") + _ I__rmod__ = String("") + _ I__imod__ = String("") + _ I__len__ = String("") + _ I__bool__ = String("") + _ I__getitem__ = String("") + _ I__contains__ = String("") +) From 759557527b62840733c6aaeb197a75e8d39a90ec Mon Sep 17 00:00:00 2001 From: Sebastien Binet Date: Wed, 23 Mar 2022 17:35:16 +0100 Subject: [PATCH 3/3] stdlib/string: first import --- stdlib/stdlib.go | 1 + stdlib/string/string.go | 117 +++++++++++++++++++++++++ stdlib/string/string_test.go | 15 ++++ stdlib/string/testdata/test.py | 32 +++++++ stdlib/string/testdata/test_golden.txt | 28 ++++++ 5 files changed, 193 insertions(+) create mode 100644 stdlib/string/string.go create mode 100644 stdlib/string/string_test.go create mode 100644 stdlib/string/testdata/test.py create mode 100644 stdlib/string/testdata/test_golden.txt diff --git a/stdlib/stdlib.go b/stdlib/stdlib.go index 7b46d391..ebe23d06 100644 --- a/stdlib/stdlib.go +++ b/stdlib/stdlib.go @@ -20,6 +20,7 @@ import ( _ "github.com/go-python/gpython/stdlib/builtin" _ "github.com/go-python/gpython/stdlib/math" + _ "github.com/go-python/gpython/stdlib/string" _ "github.com/go-python/gpython/stdlib/sys" _ "github.com/go-python/gpython/stdlib/time" ) diff --git a/stdlib/string/string.go b/stdlib/string/string.go new file mode 100644 index 00000000..314fdd09 --- /dev/null +++ b/stdlib/string/string.go @@ -0,0 +1,117 @@ +// Copyright 2022 The go-python Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package string provides the implementation of the python's 'string' module. +package string + +import ( + "strings" + + "github.com/go-python/gpython/py" +) + +func init() { + py.RegisterModule(&py.ModuleImpl{ + Info: py.ModuleInfo{ + Name: "string", + Doc: module_doc, + }, + Methods: []*py.Method{ + py.MustNewMethod("capwords", capwords, 0, capwords_doc), + }, + Globals: py.StringDict{ + "whitespace": whitespace, + "ascii_lowercase": ascii_lowercase, + "ascii_uppercase": ascii_uppercase, + "ascii_letters": ascii_letters, + "digits": digits, + "hexdigits": hexdigits, + "octdigits": octdigits, + "punctuation": punctuation, + "printable": printable, + }, + }) +} + +const module_doc = `A collection of string constants. + +Public module variables: + +whitespace -- a string containing all ASCII whitespace +ascii_lowercase -- a string containing all ASCII lowercase letters +ascii_uppercase -- a string containing all ASCII uppercase letters +ascii_letters -- a string containing all ASCII letters +digits -- a string containing all ASCII decimal digits +hexdigits -- a string containing all ASCII hexadecimal digits +octdigits -- a string containing all ASCII octal digits +punctuation -- a string containing all ASCII punctuation characters +printable -- a string containing all ASCII characters considered printable +` + +var ( + whitespace = py.String(" \t\n\r\x0b\x0c") + ascii_lowercase = py.String("abcdefghijklmnopqrstuvwxyz") + ascii_uppercase = py.String("ABCDEFGHIJKLMNOPQRSTUVWXYZ") + ascii_letters = ascii_lowercase + ascii_uppercase + digits = py.String("0123456789") + hexdigits = py.String("0123456789abcdefABCDEF") + octdigits = py.String("01234567") + punctuation = py.String("!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~") + printable = py.String("0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c") +) + +const capwords_doc = `capwords(s [,sep]) -> string + +Split the argument into words using split, capitalize each +word using capitalize, and join the capitalized words using +join. If the optional second argument sep is absent or None, +runs of whitespace characters are replaced by a single space +and leading and trailing whitespace are removed, otherwise +sep is used to split and join the words.` + +func capwords(self py.Object, args py.Tuple, kwargs py.StringDict) (py.Object, error) { + var ( + pystr py.Object + pysep py.Object = py.None + ) + err := py.ParseTupleAndKeywords(args, kwargs, "s|z", []string{"s", "sep"}, &pystr, &pysep) + if err != nil { + return nil, err + } + + pystr = py.String(strings.ToLower(string(pystr.(py.String)))) + pyvs, err := pystr.(py.String).Split(py.Tuple{pysep}, nil) + if err != nil { + return nil, err + } + + var ( + lst = pyvs.(*py.List).Items + vs = make([]string, len(lst)) + sep = "" + title = func(s string) string { + if s == "" { + return s + } + return strings.ToUpper(s[:1]) + s[1:] + } + ) + + switch pysep { + case py.None: + for i := range vs { + v := string(lst[i].(py.String)) + vs[i] = title(strings.Trim(v, string(whitespace))) + } + sep = " " + default: + sep = string(pysep.(py.String)) + for i := range vs { + v := string(lst[i].(py.String)) + vs[i] = title(v) + } + } + + return py.String(strings.Join(vs, sep)), nil +} diff --git a/stdlib/string/string_test.go b/stdlib/string/string_test.go new file mode 100644 index 00000000..0ae79ef5 --- /dev/null +++ b/stdlib/string/string_test.go @@ -0,0 +1,15 @@ +// Copyright 2022 The go-python Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package string_test + +import ( + "testing" + + "github.com/go-python/gpython/pytest" +) + +func TestString(t *testing.T) { + pytest.RunScript(t, "./testdata/test.py") +} diff --git a/stdlib/string/testdata/test.py b/stdlib/string/testdata/test.py new file mode 100644 index 00000000..2bec0736 --- /dev/null +++ b/stdlib/string/testdata/test.py @@ -0,0 +1,32 @@ +# Copyright 2022 The go-python Authors. All rights reserved. +# Use of this source code is governed by a BSD-style +# license that can be found in the LICENSE file. + +import string + +print("globals:") +for name in ("whitespace", + "ascii_lowercase", + "ascii_uppercase", + "ascii_letters", + "digits", + "hexdigits", + "octdigits", + "punctuation", + "printable"): + v = getattr(string, name) + print("\nstring.%s:\n%s" % (name,repr(v))) + +def assertEqual(x, y): + assert x == y, "got: %s, want: %s" % (repr(x), repr(y)) + +assertEqual(string.capwords('abc def ghi'), 'Abc Def Ghi') +assertEqual(string.capwords('abc\tdef\nghi'), 'Abc Def Ghi') +assertEqual(string.capwords('abc\t def \nghi'), 'Abc Def Ghi') +assertEqual(string.capwords('ABC DEF GHI'), 'Abc Def Ghi') +assertEqual(string.capwords('ABC-DEF-GHI', '-'), 'Abc-Def-Ghi') +assertEqual(string.capwords('ABC-def DEF-ghi GHI'), 'Abc-def Def-ghi Ghi') +assertEqual(string.capwords(' aBc DeF '), 'Abc Def') +assertEqual(string.capwords('\taBc\tDeF\t'), 'Abc Def') +assertEqual(string.capwords('\taBc\tDeF\t', '\t'), '\tAbc\tDef\t') + diff --git a/stdlib/string/testdata/test_golden.txt b/stdlib/string/testdata/test_golden.txt new file mode 100644 index 00000000..95a3e2ef --- /dev/null +++ b/stdlib/string/testdata/test_golden.txt @@ -0,0 +1,28 @@ +globals: + +string.whitespace: +' \t\n\r\x0b\x0c' + +string.ascii_lowercase: +'abcdefghijklmnopqrstuvwxyz' + +string.ascii_uppercase: +'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + +string.ascii_letters: +'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ' + +string.digits: +'0123456789' + +string.hexdigits: +'0123456789abcdefABCDEF' + +string.octdigits: +'01234567' + +string.punctuation: +'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~' + +string.printable: +'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c'