Skip to content

Commit 8df1560

Browse files
Merge pull request #1517 from kluid/mod_csv
Add csv.reader options
2 parents 40783d1 + 25f6561 commit 8df1560

File tree

2 files changed

+96
-20
lines changed

2 files changed

+96
-20
lines changed

tests/snippets/test_csv.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
from testutils import assert_raises
2+
13
import csv
24

35
for row in csv.reader(['one,two,three']):
@@ -21,3 +23,23 @@ def f():
2123
assert six == 'six'
2224

2325
f()
26+
27+
def test_delim():
    """Check that csv.reader splits on a custom delimiter and that a
    two-character delimiter is rejected with TypeError."""
    lines = ['one|two|three', 'four|five|six']
    rows = csv.reader(lines, delimiter='|')

    # Each row must unpack into exactly three fields.
    one, two, three = next(rows)
    four, five, six = next(rows)

    parsed = [one, two, three, four, five, six]
    wanted = ['one', 'two', 'three', 'four', 'five', 'six']
    for actual, expected in zip(parsed, wanted):
        assert actual == expected

    # Constructing the reader is what raises; no row is ever pulled from it.
    with assert_raises(TypeError):
        bad_lines = ['one,,two,,three']
        csv.reader(bad_lines, delimiter=',,')
44+
45+
test_delim()

vm/src/stdlib/csv.rs

Lines changed: 74 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,10 @@ use std::fmt::{self, Debug, Formatter};
44
use csv as rust_csv;
55
use itertools::join;
66

7+
use crate::function::PyFuncArgs;
8+
79
use crate::obj::objiter;
8-
use crate::obj::objstr::PyString;
10+
use crate::obj::objstr::{self, PyString};
911
use crate::obj::objtype::PyClassRef;
1012
use crate::pyobject::{IntoPyObject, TryFromObject, TypeProtocol};
1113
use crate::pyobject::{PyClassImpl, PyIterable, PyObjectRef, PyRef, PyResult, PyValue};
@@ -20,8 +22,58 @@ pub enum QuoteStyle {
2022
QuoteNone,
2123
}
2224

23-
pub fn build_reader(iterable: PyIterable<PyObjectRef>, vm: &VirtualMachine) -> PyResult {
24-
Reader::new(iterable).into_ref(vm).into_pyobject(vm)
25+
/// Parsing options collected from the keyword arguments of `csv.reader`.
///
/// Both fields are single bytes because that is the form in which they are
/// handed to the underlying CSV reader builder.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ReaderOption {
    /// Byte that separates fields within a record.
    delimiter: u8,
    /// Byte used to quote fields.
    quotechar: u8,
}
29+
30+
impl ReaderOption {
31+
fn new(args: PyFuncArgs, vm: &VirtualMachine) -> PyResult<Self> {
32+
let delimiter = {
33+
let bytes = args
34+
.get_optional_kwarg("delimiter")
35+
.map_or(",".to_string(), |pyobj| objstr::get_value(&pyobj))
36+
.into_bytes();
37+
38+
match bytes.len() {
39+
1 => bytes[0],
40+
_ => {
41+
let msg = r#""delimiter" must be a 1-character string"#;
42+
return Err(vm.new_type_error(msg.to_string()));
43+
}
44+
}
45+
};
46+
47+
let quotechar = {
48+
let bytes = args
49+
.get_optional_kwarg("quotechar")
50+
.map_or("\"".to_string(), |pyobj| objstr::get_value(&pyobj))
51+
.into_bytes();
52+
53+
match bytes.len() {
54+
1 => bytes[0],
55+
_ => {
56+
let msg = r#""quotechar" must be a 1-character string"#;
57+
return Err(vm.new_type_error(msg.to_string()));
58+
}
59+
}
60+
};
61+
62+
Ok(ReaderOption {
63+
delimiter,
64+
quotechar,
65+
})
66+
}
67+
}
68+
69+
pub fn build_reader(
70+
iterable: PyIterable<PyObjectRef>,
71+
args: PyFuncArgs,
72+
vm: &VirtualMachine,
73+
) -> PyResult {
74+
let config = ReaderOption::new(args, vm)?;
75+
76+
Reader::new(iterable, config).into_ref(vm).into_pyobject(vm)
2577
}
2678

2779
fn into_strings(iterable: &PyIterable<PyObjectRef>, vm: &VirtualMachine) -> PyResult<Vec<String>> {
@@ -46,24 +98,26 @@ type MemIO = std::io::Cursor<Vec<u8>>;
4698

4799
#[allow(dead_code)]
48100
enum ReadState {
49-
PyIter(PyIterable<PyObjectRef>),
101+
PyIter(PyIterable<PyObjectRef>, ReaderOption),
50102
CsvIter(rust_csv::StringRecordsIntoIter<MemIO>),
51103
}
52104

53105
impl ReadState {
54-
fn new(iter: PyIterable) -> Self {
55-
ReadState::PyIter(iter)
106+
fn new(iter: PyIterable, config: ReaderOption) -> Self {
107+
ReadState::PyIter(iter, config)
56108
}
57109

58110
fn cast_to_reader(&mut self, vm: &VirtualMachine) -> PyResult<()> {
59-
if let ReadState::PyIter(ref iterable) = self {
111+
if let ReadState::PyIter(ref iterable, ref config) = self {
60112
let lines = into_strings(iterable, vm)?;
61113
let contents = join(lines, "\n");
62114

63115
let bytes = Vec::from(contents.as_bytes());
64116
let reader = MemIO::new(bytes);
65117

66118
let csv_iter = rust_csv::ReaderBuilder::new()
119+
.delimiter(config.delimiter)
120+
.quote(config.quotechar)
67121
.has_headers(false)
68122
.from_reader(reader)
69123
.into_records();
@@ -92,8 +146,8 @@ impl PyValue for Reader {
92146
}
93147

94148
impl Reader {
95-
fn new(iter: PyIterable<PyObjectRef>) -> Self {
96-
let state = RefCell::new(ReadState::new(iter));
149+
fn new(iter: PyIterable<PyObjectRef>, config: ReaderOption) -> Self {
150+
let state = RefCell::new(ReadState::new(iter, config));
97151
Reader { state }
98152
}
99153
}
@@ -121,7 +175,7 @@ impl Reader {
121175
.collect::<PyResult<Vec<_>>>()?;
122176
Ok(vm.ctx.new_list(iter))
123177
}
124-
Err(_) => {
178+
Err(_err) => {
125179
let msg = String::from("Decode Error");
126180
let decode_error = vm.new_unicode_decode_error(msg);
127181
Err(decode_error)
@@ -136,9 +190,9 @@ impl Reader {
136190
}
137191
}
138192

139-
fn csv_reader(fp: PyObjectRef, vm: &VirtualMachine) -> PyResult {
193+
fn csv_reader(fp: PyObjectRef, args: PyFuncArgs, vm: &VirtualMachine) -> PyResult {
140194
if let Ok(iterable) = PyIterable::<PyObjectRef>::try_from_object(vm, fp) {
141-
build_reader(iterable, vm)
195+
build_reader(iterable, args, vm)
142196
} else {
143197
Err(vm.new_type_error("argument 1 must be an iterator".to_string()))
144198
}
@@ -156,13 +210,13 @@ pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
156210
);
157211

158212
py_module!(vm, "_csv", {
159-
"reader" => ctx.new_rustfunc(csv_reader),
160-
"Reader" => reader_type,
161-
"Error" => error,
162-
// constants
163-
"QUOTE_MINIMAL" => ctx.new_int(QuoteStyle::QuoteMinimal as i32),
164-
"QUOTE_ALL" => ctx.new_int(QuoteStyle::QuoteAll as i32),
165-
"QUOTE_NONNUMERIC" => ctx.new_int(QuoteStyle::QuoteNonnumeric as i32),
166-
"QUOTE_NONE" => ctx.new_int(QuoteStyle::QuoteNone as i32),
213+
"reader" => ctx.new_rustfunc(csv_reader),
214+
"Reader" => reader_type,
215+
"Error" => error,
216+
// constants
217+
"QUOTE_MINIMAL" => ctx.new_int(QuoteStyle::QuoteMinimal as i32),
218+
"QUOTE_ALL" => ctx.new_int(QuoteStyle::QuoteAll as i32),
219+
"QUOTE_NONNUMERIC" => ctx.new_int(QuoteStyle::QuoteNonnumeric as i32),
220+
"QUOTE_NONE" => ctx.new_int(QuoteStyle::QuoteNone as i32),
167221
})
168222
}

0 commit comments

Comments
 (0)