|
2 | 2 | Tests dtype specification during parsing |
3 | 3 | for all of the parsers defined in parsers.py |
4 | 4 | """ |
| 5 | +from collections import defaultdict |
5 | 6 | from io import StringIO |
6 | 7 |
|
7 | 8 | import numpy as np |
@@ -343,3 +344,40 @@ def test_nullable_int_dtype(all_parsers, any_int_ea_dtype): |
343 | 344 | ) |
344 | 345 | actual = parser.read_csv(StringIO(data), dtype=dtype) |
345 | 346 | tm.assert_frame_equal(actual, expected) |
| 347 | + |
| 348 | + |
@pytest.mark.parametrize("default", ["float", "float64"])
def test_dtypes_defaultdict(all_parsers, default):
    """Check that a ``defaultdict`` given as ``dtype`` supplies a fallback dtype.

    Column ``a`` has an explicit ``int64`` entry, while column ``b`` has no
    entry and is expected to come back as a float column via the
    parametrized default produced by the ``defaultdict`` factory.
    """
    # GH#41574
    csv_text = "a,b\n1,2\n"
    dtype_map = defaultdict(lambda: default, a="int64")

    frame = all_parsers.read_csv(StringIO(csv_text), dtype=dtype_map)

    tm.assert_frame_equal(frame, DataFrame({"a": [1], "b": 2.0}))
| 360 | + |
| 361 | + |
def test_dtypes_defaultdict_mangle_dup_cols(all_parsers):
    """Check ``defaultdict`` dtypes when the header has duplicate column names.

    The header repeats ``a`` and ``b`` and also contains a literal ``b.1``
    column. The expected frame pins both the de-duplicated labels
    (``a.1``, ``b.2``) and the dtype each resulting column ends up with.
    """
    # GH#41574
    csv_text = "a,b,a,b,b.1\n1,2,3,4,5\n"
    # Explicit entries for "a" and "b.1"; every other key falls back to float64.
    dtype_map = defaultdict(lambda: "float64", {"a": "int64", "b.1": "int64"})

    frame = all_parsers.read_csv(StringIO(csv_text), dtype=dtype_map)

    expected_columns = {
        "a": [1],
        "b": [2.0],
        "a.1": [3],
        "b.2": [4.0],
        "b.1": [5],
    }
    tm.assert_frame_equal(frame, DataFrame(expected_columns))
| 373 | + |
| 374 | + |
def test_dtypes_defaultdict_invalid(all_parsers):
    """Check that an unusable default dtype from a ``defaultdict`` raises.

    Column ``a`` has a valid explicit dtype; the factory default is the
    string ``"invalid_dtype"``, and parsing is expected to fail with a
    ``TypeError`` whose message contains "not understood".
    """
    # GH#41574
    csv_text = "a,b\n1,2\n"
    dtype_map = defaultdict(lambda: "invalid_dtype", a="int64")
    buffer = StringIO(csv_text)

    with pytest.raises(TypeError, match="not understood"):
        all_parsers.read_csv(buffer, dtype=dtype_map)
0 commit comments