Skip to content

Commit d2edf0d

Browse files
authored
Fixes json parser when input is a stream (#27)
1 parent d462ea8 commit d2edf0d

File tree

3 files changed

+82
-0
lines changed

3 files changed

+82
-0
lines changed

_unittests/ut_df/data/classic.json

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
[{"name":"cudaGetDeviceCount",
2+
"ph":"X",
3+
"cat":"cuda",
4+
"ts":1634290065724226794,
5+
"dur":800,
6+
"tid":"Thread 2080429824: Runtime API",
7+
"pid":"[89792] Process",
8+
"args":{}},
9+
{"name":"_Z25full_reduce_tensor_kernelIfLi256ELi1ELi1ELi256EL21cudnnReduceTensorOp_t0EL21cudnnNanPropagation_t0EEv17cudnnTensorStructPjS3_PT_S5_S4_bii",
10+
"ph":"X",
11+
"cat":"cuda",
12+
"ts":1634290112071305413,
13+
"dur":1888,
14+
"tid":"_Z25full_reduce_tensor_kernelIfLi256ELi1ELi1ELi256EL21cudnnReduceTensorOp_t0EL21cudnnNanPropagation_t0EEv17cudnnTensorStructPjS3_PT_S5_S4_bii",
15+
"pid":"[0:1] Compute",
16+
"args":{"Grid size":"[ 1, 1, 1 ]",
17+
"Block size":"[ 256, 1, 1 ]"}},
18+
{"name":"_Z28op_tensor_kernel_alpha2_zeroILi3EfffLi1ELi256ELi1ELi1EL17cudnnOpTensorOp_t0EEv16alpha2_zero_argsIT0_T1_T2_E",
19+
"ph":"X",
20+
"cat":"cuda",
21+
"ts":1634290112071308133,
22+
"dur":1440,
23+
"tid":"Compute",
24+
"pid":"[0:1] Overview",
25+
"args":{"Grid size":"[ 1, 1, 1 ]",
26+
"Block size":"[ 1, 256, 1 ]"}},
27+
{"name":"_Z28op_tensor_kernel_alpha2_zeroILi3EfffLi1ELi256ELi1ELi1EL17cudnnOpTensorOp_t0EEv16alpha2_zero_argsIT0_T1_T2_E",
28+
"ph":"X",
29+
"cat":"cuda",
30+
"ts":1634290112071308133,
31+
"dur":1440,
32+
"tid":"_Z28op_tensor_kernel_alpha2_zeroILi3EfffLi1ELi256ELi1ELi1EL17cudnnOpTensorOp_t0EEv16alpha2_zero_argsIT0_T1_T2_E",
33+
"pid":"[0:1] Compute",
34+
"args":{"Grid size":"[ 1, 1, 1 ]",
35+
"Block size":"[ 1, 256, 1 ]"}}]

_unittests/ut_df/test_dataframe_io_helpers.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,51 @@ def test_bug_documentation(self):
285 285
items.append(item)
286 286
self.assertEqual(len(items), 2)
287 287

288+
def test_read_json_classic(self):
289+
data = self.abs_path_join(__file__, 'data', 'classic.json')
290+
dfs = pandas.read_json(data, orient='records')
291+
dfs['ts2'] = dfs['ts'].apply(lambda t: t / 1e9)
292+
self.assertEqual(dfs.shape[1], 9)
293+
self.assertGreater(dfs.shape[0], 2)
294+
it = StreamingDataFrame.read_json(data)
295+
it['ts2'] = it['ts'].apply(lambda t: t / 1e9)
296+
h1 = it.to_df()
297+
h2 = it.to_df()
298+
self.assertNotEmpty(h1)
299+
self.assertNotEmpty(h2)
300+
self.assertEqualDataFrame(h1, h2)
301+
self.assertEqual(h1.shape[1], 9)
302+
303+
def test_read_json_classic_file(self):
304+
data = self.abs_path_join(__file__, 'data', 'classic.json')
305+
dfs = pandas.read_json(data, orient='records')
306+
self.assertEqual(dfs.shape[1], 8)
307+
self.assertGreater(dfs.shape[0], 2)
308+
with open(data, "r", encoding="utf-8") as f:
309+
it = StreamingDataFrame.read_json(f, orient='records')
310+
h1 = it.to_df()
311+
h2 = it.to_df()
312+
self.assertNotEmpty(h1)
313+
self.assertNotEmpty(h2)
314+
self.assertEqualDataFrame(h1, h2)
315+
self.assertEqual(h1.shape[1], 8)
316+
317+
def test_read_json_classic_file_formula(self):
318+
data = self.abs_path_join(__file__, 'data', 'classic.json')
319+
dfs = pandas.read_json(data, orient='records')
320+
dfs['ts2'] = dfs['ts'].apply(lambda t: t / 1e9)
321+
self.assertEqual(dfs.shape[1], 9)
322+
self.assertGreater(dfs.shape[0], 2)
323+
with open(data, "r", encoding="utf-8") as f:
324+
it = StreamingDataFrame.read_json(f)
325+
it['ts2'] = it['ts'].apply(lambda t: t / 1e9)
326+
h1 = it.to_df()
327+
h2 = it.to_df()
328+
self.assertNotEmpty(h1)
329+
self.assertNotEmpty(h2)
330+
self.assertEqualDataFrame(h1, h2)
331+
self.assertEqual(h1.shape[1], 9)
332+
288 333

289 334
if __name__ == "__main__":
290 335
unittest.main()

pandas_streaming/df/dataframe_io_helpers.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,8 @@ def enumerate_json_items(filename, encoding=None, lines=False, flatten=False, fL
256 256
encoding=encoding, lines=False, flatten=flatten, fLOG=fLOG):
257 257
yield el
258 258
else:
259+
if hasattr(filename, 'seek'):
260+
filename.seek(0)
259 261
parser = ijson.parse(filename)
260 262
current = None
261 263
curkey = None

0 commit comments

Comments
 (0)