@@ -676,6 +676,7 @@ def test_empty_str_methods(self):
676676 tm .assert_series_equal (empty_str , empty .str .pad (42 ))
677677 tm .assert_series_equal (empty_str , empty .str .center (42 ))
678678 tm .assert_series_equal (empty_list , empty .str .split ('a' ))
679+ tm .assert_series_equal (empty_list , empty .str .rsplit ('a' ))
679680 tm .assert_series_equal (empty_list , empty .str .partition ('a' , expand = False ))
680681 tm .assert_series_equal (empty_list , empty .str .rpartition ('a' , expand = False ))
681682 tm .assert_series_equal (empty_str , empty .str .slice (stop = 1 ))
@@ -1212,15 +1213,15 @@ def test_split(self):
12121213 # mixed
12131214 mixed = Series (['a_b_c' , NA , 'd_e_f' , True , datetime .today (),
12141215 None , 1 , 2. ])
1215- rs = mixed .str .split ('_' )
1216- xp = Series ([['a' , 'b' , 'c' ], NA , ['d' , 'e' , 'f' ], NA , NA ,
1216+ result = mixed .str .split ('_' )
1217+ exp = Series ([['a' , 'b' , 'c' ], NA , ['d' , 'e' , 'f' ], NA , NA ,
12171218 NA , NA , NA ])
1218- tm .assert_isinstance (rs , Series )
1219- tm .assert_almost_equal (rs , xp )
1219+ tm .assert_isinstance (result , Series )
1220+ tm .assert_almost_equal (result , exp )
12201221
1221- rs = mixed .str .split ('_' , expand = False )
1222- tm .assert_isinstance (rs , Series )
1223- tm .assert_almost_equal (rs , xp )
1222+ result = mixed .str .split ('_' , expand = False )
1223+ tm .assert_isinstance (result , Series )
1224+ tm .assert_almost_equal (result , exp )
12241225
12251226 # unicode
12261227 values = Series ([u ('a_b_c' ), u ('c_d_e' ), NA , u ('f_g_h' )])
@@ -1234,12 +1235,75 @@ def test_split(self):
12341235 result = values .str .split ('_' , expand = False )
12351236 tm .assert_series_equal (result , exp )
12361237
1238+ # regex split
1239+ values = Series ([u ('a,b_c' ), u ('c_d,e' ), NA , u ('f,g,h' )])
1240+ result = values .str .split ('[,_]' )
1241+ exp = Series ([[u ('a' ), u ('b' ), u ('c' )],
1242+ [u ('c' ), u ('d' ), u ('e' )], NA ,
1243+ [u ('f' ), u ('g' ), u ('h' )]])
1244+ tm .assert_series_equal (result , exp )
1245+
def test_rsplit(self):
    """Exercise ``Series.str.rsplit`` on plain, mixed and unicode data.

    Covers a single-character separator, a multi-character separator,
    object Series holding non-string entries, unicode input, a
    regex-looking pattern, and the ``n`` limit.
    """
    # single-character separator
    ser = Series(['a_b_c', 'c_d_e', NA, 'f_g_h'])
    want = Series([['a', 'b', 'c'], ['c', 'd', 'e'], NA, ['f', 'g', 'h']])
    tm.assert_series_equal(ser.str.rsplit('_'), want)

    # more than one char; same expectation with and without expand=False
    ser = Series(['a__b__c', 'c__d__e', NA, 'f__g__h'])
    for kwargs in ({}, {'expand': False}):
        tm.assert_series_equal(ser.str.rsplit('__', **kwargs), want)

    # mixed: only the two string entries are expected to yield lists
    mixed = Series(['a_b_c', NA, 'd_e_f', True, datetime.today(),
                    None, 1, 2.])
    want = Series([['a', 'b', 'c'], NA, ['d', 'e', 'f'], NA, NA,
                   NA, NA, NA])
    for kwargs in ({}, {'expand': False}):
        got = mixed.str.rsplit('_', **kwargs)
        tm.assert_isinstance(got, Series)
        tm.assert_almost_equal(got, want)

    # unicode
    ser = Series([u('a_b_c'), u('c_d_e'), NA, u('f_g_h')])
    want = Series([[u('a'), u('b'), u('c')],
                   [u('c'), u('d'), u('e')],
                   NA,
                   [u('f'), u('g'), u('h')]])
    for kwargs in ({}, {'expand': False}):
        tm.assert_series_equal(ser.str.rsplit('_', **kwargs), want)

    # regex split is not supported by rsplit: the pattern is expected to
    # leave every string as a single, unsplit element
    ser = Series([u('a,b_c'), u('c_d,e'), NA, u('f,g,h')])
    want = Series([[u('a,b_c')], [u('c_d,e')], NA, [u('f,g,h')]])
    tm.assert_series_equal(ser.str.rsplit('[,_]'), want)

    # setting max number of splits, make sure it's from reverse
    ser = Series(['a_b_c', 'c_d_e', NA, 'f_g_h'])
    want = Series([['a_b', 'c'], ['c_d', 'e'], NA, ['f_g', 'h']])
    tm.assert_series_equal(ser.str.rsplit('_', n=1), want)
1298+
def test_split_noargs(self):
    """Check ``split()`` and ``rsplit()`` called without arguments."""
    # #1859
    names = Series(['Wes McKinney', 'Travis Oliphant'])
    expected = ['Travis', 'Oliphant']

    # Both directions are expected to give the same pieces for the
    # second entry.
    self.assertEqual(names.str.split()[1], expected)
    self.assertEqual(names.str.rsplit()[1], expected)
12431307
12441308 def test_split_maxsplit (self ):
12451309 # re.split 0, str.split -1
@@ -1348,6 +1412,55 @@ def test_split_to_multiindex_expand(self):
13481412 with tm .assertRaisesRegexp (ValueError , "expand must be" ):
13491413 idx .str .split ('_' , return_type = "some_invalid_type" )
13501414
def test_rsplit_to_dataframe_expand(self):
    """Check ``Series.str.rsplit(..., expand=True)`` against DataFrames."""
    # separator absent from every value: a single column labelled 0
    ser = Series(['nosplit', 'alsonosplit'])
    want = DataFrame({0: Series(['nosplit', 'alsonosplit'])})
    tm.assert_frame_equal(ser.str.rsplit('_', expand=True), want)

    # every value splits into three pieces
    ser = Series(['some_equal_splits', 'with_no_nans'])
    three_cols = DataFrame({0: ['some', 'with'],
                            1: ['equal', 'no'],
                            2: ['splits', 'nans']})
    tm.assert_frame_equal(ser.str.rsplit('_', expand=True), three_cols)

    # n=2 is expected to give the same three columns
    tm.assert_frame_equal(ser.str.rsplit('_', expand=True, n=2),
                          three_cols)

    # n=1 leaves the left-hand part unsplit
    want = DataFrame({0: ['some_equal', 'with_no'],
                      1: ['splits', 'nans']})
    tm.assert_frame_equal(ser.str.rsplit('_', expand=True, n=1), want)

    # the index of the input is expected on the output
    ser = Series(['some_splits', 'with_index'], index=['preserve', 'me'])
    want = DataFrame({0: ['some', 'with'], 1: ['splits', 'index']},
                     index=['preserve', 'me'])
    tm.assert_frame_equal(ser.str.rsplit('_', expand=True), want)
1442+
def test_rsplit_to_multiindex_expand(self):
    """Check ``Index.str.rsplit(..., expand=True)`` results and levels."""
    # separator absent from every value: one level
    idx = Index(['nosplit', 'alsonosplit'])
    got = idx.str.rsplit('_', expand=True)
    want = Index([np.array(['nosplit']), np.array(['alsonosplit'])])
    tm.assert_index_equal(got, want)
    self.assertEqual(got.nlevels, 1)

    # every value splits into three pieces: three levels
    idx = Index(['some_equal_splits', 'with_no_nans'])
    got = idx.str.rsplit('_', expand=True)
    want = MultiIndex.from_tuples([('some', 'equal', 'splits'),
                                   ('with', 'no', 'nans')])
    tm.assert_index_equal(got, want)
    self.assertEqual(got.nlevels, 3)

    # n=1 leaves the left-hand part unsplit: two levels
    idx = Index(['some_equal_splits', 'with_no_nans'])
    got = idx.str.rsplit('_', expand=True, n=1)
    want = MultiIndex.from_tuples([('some_equal', 'splits'),
                                   ('with_no', 'nans')])
    tm.assert_index_equal(got, want)
    self.assertEqual(got.nlevels, 2)
1463+
13511464 def test_partition_series (self ):
13521465 values = Series (['a_b_c' , 'c_d_e' , NA , 'f_g_h' ])
13531466
0 commit comments