@@ -116,7 +116,7 @@ def df_generator(df, chunksize=10 ** 6, logger=None):
116116 rows += chunksize
117117
118118
def cast_pandas(df, columns=None, copy_obj=None, logger=None, **kwargs):
    """
    Pandas does not handle null values in integer or boolean fields out of the
    box, so cast fields that should be these types in the database to object
    fields holding ``None`` for nulls.

    Columns whose ``str(col.type)`` is ``"INTEGER"`` or ``"BIGINT"`` are
    converted element-wise with ``int``; ``"BOOLEAN"`` columns with ``bool``.
    Null values (as reported by ``isna``) become ``None``. Columns of any other
    type are left untouched. A column that is listed but absent from ``df`` is
    skipped with a warning instead of raising.

    Parameters
    ----------
    df: pandas DataFrame
        DataFrame to convert. It is modified in place and also returned.
    columns: iterable of column objects, optional
        Objects exposing ``.name`` and ``.type``. When falsy, the columns are
        read from ``copy_obj.table_obj.columns``.
    copy_obj: object, optional
        Only ``copy_obj.table_obj.columns`` is read, and only when ``columns``
        is not supplied.
    logger: logger-like object, optional
        Receives the warning for missing columns via ``logger.warning``. When
        None, ``get_logger("cast_pandas")`` is used.
    **kwargs
        Accepted for call-site compatibility; not used by this function.

    Returns
    -------
    df: pandas DataFrame
        The same DataFrame, with integer/boolean fields changed to objects
        with None values for null.

    Raises
    ------
    ValueError
        If both ``columns`` and ``copy_obj`` are None.
    """
    # Function-scope import: the module is only known to expose ``isna`` from
    # pandas, so bring ``Series`` into scope here.
    from pandas import Series

    if columns is None and copy_obj is None:
        raise ValueError("One of columns or copy_obj must be supplied")

    # Honour a caller-supplied logger; only fall back to the module default
    # when none was given.
    if logger is None:
        logger = get_logger("cast_pandas")

    columns = columns or copy_obj.table_obj.columns

    # Database type name -> Python constructor applied to non-null values.
    casters = {"INTEGER": int, "BIGINT": int, "BOOLEAN": bool}

    for col in columns:
        caster = casters.get(str(col.type))
        if caster is None:
            continue

        # Keep the try body to the lookup alone so that a KeyError raised by
        # anything else is not misreported as a missing column.
        try:
            series = df[col.name]
        except KeyError:
            logger.warning(
                "Column {} not in DataFrame. Cannot coerce object type.".format(
                    col.name
                )
            )
            continue

        # An explicit object dtype keeps None (rather than NaN) and Python
        # int/bool values without relying on the ``convert_dtype`` keyword of
        # ``Series.apply``, which is deprecated in recent pandas releases.
        df[col.name] = Series(
            [None if isna(value) else caster(value) for value in series],
            index=series.index,
            dtype=object,
        )

    return df
0 commit comments