# Type variable whose upper bound is ArrowExtensionArray; the bound is given
# by name (a string forward reference) because the class is defined further
# down in this module.
6363ArrowExtensionArrayT = TypeVar ("ArrowExtensionArrayT" , bound = "ArrowExtensionArray" )
6464
6565
def to_pyarrow_type(
    dtype: ArrowDtype | pa.DataType | Dtype | None,
) -> pa.DataType | None:
    """
    Resolve a dtype specification to a pyarrow ``DataType``.

    Parameters
    ----------
    dtype : ArrowDtype, pa.DataType, Dtype or None
        The dtype specification to translate.

    Returns
    -------
    pa.DataType or None
        * ``dtype.pyarrow_dtype`` when ``dtype`` is an ``ArrowDtype``.
        * ``dtype`` unchanged when it already is a ``pa.DataType``.
        * ``pa.from_numpy_dtype(dtype)`` for any other truthy value.
        * ``None`` when ``dtype`` is falsy (for example ``None``).
    """
    if isinstance(dtype, ArrowDtype):
        return dtype.pyarrow_dtype
    if isinstance(dtype, pa.DataType):
        return dtype
    # The decision to convert is based on truthiness, not on an explicit
    # ``is None`` comparison, so every falsy value maps to ``None``.
    return pa.from_numpy_dtype(dtype) if dtype else None
81+
82+
6683class ArrowExtensionArray (OpsMixin , ExtensionArray ):
6784 """
6885 Base class for ExtensionArray backed by Arrow ChunkedArray.
@@ -89,13 +106,7 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy=False):
89106 """
90107 Construct a new ExtensionArray from a sequence of scalars.
91108 """
92- if isinstance (dtype , ArrowDtype ):
93- pa_dtype = dtype .pyarrow_dtype
94- elif dtype :
95- pa_dtype = pa .from_numpy_dtype (dtype )
96- else :
97- pa_dtype = None
98-
109+ pa_dtype = to_pyarrow_type (dtype )
99110 if isinstance (scalars , cls ):
100111 data = scalars ._data
101112 if pa_dtype :
@@ -113,7 +124,40 @@ def _from_sequence_of_strings(
113124 """
114125 Construct a new ExtensionArray from a sequence of strings.
115126 """
116- return cls ._from_sequence (strings , dtype = dtype , copy = copy )
127+ pa_type = to_pyarrow_type (dtype )
128+ if pa .types .is_timestamp (pa_type ):
129+ from pandas .core .tools .datetimes import to_datetime
130+
131+ scalars = to_datetime (strings , errors = "raise" )
132+ elif pa .types .is_date (pa_type ):
133+ from pandas .core .tools .datetimes import to_datetime
134+
135+ scalars = to_datetime (strings , errors = "raise" ).date
136+ elif pa .types .is_duration (pa_type ):
137+ from pandas .core .tools .timedeltas import to_timedelta
138+
139+ scalars = to_timedelta (strings , errors = "raise" )
140+ elif pa .types .is_time (pa_type ):
141+ from pandas .core .tools .times import to_time
142+
143+ # "coerce" to allow "null times" (None) to not raise
144+ scalars = to_time (strings , errors = "coerce" )
145+ elif pa .types .is_boolean (pa_type ):
146+ from pandas .core .arrays import BooleanArray
147+
148+ scalars = BooleanArray ._from_sequence_of_strings (strings ).to_numpy ()
149+ elif (
150+ pa .types .is_integer (pa_type )
151+ or pa .types .is_floating (pa_type )
152+ or pa .types .is_decimal (pa_type )
153+ ):
154+ from pandas .core .tools .numeric import to_numeric
155+
156+ scalars = to_numeric (strings , errors = "raise" )
157+ else :
158+ # Let pyarrow try to infer or raise
159+ scalars = strings
160+ return cls ._from_sequence (scalars , dtype = pa_type , copy = copy )
117161
118162 def __getitem__ (self , item : PositionalIndexer ):
119163 """Select a subset of self.
0 commit comments