@@ -41,6 +41,7 @@ def __init__(self, dataobj, header=None, extra=None, file_map=None):
4141 super (DataobjImage , self ).__init__ (header = header , extra = extra ,
4242 file_map = file_map )
4343 self ._dataobj = dataobj
44+ self ._fdata_cache = None
4445 self ._data_cache = None
4546
4647 @property
@@ -55,7 +56,19 @@ def _data(self):
5556 return self ._dataobj
5657
5758 def get_data (self , caching = 'fill' ):
58- """ Return image data from image with any necessary scalng applied
59+ """ Return image data from image with any necessary scaling applied
60+
61+ .. WARNING::
62+
63+ We recommend you use the ``get_fdata`` method instead of the
64+ ``get_data`` method, because it is easier to predict the return
65+ data type. We will deprecate the ``get_data`` method around April
66+ 2018, and remove it around April 2020.
67+
68+ If you don't care about the predictability of the return data type,
69+ and you want the minimum possible data size in memory, you can
70+ replicate the array that would be returned by ``img.get_data()`` by
71+ using ``np.asanyarray(img.dataobj)``.
5972
6073 The image ``dataobj`` property can be an array proxy or an array. An
6174 array proxy is an object that knows how to load the image data from
@@ -191,11 +204,155 @@ def get_data(self, caching='fill'):
191204 self ._data_cache = data
192205 return data
193206
def get_fdata(self, caching='fill', dtype=np.float64):
    """ Return floating point image data with necessary scaling applied

    The image ``dataobj`` property can be an array proxy or an array.  An
    array proxy is an object that knows how to load the image data from
    disk.  An image with an array proxy ``dataobj`` is a *proxy image*; an
    image with an array in ``dataobj`` is an *array image*.

    The default behavior for ``get_fdata()`` on a proxy image is to read
    the data from the proxy, and store in an internal cache.  Future calls
    to ``get_fdata`` will return the cached array.  This is the behavior
    selected with `caching` == "fill".

    Once the data has been cached and returned from an array proxy, if you
    modify the returned array, you will also modify the cached array
    (because they are the same array).  Regardless of the `caching` flag,
    this is always true of an array image.

    Parameters
    ----------
    caching : {'fill', 'unchanged'}, optional
        See the Notes section for a detailed explanation.  This argument
        specifies whether the image object should fill in an internal
        cached reference to the returned image data array. "fill" specifies
        that the image should fill an internal cached reference if
        currently empty.  Future calls to ``get_fdata`` will return this
        cached reference.  You might prefer "fill" to save the image object
        from having to reload the array data from disk on each call to
        ``get_fdata``.  "unchanged" means that the image should not fill in
        the internal cached reference if the cache is currently empty.  You
        might prefer "unchanged" to "fill" if you want to make sure that
        the call to ``get_fdata`` does not create an extra (cached)
        reference to the returned array.  In this case it is easier for
        Python to free the memory from the returned array.
    dtype : numpy dtype specifier
        A numpy dtype specifier specifying a floating point type.  Data is
        returned as this floating point type.  Default is ``np.float64``.

    Returns
    -------
    fdata : array
        Array of image data of data type `dtype`.

    Raises
    ------
    ValueError
        If `caching` is not one of "fill" or "unchanged", or if `dtype` is
        not a subtype of ``np.inexact``.

    See also
    --------
    uncache: empty the array data cache

    Notes
    -----
    All images have a property ``dataobj`` that represents the image array
    data.  Images that have been loaded from files usually do not load the
    array data from file immediately, in order to reduce image load time
    and memory use.  For these images, ``dataobj`` is an *array proxy*; an
    object that knows how to load the image array data from file.

    By default (`caching` == "fill"), when you call ``get_fdata`` on a
    proxy image, we load the array data from disk, store (cache) an
    internal reference to this array data, and return the array.  The next
    time you call ``get_fdata``, you will get the cached reference to the
    array, so we don't have to load the array data from disk again.

    The cached array is only reused when its data type matches the
    requested `dtype`.  If it does not match, the data is converted again
    from ``dataobj``; with `caching` == "fill" the newly converted array
    replaces the cached one, and with `caching` == "unchanged" the existing
    cache is left as it is.

    Array images have a ``dataobj`` property that already refers to an
    array in memory, so there is no benefit to caching, and the `caching`
    keywords have no effect.

    For proxy images, you may not want to fill the cache after reading the
    data from disk because the cache will hold onto the array memory until
    the image object is deleted, or you use the image ``uncache`` method.
    If you don't want to fill the cache, then always use
    ``get_fdata(caching='unchanged')``; in this case ``get_fdata`` will not
    fill the cache (store the reference to the array) if the cache is empty
    (no reference to the array).  If the cache is full, "unchanged" leaves
    the cache full and returns the cached array reference.

    The cache can affect the behavior of the image, because if the cache is
    full, or you have an array image, then modifying the returned array
    will modify the result of future calls to ``get_fdata()``.  For example
    you might do this:

    >>> import os
    >>> import nibabel as nib
    >>> from nibabel.testing import data_path
    >>> img_fname = os.path.join(data_path, 'example4d.nii.gz')

    >>> img = nib.load(img_fname)  # This is a proxy image
    >>> nib.is_proxy(img.dataobj)
    True

    The array is not yet cached by a call to "get_fdata", so:

    >>> img.in_memory
    False

    After we call ``get_fdata`` using the default `caching` == 'fill', the
    cache contains a reference to the returned array ``data``:

    >>> data = img.get_fdata()
    >>> img.in_memory
    True

    We modify an element in the returned data array:

    >>> data[0, 0, 0, 0]
    0.0
    >>> data[0, 0, 0, 0] = 99
    >>> data[0, 0, 0, 0]
    99.0

    The next time we call 'get_fdata', the method returns the cached
    reference to the (modified) array:

    >>> data_again = img.get_fdata()
    >>> data_again is data
    True
    >>> data_again[0, 0, 0, 0]
    99.0

    If you had *initially* used `caching` == 'unchanged' then the returned
    ``data`` array would have been loaded from file, but not cached, and:

    >>> img = nib.load(img_fname)  # a proxy image again
    >>> data = img.get_fdata(caching='unchanged')
    >>> img.in_memory
    False
    >>> data[0, 0, 0, 0] = 99
    >>> data_again = img.get_fdata(caching='unchanged')
    >>> data_again is data
    False
    >>> data_again[0, 0, 0, 0]
    0.0
    """
    if caching not in ('fill', 'unchanged'):
        raise ValueError('caching value should be "fill" or "unchanged"')
    dtype = np.dtype(dtype)
    if not issubclass(dtype.type, np.inexact):
        raise ValueError('{} should be floating point type'.format(dtype))
    cached = self._fdata_cache
    # Only hand back the cache when it already has the requested type;
    # otherwise the caller would silently receive the wrong dtype.
    if cached is not None and cached.dtype == dtype:
        return cached
    data = np.asanyarray(self._dataobj).astype(dtype)
    if caching == 'fill':
        self._fdata_cache = data
    return data
349+
@property
def in_memory(self):
    """ True when any array data is held in memory

    That is the case when ``_dataobj`` is itself a numpy array, or when
    either the ``_fdata_cache`` or the ``_data_cache`` attribute is not
    None.
    """
    # An ndarray ``_dataobj`` is in memory by definition.
    if isinstance(self._dataobj, np.ndarray):
        return True
    caches_empty = self._fdata_cache is None and self._data_cache is None
    return not caches_empty
200357
201358 def uncache (self ):
@@ -206,23 +363,24 @@ def uncache(self):
206363 * *array images* where the data ``img.dataobj`` is an array
207364 * *proxy images* where the data ``img.dataobj`` is a proxy object
208365
209- If you call ``img.get_data ()`` on a proxy image, the result of reading
366+ If you call ``img.get_fdata ()`` on a proxy image, the result of reading
210367 from the proxy gets cached inside the image object, and this cache is
211- what gets returned from the next call to ``img.get_data ()``. If you
368+ what gets returned from the next call to ``img.get_fdata ()``. If you
212369 modify the returned data, as in::
213370
214- data = img.get_data ()
371+ data = img.get_fdata ()
215372 data[:] = 42
216373
217- then the next call to ``img.get_data ()`` returns the modified array,
374+ then the next call to ``img.get_fdata ()`` returns the modified array,
218375 whether the image is an array image or a proxy image::
219376
220- assert np.all(img.get_data () == 42)
377+ assert np.all(img.get_fdata () == 42)
221378
222379 When you uncache an array image, this has no effect on the return of
223- ``img.get_data ()``, but when you uncache a proxy image, the result of
224- ``img.get_data ()`` returns to its original value.
380+ ``img.get_fdata ()``, but when you uncache a proxy image, the result of
381+ ``img.get_fdata ()`` returns to its original value.
225382 """
383+ self ._fdata_cache = None
226384 self ._data_cache = None
227385
228386 @property
0 commit comments