PYTHON-1742 add postBatchResumeToken support

prashantmital · prashantmital · commit ddac30d2ffb8 · 2019-07-25T17:18:35.000-07:00
PYTHON-1815 add tests for postBatchResumeToken support; PYTHON-1845 clarify resume token used in resuming and getResumeToken
diff --git a/doc/changelog.rst b/doc/changelog.rst
@@ -62,6 +62,8 @@ Version 3.9 adds support for MongoDB 4.2. Highlights include:
   :meth:`~pymongo.operations.UpdateMany`.
 - :class:`~bson.binary.Binary` now supports any bytes-like type that implements
   the buffer protocol.
+- Resume tokens can now be accessed from a ``ChangeStream`` cursor using the
+  :attr:`~pymongo.change_stream.ChangeStream.resume_token` attribute.
 
 .. _URI options specification: https://github.com/mongodb/specifications/blob/master/source/uri-options/uri-options.rst
 
diff --git a/pymongo/change_stream.py b/pymongo/change_stream.py
@@ -77,13 +77,16 @@ def __init__(self, target, pipeline, full_document, resume_after,
 
         self._pipeline = copy.deepcopy(pipeline)
         self._full_document = full_document
-        self._resume_token = copy.deepcopy(resume_after)
+        self._uses_start_after = start_after is not None
+        self._uses_resume_after = resume_after is not None
+        self._resume_token = copy.deepcopy(start_after or resume_after)
         self._max_await_time_ms = max_await_time_ms
         self._batch_size = batch_size
         self._collation = collation
         self._start_at_operation_time = start_at_operation_time
         self._session = session
-        self._start_after = copy.deepcopy(start_after)
+
+        # Initialize cursor.
         self._cursor = self._create_cursor()
 
     @property
@@ -102,10 +105,14 @@ def _change_stream_options(self):
         options = {}
         if self._full_document is not None:
             options['fullDocument'] = self._full_document
-        if self._resume_token is not None:
-            options['resumeAfter'] = self._resume_token
-        if self._start_after is not None:
-            options['startAfter'] = self._start_after
+
+        resume_token = self.resume_token
+        if resume_token is not None:
+            if self._uses_start_after:
+                options['startAfter'] = resume_token
+            if self._uses_resume_after:
+                options['resumeAfter'] = resume_token
+
         if self._start_at_operation_time is not None:
             options['startAtOperationTime'] = self._start_at_operation_time
         return options
@@ -127,12 +134,18 @@ def _aggregation_pipeline(self):
         return full_pipeline
 
     def _process_result(self, result, session, server, sock_info, slave_ok):
-        """Callback that records a change stream cursor's operationTime."""
-        if (self._start_at_operation_time is None and
-                self._resume_token is None and
-                self._start_after is None and
-                sock_info.max_wire_version >= 7):
-            self._start_at_operation_time = result["operationTime"]
+        """Callback that caches the startAtOperationTime from a changeStream
+        aggregate command response containing an empty batch of change
+        documents.
+
+        This is implemented as a callback because we need access to the wire
+        version in order to determine whether to cache this value.
+        """
+        if not result['cursor']['firstBatch']:
+            if (self._start_at_operation_time is None and
+                    self.resume_token is None and
+                    sock_info.max_wire_version >= 7):
+                self._start_at_operation_time = result["operationTime"]
 
     def _run_aggregation_cmd(self, session, explicit_session):
         """Run the full aggregation pipeline for this ChangeStream and return
@@ -168,6 +181,15 @@ def close(self):
     def __iter__(self):
         return self
 
+    @property
+    def resume_token(self):
+        """The cached resume token that will be used to resume after the most
+        recently returned change. Returned as a deep copy of the cached value.
+
+        .. versionadded:: 3.9
+        """
+        return copy.deepcopy(self._resume_token)
+
     def next(self):
         """Advance the cursor.
 
@@ -249,20 +271,39 @@ def try_next(self):
             self._resume()
             change = self._cursor._try_next(False)
 
-        # No changes are available.
+        # If no changes are available.
         if change is None:
-            return None
-
+            # We have either iterated over all documents in the cursor,
+            # OR the most-recently returned batch is empty. In either case,
+            # update the cached resume token with the postBatchResumeToken if
+            # one was returned. We also clear the startAtOperationTime.
+            if self._cursor._post_batch_resume_token is not None:
+                self._resume_token = self._cursor._post_batch_resume_token
+                self._start_at_operation_time = None
+            return change
+
+        # Else, changes are available.
         try:
             resume_token = change['_id']
         except KeyError:
             self.close()
             raise InvalidOperation(
                 "Cannot provide resume functionality when the resume "
                 "token is missing.")
-        self._resume_token = copy.copy(resume_token)
+
+        # If this is the last change document from the current batch, cache the
+        # postBatchResumeToken.
+        if (not self._cursor._has_next() and
+                self._cursor._post_batch_resume_token):
+            resume_token = self._cursor._post_batch_resume_token
+
+        # Hereafter, don't use startAfter; instead use resumeAfter.
+        self._uses_start_after = False
+        self._uses_resume_after = True
+
+        # Cache the resume token and clear startAtOperationTime.
+        self._resume_token = resume_token
         self._start_at_operation_time = None
-        self._start_after = None
 
         if self._decode_custom:
             return _bson_to_dict(change.raw, self._orig_codec_options)
diff --git a/pymongo/command_cursor.py b/pymongo/command_cursor.py
@@ -14,18 +14,14 @@
 
 """CommandCursor class to iterate over command results."""
 
-import datetime
-
 from collections import deque
 
 from bson.py3compat import integer_types
-from pymongo import helpers
 from pymongo.errors import (ConnectionFailure,
                             InvalidOperation,
                             NotMasterError,
                             OperationFailure)
-from pymongo.message import (_convert_exception,
-                             _CursorAddress,
+from pymongo.message import (_CursorAddress,
                              _GetMore,
                              _RawBatchGetMore)
 
@@ -43,8 +39,9 @@ def __init__(self, collection, cursor_info, address, retrieved=0,
         """
         self.__collection = collection
         self.__id = cursor_info['id']
-        self.__address = address
         self.__data = deque(cursor_info['firstBatch'])
+        self.__postbatchresumetoken = cursor_info.get('postBatchResumeToken')
+        self.__address = address
         self.__batch_size = batch_size
         self.__max_await_time_ms = max_await_time_ms
         self.__session = session
@@ -119,6 +116,17 @@ def batch_size(self, batch_size):
         self.__batch_size = batch_size == 1 and 2 or batch_size
         return self
 
+    def _has_next(self):
+        """Return ``True`` if documents from the most recently received batch
+        are still buffered in this cursor; only the local buffer is checked."""
+        return len(self.__data) > 0
+
+    @property
+    def _post_batch_resume_token(self):
+        """The postBatchResumeToken from the latest changeStream aggregate or
+        getMore response, or ``None`` if that response did not include one."""
+        return self.__postbatchresumetoken
+
     def __send_message(self, operation):
         """Send a getmore message and handle the response.
         """
@@ -157,6 +165,7 @@ def kill():
         if from_command:
             cursor = docs[0]['cursor']
             documents = cursor['nextBatch']
+            self.__postbatchresumetoken = cursor.get('postBatchResumeToken')
             self.__id = cursor['id']
         else:
             documents = docs
diff --git a/test/test_change_stream.py b/test/test_change_stream.py