Skip to content

tests: add coverage for skip < offset case #165

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
May 18, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions google/cloud/datastore/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -593,8 +593,12 @@ def _next_page(self):
# skipped all of the results yet. Don't return any results.
# Instead, rerun query, adjusting offsets. Datastore doesn't process
# more than 1000 skipped results in a query.
old_query_pb = query_pb
query_pb = query_pb2.Query()
query_pb._pb.CopyFrom(old_query_pb._pb) # copy for testability
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@craiglabenz is this going to hit the bad path for perf and protoplus?

I'm hesitant to change product code solely to make it testable. Can you shed more light on this change, @tseaver?

Copy link
Contributor Author

@tseaver tseaver May 18, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For perf:

  • We are dealing with the raw protos here -- no protoplus attributes are touched, and CopyFrom is super-fast (implemented in C).
  • The copy will only be made once per page, in the edge case where we are skipping over more than a page of results.

For correctness:

  • Mutating the original query_pb in place is surprising, and might actually defeat some forms of debugging (or even a full retry of the query, should one ever be needed).

query_pb.start_cursor = response_pb.batch.skipped_cursor
query_pb.offset -= response_pb.batch.skipped_results

response_pb = self.client._datastore_api.run_query(
request={
"project_id": self._query.project,
Expand All @@ -604,6 +608,7 @@ def _next_page(self):
},
**kwargs,
)

entity_pbs = self._process_query_results(response_pb)
return page_iterator.Page(self, entity_pbs, self.item_to_value)

Expand Down
60 changes: 58 additions & 2 deletions tests/unit/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,6 +609,58 @@ def test__next_page_no_more(self):
self.assertIsNone(page)
ds_api.run_query.assert_not_called()

def test__next_page_w_skipped_lt_offset(self):
    # Scenario: the first response skips fewer results (100) than the
    # requested offset (150), so ``_next_page`` is expected to call
    # ``run_query`` a second time, resuming from the first response's
    # skipped cursor with the remaining offset.
    from google.api_core import page_iterator
    from google.cloud.datastore_v1.types import datastore as datastore_pb2
    from google.cloud.datastore_v1.types import entity as entity_pb2
    from google.cloud.datastore_v1.types import query as query_pb2
    from google.cloud.datastore.query import Query

    project = "prujekt"
    offset = 150
    not_finished = query_pb2.QueryResultBatch.MoreResultsType.NOT_FINISHED

    # (skipped_results, skipped_cursor) per canned response, in call order.
    first_skipped, first_cursor = 100, b"DEADBEEF"
    second_skipped, second_cursor = 50, b"FACEDACE"

    responses = []
    for skipped, cursor in (
        (first_skipped, first_cursor),
        (second_skipped, second_cursor),
    ):
        response = _make_query_response([], b"", not_finished, skipped)
        response.batch.skipped_cursor = cursor
        responses.append(response)

    ds_api = _make_datastore_api(*responses)
    client = _Client(project, datastore_api=ds_api)
    iterator = self._make_one(Query(client), client, offset=offset)

    page = iterator._next_page()

    self.assertIsInstance(page, page_iterator.Page)
    self.assertIs(page._parent, iterator)

    partition_id = entity_pb2.PartitionId(project_id=project)
    read_options = datastore_pb2.ReadOptions()

    def _expected_call(query_pb):
        # Every request shares project / partition / read options; only
        # the query message differs between the two calls.
        return mock.call(
            request={
                "project_id": project,
                "partition_id": partition_id,
                "read_options": read_options,
                "query": query_pb,
            }
        )

    initial_query = query_pb2.Query(offset=offset)
    resumed_query = query_pb2.Query(
        start_cursor=first_cursor, offset=(offset - first_skipped)
    )
    expected_calls = [
        _expected_call(initial_query),
        _expected_call(resumed_query),
    ]
    self.assertEqual(ds_api.run_query.call_args_list, expected_calls)


class Test__item_to_entity(unittest.TestCase):
def _call_fut(self, iterator, entity_pb):
Expand Down Expand Up @@ -789,6 +841,10 @@ def _make_query_response(
)


# NOTE(review): presumably the pre-change side of this diff hunk (the file
# header reports 2 deletions); the ``*results`` definition that follows
# has the same name and would supersede it -- confirm against the real file.
def _make_datastore_api(result=None):
run_query = mock.Mock(return_value=result, spec=[])
def _make_datastore_api(*results):
if len(results) == 0:
run_query = mock.Mock(return_value=None, spec=[])
else:
run_query = mock.Mock(side_effect=results, spec=[])

return mock.Mock(run_query=run_query, spec=["run_query"])