Skip to content

Commit 8833877

Browse files
authored
Adding /_api/import (#61)
1 parent 325c4e0 commit 8833877

File tree

3 files changed

+153
-0
lines changed

3 files changed

+153
-0
lines changed

arangoasync/collection.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1578,6 +1578,108 @@ def response_handler(
15781578

15791579
return await self._executor.execute(request, response_handler)
15801580

1581+
async def import_bulk(
    self,
    documents: bytes | str,
    doc_type: Optional[str] = None,
    complete: Optional[bool] = True,
    details: Optional[bool] = True,
    from_prefix: Optional[str] = None,
    to_prefix: Optional[str] = None,
    overwrite: Optional[bool] = None,
    overwrite_collection_prefix: Optional[bool] = None,
    on_duplicate: Optional[str] = None,
    wait_for_sync: Optional[bool] = None,
    ignore_missing: Optional[bool] = None,
) -> Result[Json]:
    """Bulk-import JSON data into the collection via the HTTP import API.

    Args:
        documents (bytes | str): Raw request body holding the JSON data to
            import.
        doc_type (str | None): Controls how the request body is parsed.
            One of "", "documents", "array" or "auto".
        complete (bool | None): When `True`, the whole import is aborted if
            any error occurs; otherwise invalid documents are skipped and
            the import continues.
        details (bool | None): When `True`, the response carries a
            `details` attribute describing documents that failed to import.
        from_prefix (str | None): Prefix prepended to the value of the
            "_from" field of every inserted edge document. For example,
            prefix "foo" turns "_from": "bar" into "_from": "foo/bar".
            Applies only to edge collections.
        to_prefix (str | None): Prefix prepended to the value of the "_to"
            field of every inserted edge document. For example, prefix
            "foo" turns "_to": "bar" into "_to": "foo/bar". Applies only
            to edge collections.
        overwrite (bool | None): When `True`, all existing documents are
            removed before the import; indexes are preserved.
        overwrite_collection_prefix (bool | None): Force the `fromPrefix`
            and `toPrefix`, possibly replacing collection name prefixes
            already present in the data.
        on_duplicate (str | None): Action taken on a unique key constraint
            violation (documents carrying "_key"): "error" (do not import
            the new document, count it as an error), "update" (update the
            existing document, preserving fields missing from the new
            one), "replace" (replace the existing document with the new
            one), or "ignore" (do not import the new document, count it as
            ignored rather than as an error). "update" and "replace" may
            still fail on secondary unique key constraint violations.
        wait_for_sync (bool | None): Block until the operation is
            synchronized to disk.
        ignore_missing (bool | None): For tabular imports (type parameter
            omitted, first line defines the attribute keys), rows with a
            different number of elements than the header are skipped by
            default; enable this to import them anyway, with missing
            attributes omitted and excess elements ignored.

    Returns:
        dict: Summary of the import operation.

    Raises:
        DocumentInsertError: If the import fails.

    References:
        - `import-json-data-as-documents <https://docs.arangodb.com/stable/develop/http-api/import/#import-json-data-as-documents>`__
    """  # noqa: E501
    # Map each optional argument to its query-parameter name; a value of
    # None means "not set", letting the server apply its own default.
    optional = {
        "type": doc_type,
        "complete": complete,
        "details": details,
        "fromPrefix": from_prefix,
        "toPrefix": to_prefix,
        "overwrite": overwrite,
        "overwriteCollectionPrefix": overwrite_collection_prefix,
        "onDuplicate": on_duplicate,
        "waitForSync": wait_for_sync,
        "ignoreMissing": ignore_missing,
    }
    params: Params = {"collection": self.name}
    params.update(
        {key: value for key, value in optional.items() if value is not None}
    )

    request = Request(
        method=Method.POST,
        endpoint="/_api/import",
        data=documents,
        params=params,
    )

    def response_handler(resp: Response) -> Json:
        if not resp.is_success:
            raise DocumentInsertError(resp, request)
        result: Json = self.deserializer.loads(resp.raw_body)
        return result

    return await self._executor.execute(request, response_handler)
1682+
15811683

15821684
class StandardCollection(Collection[T, U, V]):
15831685
"""Standard collection API wrapper.

docs/document.rst

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,39 @@ Standard documents are managed via collection API wrapper:
150150
# Delete one or more matching documents.
151151
await students.delete_match({"first": "Emma"})
152152
153+
Importing documents in bulk is faster when using specialized methods. Suppose
154+
our data is in a file in JSON Lines (JSONL) format. Each line is expected
155+
to be one JSON object. Example of a "students.jsonl" file:
156+
157+
.. code-block:: text
158+
159+
{"_key":"john","name":"John Smith","age":35}
160+
{"_key":"katie","name":"Katie Foster","age":28}
161+
162+
To import this file into the "students" collection, we can use the `import_bulk` API:
163+
164+
.. code-block:: python
165+
166+
from arangoasync import ArangoClient
167+
from arangoasync.auth import Auth
168+
import aiofiles
169+
170+
async with ArangoClient(hosts="http://localhost:8529") as client:
171+
auth = Auth(username="root", password="passwd")
172+
173+
# Connect to "test" database as root user.
174+
db = await client.db("test", auth=auth)
175+
176+
# Get the API wrapper for "students" collection.
177+
students = db.collection("students")
178+
179+
# Read the JSONL file asynchronously.
180+
async with aiofiles.open('students.jsonl', mode='r') as f:
181+
documents = await f.read()
182+
183+
# Import documents in bulk.
184+
result = await students.import_bulk(documents, doc_type="documents")
185+
153186
You can manage documents via database API wrappers also, but only simple
154187
operations (i.e. get, insert, update, replace, delete) are supported and you
155188
must provide document IDs instead of keys:

tests/test_collection.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
CollectionStatisticsError,
1717
CollectionTruncateError,
1818
DocumentCountError,
19+
DocumentInsertError,
1920
IndexCreateError,
2021
IndexDeleteError,
2122
IndexGetError,
@@ -263,3 +264,20 @@ async def test_collection_truncate_count(docs, doc_col, bad_col):
263264
await doc_col.truncate(wait_for_sync=True, compact=True)
264265
cnt = await doc_col.count()
265266
assert cnt == 0
267+
268+
269+
@pytest.mark.asyncio
async def test_collection_import_bulk(doc_col, bad_col, docs):
    # Serialize the fixture documents into a JSONL-style payload.
    payload = "\n".join(doc_col.serializer.dumps(doc) for doc in docs)

    # Importing through a bad collection wrapper must raise.
    with pytest.raises(DocumentInsertError):
        await bad_col.import_bulk(payload, doc_type="documents")

    # A valid bulk import succeeds.
    result = await doc_col.import_bulk(payload, doc_type="documents")

    # Every document must now be present, and the report must agree.
    total = await doc_col.count()
    assert total == len(docs)
    assert result["created"] == total

0 commit comments

Comments
 (0)