diff --git a/notebooks/ArangoDB_Graphistry_Visualization.ipynb b/notebooks/ArangoDB_Graphistry_Visualization.ipynb index be9896c..a247a59 100644 --- a/notebooks/ArangoDB_Graphistry_Visualization.ipynb +++ b/notebooks/ArangoDB_Graphistry_Visualization.ipynb @@ -1,531 +1,421 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "gL2nCxLApYqx" + }, + "source": [ + "![arangodb](https://github.com/arangodb/interactive_tutorials/blob/master/notebooks/img/ArangoDB_logo.png?raw=1)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JwueEvyppkEW" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "xQdRdCV6zoBh" + }, + "source": [ + "**ArangoDB with Graphistry**\n", + "\n", + "We explore the fraud detection data in ArangoDB to show how Arango's graph support interops with Graphistry pretty quickly.\n", + "\n", + "This tutorial shares two sample transforms:\n", + "\n", + "* Visualize the full graph\n", + "* Visualize the result of a traversal query\n", + "\n", + "Each runs an AQL query via python-arango, automatically converts to pandas, and plots with graphistry." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "a5dI-knm0g9y" + }, + "source": [ + "Setup" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "nSoiCZdonwYj" + }, + "outputs": [], + "source": [ + "%%capture\n", + "!git clone -b oasis_connector --single-branch https://github.com/arangodb/interactive_tutorials.git\n", + "!git clone -b 2.0.0 --single-branch https://github.com/arangoml/networkx-adapter.git\n", + "!rsync -av networkx-adapter/examples/ ./ --exclude=.git\n", + "!rsync -av interactive_tutorials/ ./ --exclude=.git\n", + "!pip3 install adbnx_adapter==2.0.0\n", + "!pip3 install matplotlib\n", + "!pip3 install pyArango\n", + "!pip3 install --user graphistry\n", + "!pip install jsonlines" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { "colab": { - "name": "graphistry.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" + "base_uri": "https://localhost:8080/" }, - "language_info": { - "name": "python" - } + "id": "UbTGRVQ_dPSU", + "outputId": "9de45216-e7b0-495e-c0ae-87aff6e6cdbb" + }, + "outputs": [], + "source": [ + "## The runtime must restart on Colab for graphistry to work\n", + "print(\"The runtime must restart due to graphistry\")\n", + "exit()" + ] }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "xQdRdCV6zoBh" - }, - "source": [ - "**ArangoDB with Graphistry**\n", - "\n", - "We explore the fraud detection data in ArangoDB to show how Arango's graph support interops with Graphistry pretty quickly.\n", - "\n", - "This tutorial shares two sample transforms:\n", - "\n", - "* Visualize the full graph\n", - "* Visualize the result of a traversal query\n", - "\n", - "Each runs an AQL query via python-arango, automatically converts to pandas, and plots with graphistry." - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "a5dI-knm0g9y" - }, - "source": [ - "Setup" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "nSoiCZdonwYj" - }, - "source": [ - "%%capture\n", - "!git clone -b oasis_connector --single-branch https://github.com/arangodb/interactive_tutorials.git\n", - "!git clone -b 2.0.0 --single-branch https://github.com/arangoml/networkx-adapter.git\n", - "!rsync -av networkx-adapter/examples/ ./ --exclude=.git\n", - "!rsync -av interactive_tutorials/ ./ --exclude=.git\n", - "!pip3 install adbnx_adapter==2.0.0\n", - "!pip3 install matplotlib\n", - "!pip3 install pyArango\n", - "!pip3 install --user graphistry\n", - "!pip install jsonlines" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "UbTGRVQ_dPSU", - "outputId": "9de45216-e7b0-495e-c0ae-87aff6e6cdbb" - }, - "source": [ - "## The runtime must restart on Colab for graphistry to work\n", - "print(\"The runtime must restart due to graphistry\")\n", - "exit()" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "The runtime must restart due to graphistry\n" - ] - } - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "c2Y-FbFGn4su" - }, - "source": [ - "## NOTE: Notebook will intentionally exit, continue running from this point.\n", - "## On Colab: Click this code block and then CTRL/CMD + F10 or Runtime > Run After\n", - "print(\"NOTE: Notebook will intentionally exit, continue running from this point.\")\n", - "\n", - "import json\n", - "import oasis\n", - "import pandas as pd\n", - "import graphistry" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "0ExNiseM0u65" - }, - "source": [ - "**Create a Temporary ArangoDB Instance**" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "AbQTghrdoHQj", - "outputId": "fc83b4ad-9641-4589-cddc-6fb35224a014" - }, - "source": [ - "# Request temporary instance from the managed ArangoDB Cloud Oasis.\n", - "con = oasis.getTempCredentials(tutorialName=\"graphistry\")\n", - "\n", - "# Connect to the db via the python-arango driver\n", - "python_arango_db_driver = oasis.connect_python_arango(con)\n", - "\n", - "# (Alternative) Connect to the db via the pyArango driver\n", - "# pyarango_db_driver = oasis.connect(con)[con['dbName']]\n", - "\n", - "print()\n", - "print(\"https://{}:{}\".format(con[\"hostname\"], con[\"port\"]))\n", - "print(\"Username: \" + con[\"username\"])\n", - "print(\"Password: \" + con[\"password\"])\n", - "print(\"Database: \" + con[\"dbName\"])" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Requesting new temp credentials.\n", - "Temp database ready to use.\n", - "\n", - "https://tutorials.arangodb.cloud:8529\n", - "Username: TUTnjb9ap9v2c0bhfdco1n2kt\n", - "Password: TUTu8bk4bz8p5bgtbkqr6t457\n", - "Database: TUT6j0dfbrpl28b2s9a3ugi4c\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4-l2dM6LAH1x" - }, - "source": [ - "The following updates the data to no longer include hyphens as this is an issue when we want to display the data using graphify later on." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "6DkqkAXyrZu3" - }, - "source": [ - "!gunzip /content/networkx-adapter/examples/data/fraud_dump/customer_91ec1f9324753048c0096d036a694f86.data.json.gz" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "rdvGbnUw5Ikj" - }, - "source": [ - "import jsonlines\n", - "\n", - "data = []\n", - "\n", - "with jsonlines.open(\"/content/networkx-adapter/examples/data/fraud_dump/customer_91ec1f9324753048c0096d036a694f86.data.json\", \"r\") as reader:\n", - " for obj in reader:\n", - " obj['data']['Ssn'] = str(obj['data']['Ssn']).replace('-', '')\n", - " data.append(obj) \n", - "\n", - "with jsonlines.open(\"/content/networkx-adapter/examples/data/fraud_dump/customer_91ec1f9324753048c0096d036a694f86.data.json\", \"w\") as writer:\n", - " for obj in data:\n", - " writer.write(obj)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "metadata": { - "id": "3gIz-XqPsfCj" - }, - "source": [ - "!gzip /content/networkx-adapter/examples/data/fraud_dump/customer_91ec1f9324753048c0096d036a694f86.data.json " - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "1aRg96Ku1GWv" - }, - "source": [ - "**Load the fraud detection dataset**" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "3gMe9l6ooMvj", - "outputId": "6d432f7f-ddb4-4970-8320-1fb75d314b2d" - }, - "source": [ - "!chmod -R 755 ./tools\n", - "!./tools/arangorestore -c none --server.endpoint http+ssl://{con[\"hostname\"]}:{con[\"port\"]} --server.username {con[\"username\"]} --server.database {con[\"dbName\"]} --server.password {con[\"password\"]} --default-replication-factor 3 --input-directory \"/content/networkx-adapter/examples/data/fraud_dump\"\n" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "\u001b[0m2021-12-03T20:54:05Z [275] INFO [05c30] {restore} Connected to ArangoDB 'http+ssl://tutorials.arangodb.cloud:8529'\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:06Z [275] INFO [abeb4] {restore} Database name in source dump is 'fraud-detection'\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:06Z [275] INFO [9b414] {restore} # Re-creating document collection 'account'...\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:06Z [275] INFO [9b414] {restore} # Re-creating document collection 'bank'...\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:07Z [275] INFO [9b414] {restore} # Re-creating document collection 'branch'...\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:08Z [275] INFO [9b414] {restore} # Re-creating document collection 'Class'...\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:08Z [275] INFO [9b414] {restore} # Re-creating document collection 'customer'...\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:09Z [275] INFO [9b414] {restore} # Re-creating edge collection 'accountHolder'...\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:09Z [275] INFO [9b414] {restore} # Re-creating edge collection 'Relationship'...\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:10Z [275] INFO [9b414] {restore} # Re-creating edge collection 'transaction'...\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [f723c] {restore} # Creating views...\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [d88c6] {restore} # Creating indexes for collection 'account'...\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [6d69f] {restore} # Dispatched 8 job(s), using 2 worker(s)\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [94913] {restore} # Loading data into document collection 'bank', data size: 183 byte(s)\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [6ae09] {restore} # Successfully restored document collection 'bank'\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [94913] {restore} # Loading data into document collection 'branch', data size: 465 byte(s)\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [94913] {restore} # Loading data into document collection 'account', data size: 1696 byte(s)\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [6ae09] {restore} # Successfully restored document collection 'branch'\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [94913] {restore} # Loading data into document collection 'Class', data size: 196 byte(s)\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [6ae09] {restore} # Successfully restored document collection 'account'\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [d88c6] {restore} # Creating indexes for collection 'customer'...\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [6ae09] {restore} # Successfully restored document collection 'Class'\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [94913] {restore} # Loading data into edge collection 'accountHolder', data size: 1076 byte(s)\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [94913] {restore} # Loading data into document collection 'customer', data size: 851 byte(s)\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [6ae09] {restore} # Successfully restored edge collection 'accountHolder'\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [94913] {restore} # Loading data into edge collection 'Relationship', data size: 275 byte(s)\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [6ae09] {restore} # Successfully restored document collection 'customer'\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [94913] {restore} # Loading data into edge collection 'transaction', data size: 2292 byte(s)\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [6ae09] {restore} # Successfully restored edge collection 'Relationship'\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [6ae09] {restore} # Successfully restored edge collection 'transaction'\n", - "\u001b[0m\u001b[0m2021-12-03T20:54:11Z [275] INFO [a66e1] {restore} Processed 8 collection(s) in 6.727011 s, read 50055 byte(s) from datafiles, sent 8 data batch(es) of 50047 byte(s) total size\n", - "\u001b[0m" - ] - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "G5hS_BBD1NKZ" - }, - "source": [ - "**Define the ArangoDB Named Graph**" - ] - }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/" - }, - "id": "4B2K80vEoSmt", - "outputId": "4ac6e23d-92ed-4c83-8d77-0eed32c955ca" - }, - "source": [ - "edge_definitions = [\n", - " {\n", - " \"edge_collection\": \"accountHolder\",\n", - " \"from_vertex_collections\": [\"customer\"],\n", - " \"to_vertex_collections\": [\"account\"],\n", - " },\n", - " {\n", - " \"edge_collection\": \"transaction\",\n", - " \"from_vertex_collections\": [\"account\"],\n", - " \"to_vertex_collections\": [\"account\"],\n", - " },\n", - "]\n", - "\n", - "name = \"fraud-detection\"\n", - "python_arango_db_driver.delete_graph(name, ignore_missing=True)\n", - "fraud_graph = python_arango_db_driver.create_graph(name, edge_definitions=edge_definitions)\n", - "\n", - "print(\"Graph Setup done.\")\n", - "print(fraud_graph)" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "stream", - "name": "stdout", - "text": [ - "Graph Setup done.\n", - "\n" - ] - } - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "KHFgzwo71aX0" - }, - "source": [ - "Define Graphistry Transformation Functions" - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "Vm_Vua9IpG6H" - }, - "source": [ - "def paths_to_graph(paths, source='_from', destination='_to', node='_id'):\n", - " nodes_df = pd.DataFrame()\n", - " edges_df = pd.DataFrame()\n", - " for graph in paths:\n", - " nodes_df = pd.concat([ nodes_df, pd.DataFrame(graph['vertices']) ], ignore_index=True)\n", - " edges_df = pd.concat([ edges_df, pd.DataFrame(graph['edges']) ], ignore_index=True)\n", - " nodes_df = nodes_df.drop_duplicates([node])\n", - " edges_df = edges_df.drop_duplicates([node])\n", - " return graphistry.bind(source=source, destination=destination, node=node).nodes(nodes_df).edges(edges_df)\n", - "\n", - "def graph_to_graphistry(graph, source='_from', destination='_to', node='_id'):\n", - " nodes_df = pd.DataFrame()\n", - " for vc_name in graph.vertex_collections():\n", - " nodes_df = pd.concat([nodes_df, pd.DataFrame([x for x in graph.vertex_collection(vc_name)])], ignore_index=True)\n", - " edges_df = pd.DataFrame()\n", - " for edge_def in graph.edge_definitions():\n", - " edges_df = pd.concat([edges_df, pd.DataFrame([x for x in graph.edge_collection(edge_def['edge_collection'])])], ignore_index=True)\n", - " return graphistry.bind(source=source, destination=destination, node=node).nodes(nodes_df).edges(edges_df)" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "DNDCa4PE1leD" - }, - "source": [ - "**Connect to Graphistry hub.graphistry.com server**\n", - "\n", - "You need to [set up an account on graphistry.com](https://hub.graphistry.com/?ref=_ptnr_graphistry_ste_core) and login in order to generate a temporary API key." - ] - }, - { - "cell_type": "code", - "metadata": { - "id": "bvn6O0uMp9Ok" - }, - "source": [ - "#login to hub.graphistry.com\n", - "graphistry.register(api=3, protocol=\"https\", server=\"hub.graphistry.com\", username=\"\", password=\"\")\n", - "#register your temprary API key\n", - "graphistry.register(key='', server='hub.graphistry.com') #https://www.graphistry.com/api-request\n" - ], - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "NHM12eop3PLc" - }, - "source": [ - "**Demo 1: Traversal visualization**\n", - "\n", - "* Use python-arango's traverse() call to accounts and users connected to Betty Blue's account (account/10000016)\n", - "* Convert result paths to pandas and Graphistry\n", - "Plot, and \n", - "* instead of using raw Arango vertex IDs, use the Name" - ] + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "c2Y-FbFGn4su" + }, + "outputs": [], + "source": [ + "## NOTE: Notebook will intentionally exit, continue running from this point.\n", + "## On Colab: Click this code block and then CTRL/CMD + F10 or Runtime > Run After\n", + "print(\"NOTE: Notebook will intentionally exit, continue running from this point.\")\n", + "\n", + "import json\n", + "import oasis\n", + "import pandas as pd\n", + "import graphistry" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0ExNiseM0u65" + }, + "source": [ + "**Create a Temporary ArangoDB Instance**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "metadata": { - "id": "t3tAiZfk2asO" - }, - "source": [ - "paths = python_arango_db_driver.graph('fraud-detection').traverse(start_vertex='account/10000016')['paths']\n" - ], - "execution_count": null, - "outputs": [] + "id": "AbQTghrdoHQj", + "outputId": "fc83b4ad-9641-4589-cddc-6fb35224a014" + }, + "outputs": [], + "source": [ + "# Request temporary instance from the managed ArangoDB Cloud Oasis.\n", + "con = oasis.getTempCredentials(tutorialName=\"graphistry\")\n", + "\n", + "# Connect to the db via the python-arango driver\n", + "python_arango_db_driver = oasis.connect_python_arango(con)\n", + "\n", + "# (Alternative) Connect to the db via the pyArango driver\n", + "# pyarango_db_driver = oasis.connect(con)[con['dbName']]\n", + "\n", + "print()\n", + "print(\"https://{}:{}\".format(con[\"hostname\"], con[\"port\"]))\n", + "print(\"Username: \" + con[\"username\"])\n", + "print(\"Password: \" + con[\"password\"])\n", + "print(\"Database: \" + con[\"dbName\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "4-l2dM6LAH1x" + }, + "source": [ + "The following updates the data to no longer include hyphens as this is an issue when we want to display the data using graphify later on." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "6DkqkAXyrZu3" + }, + "outputs": [], + "source": [ + "!gunzip /content/networkx-adapter/examples/data/fraud_dump/customer_91ec1f9324753048c0096d036a694f86.data.json.gz" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "rdvGbnUw5Ikj" + }, + "outputs": [], + "source": [ + "import jsonlines\n", + "\n", + "data = []\n", + "\n", + "with jsonlines.open(\"/content/networkx-adapter/examples/data/fraud_dump/customer_91ec1f9324753048c0096d036a694f86.data.json\", \"r\") as reader:\n", + " for obj in reader:\n", + " obj['data']['Ssn'] = str(obj['data']['Ssn']).replace('-', '')\n", + " data.append(obj) \n", + "\n", + "with jsonlines.open(\"/content/networkx-adapter/examples/data/fraud_dump/customer_91ec1f9324753048c0096d036a694f86.data.json\", \"w\") as writer:\n", + " for obj in data:\n", + " writer.write(obj)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3gIz-XqPsfCj" + }, + "outputs": [], + "source": [ + "!gzip /content/networkx-adapter/examples/data/fraud_dump/customer_91ec1f9324753048c0096d036a694f86.data.json " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "1aRg96Ku1GWv" + }, + "source": [ + "**Load the fraud detection dataset**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 543 - }, - "id": "-qb7r8rtDwRF", - "outputId": "16707645-1cd3-4d40-fe00-13bbe5322fa8" - }, - "source": [ - "g = paths_to_graph(paths)\n", - "g.bind(point_title='Name').plot()" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 11 - } - ] + "id": "3gMe9l6ooMvj", + "outputId": "6d432f7f-ddb4-4970-8320-1fb75d314b2d" + }, + "outputs": [], + "source": [ + "!chmod -R 755 ./tools\n", + "!./tools/arangorestore -c none --server.endpoint http+ssl://{con[\"hostname\"]}:{con[\"port\"]} --server.username {con[\"username\"]} --server.database {con[\"dbName\"]} --server.password {con[\"password\"]} --default-replication-factor 3 --input-directory \"/content/networkx-adapter/examples/data/fraud_dump\"\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G5hS_BBD1NKZ" + }, + "source": [ + "**Define the ArangoDB Named Graph**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" }, - { - "cell_type": "markdown", - "metadata": { - "id": "hFn9VvD_7ABu" - }, - "source": [ - "**Demo 2: Full graph visualization**\n", - "\n", - "* Use python-arango on a graph to identify and download the involved vertex/edge collections\n", - "* Convert the results to pandas and Graphistry\n", - "* Plot, and instead of using raw Arango vertex IDs, use Name\n", - "\n" - ] + "id": "4B2K80vEoSmt", + "outputId": "4ac6e23d-92ed-4c83-8d77-0eed32c955ca" + }, + "outputs": [], + "source": [ + "edge_definitions = [\n", + " {\n", + " \"edge_collection\": \"accountHolder\",\n", + " \"from_vertex_collections\": [\"customer\"],\n", + " \"to_vertex_collections\": [\"account\"],\n", + " },\n", + " {\n", + " \"edge_collection\": \"transaction\",\n", + " \"from_vertex_collections\": [\"account\"],\n", + " \"to_vertex_collections\": [\"account\"],\n", + " },\n", + "]\n", + "\n", + "name = \"fraud-detection\"\n", + "python_arango_db_driver.delete_graph(name, ignore_missing=True)\n", + "fraud_graph = python_arango_db_driver.create_graph(name, edge_definitions=edge_definitions)\n", + "\n", + "print(\"Graph Setup done.\")\n", + "print(fraud_graph)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KHFgzwo71aX0" + }, + "source": [ + "Define Graphistry Transformation Functions" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Vm_Vua9IpG6H" + }, + "outputs": [], + "source": [ + "def paths_to_graph(paths, source='_from', destination='_to', node='_id'):\n", + " nodes_df = pd.DataFrame()\n", + " edges_df = pd.DataFrame()\n", + " for graph in paths:\n", + " nodes_df = pd.concat([ nodes_df, pd.DataFrame(graph['vertices']) ], ignore_index=True)\n", + " edges_df = pd.concat([ edges_df, pd.DataFrame(graph['edges']) ], ignore_index=True)\n", + " nodes_df = nodes_df.drop_duplicates([node])\n", + " edges_df = edges_df.drop_duplicates([node])\n", + " return graphistry.bind(source=source, destination=destination, node=node).nodes(nodes_df).edges(edges_df)\n", + "\n", + "def graph_to_graphistry(graph, source='_from', destination='_to', node='_id'):\n", + " nodes_df = pd.DataFrame()\n", + " for vc_name in graph.vertex_collections():\n", + " nodes_df = pd.concat([nodes_df, pd.DataFrame([x for x in graph.vertex_collection(vc_name)])], ignore_index=True)\n", + " edges_df = pd.DataFrame()\n", + " for edge_def in graph.edge_definitions():\n", + " edges_df = pd.concat([edges_df, pd.DataFrame([x for x in graph.edge_collection(edge_def['edge_collection'])])], ignore_index=True)\n", + " return graphistry.bind(source=source, destination=destination, node=node).nodes(nodes_df).edges(edges_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DNDCa4PE1leD" + }, + "source": [ + "**Connect to Graphistry hub.graphistry.com server**\n", + "\n", + "You need to [set up an account on graphistry.com](https://hub.graphistry.com/?ref=_ptnr_graphistry_ste_core) and login in order to generate a temporary API key." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "bvn6O0uMp9Ok" + }, + "outputs": [], + "source": [ + "#login to hub.graphistry.com\n", + "graphistry.register(api=3, protocol=\"https\", server=\"hub.graphistry.com\", username=\"\", password=\"\")\n", + "#register your temprary API key\n", + "graphistry.register(key='', server='hub.graphistry.com') #https://www.graphistry.com/api-request\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NHM12eop3PLc" + }, + "source": [ + "**Demo 1: Traversal visualization**\n", + "\n", + "* Use python-arango's traverse() call to accounts and users connected to Betty Blue's account (account/10000016)\n", + "* Convert result paths to pandas and Graphistry\n", + "Plot, and \n", + "* instead of using raw Arango vertex IDs, use the Name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "t3tAiZfk2asO" + }, + "outputs": [], + "source": [ + "paths = python_arango_db_driver.graph('fraud-detection').traverse(start_vertex='account/10000016')['paths']\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 543 }, - { - "cell_type": "code", - "metadata": { - "colab": { - "base_uri": "https://localhost:8080/", - "height": 543 - }, - "id": "ie1o-FqdEwQj", - "outputId": "c948e29e-8c7e-49ab-abc5-fbb309e32d32" - }, - "source": [ - "g = graph_to_graphistry( python_arango_db_driver.graph('fraud-detection') )\n", - "g.bind(point_title='_id').plot()" - ], - "execution_count": null, - "outputs": [ - { - "output_type": "execute_result", - "data": { - "text/html": [ - "\n", - " \n", - " \n", - " \n", - " " - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "execution_count": 12 - } - ] + "id": "-qb7r8rtDwRF", + "outputId": "16707645-1cd3-4d40-fe00-13bbe5322fa8" + }, + "outputs": [], + "source": [ + "g = paths_to_graph(paths)\n", + "g.bind(point_title='Name').plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hFn9VvD_7ABu" + }, + "source": [ + "**Demo 2: Full graph visualization**\n", + "\n", + "* Use python-arango on a graph to identify and download the involved vertex/edge collections\n", + "* Convert the results to pandas and Graphistry\n", + "* Plot, and instead of using raw Arango vertex IDs, use Name\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 543 }, - { - "cell_type": "code", - "metadata": { - "id": "qNPsLvT-GD4k" - }, - "source": [ - "" - ], - "execution_count": null, - "outputs": [] - } - ] + "id": "ie1o-FqdEwQj", + "outputId": "c948e29e-8c7e-49ab-abc5-fbb309e32d32" + }, + "outputs": [], + "source": [ + "g = graph_to_graphistry( python_arango_db_driver.graph('fraud-detection') )\n", + "g.bind(point_title='_id').plot()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qNPsLvT-GD4k" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "ArangoDB_Graphistry_Visualization.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 }