Skip to content

Commit 027b345

Browse files
committed
support for GraphBinary serialization
Signed-off-by: pm-osc <pm2.osc@gmail.com>
1 parent b1c71c7 commit 027b345

File tree

9 files changed

+423
-34
lines changed

9 files changed

+423
-34
lines changed

README.md

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,19 @@ from gremlin_python.driver.driver_remote_connection import DriverRemoteConnectio
1616
from janusgraph_python.driver.serializer import JanusGraphSONSerializersV3d0
1717

1818
connection = DriverRemoteConnection(
19-
'ws://localhost:8182/gremlin', 'g',
20-
message_serializer=JanusGraphSONSerializersV3d0())
19+
'ws://localhost:8182/gremlin', 'g',
20+
message_serializer=JanusGraphSONSerializersV3d0())
21+
```
22+
23+
The same can be done for GraphBinary:
24+
25+
```python
26+
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
27+
from janusgraph_python.driver.serializer import JanusGraphBinarySerializersV1
28+
29+
connection = DriverRemoteConnection(
30+
'ws://localhost:8182/gremlin', 'g',
31+
message_serializer=JanusGraphBinarySerializersV1())
2132
```
2233

2334
Note that the client should be disposed on shut down to release resources and
@@ -76,15 +87,14 @@ version.
7687

7788
## Serialization Formats
7889

79-
JanusGraph-Python supports GraphSON 3 only. GraphBinary is not yet
80-
supported.
90+
JanusGraph-Python supports GraphSON 3 as well as GraphBinary.
8191

8292
Not all of the JanusGraph-specific types are already supported by the formats:
8393

8494
| Format | RelationIdentifier | Text predicates | Geoshapes | Geo predicates |
8595
| ----------- | ------------------ | --------------- | --------- | -------------- |
8696
| GraphSON3 | x | x | - | - |
87-
| GraphBinary | - | - | - | - |
97+
| GraphBinary | x | x | - | - |
8898

8999
## Community
90100

janusgraph_python/driver/serializer.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,19 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from gremlin_python.driver.serializer import GraphSONSerializersV3d0
16-
from janusgraph_python.structure.io import graphsonV3d0
15+
from gremlin_python.driver.serializer import GraphSONSerializersV3d0, GraphBinarySerializersV1
16+
from janusgraph_python.structure.io import graphsonV3d0, graphbinaryV1
1717

1818
class JanusGraphSONSerializersV3d0(GraphSONSerializersV3d0):
1919
"""Message serializer for GraphSON 3.0 extended with JanusGraph-specific types"""
2020
def __init__(self):
2121
reader = graphsonV3d0.JanusGraphSONReader()
2222
writer = graphsonV3d0.JanusGraphSONWriter()
23-
super(GraphSONSerializersV3d0, self).__init__(reader, writer)
23+
super(GraphSONSerializersV3d0, self).__init__(reader, writer)
24+
25+
class JanusGraphBinarySerializersV1(GraphBinarySerializersV1):
    """Message serializer for GraphBinary 1.0 extended with JanusGraph-specific types"""

    def __init__(self):
        # Pass the JanusGraph-aware reader and writer straight to the base
        # serializer; the reader is constructed first, then the writer.
        super().__init__(
            graphbinaryV1.JanusGraphBinaryReader(),
            graphbinaryV1.JanusGraphBinaryWriter(),
        )
Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
# Copyright 2023 JanusGraph-Python Authors
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from gremlin_python.structure.io.graphbinaryV1 import (
16+
_GraphBinaryTypeIO, StringIO, GraphBinaryReader, GraphBinaryWriter, DataType,
17+
_make_packer,
18+
uint64_pack, uint64_unpack, uint8_pack, uint8_unpack,
19+
)
20+
from janusgraph_python.process.traversal import _JanusGraphP, RelationIdentifier
21+
22+
# Pack/unpack pair for the 2-byte values read in this module (used for the
# custom type name length). '>H' is presumably a struct format string for a
# big-endian unsigned short -- confirm against _make_packer.
uint16_pack, uint16_unpack = _make_packer('>H')
23+
# Pack/unpack pair for the 4-byte custom type id. '>I' is presumably a struct
# format string for a big-endian unsigned int -- confirm against _make_packer.
uint32_pack, uint32_unpack = _make_packer('>I')
24+
25+
class JanusGraphBinaryReader(GraphBinaryReader):
    """GraphBinary reader that hands the custom type code to JanusGraphBinaryTypeIO."""

    def __init__(self):
        # The custom type code is the only entry supplied here; everything
        # read under that code is decoded by JanusGraphBinaryTypeIO.
        super().__init__({DataType.custom: JanusGraphBinaryTypeIO})
33+
34+
class JanusGraphBinaryWriter(GraphBinaryWriter):
    """GraphBinary writer extended with serializers for JanusGraph-specific types."""

    def __init__(self):
        # (python type, serializer class) pairs for RelationIdentifier and
        # the JanusGraph text predicates.
        janusgraph_serializers = [
            (RelationIdentifier, JanusGraphRelationIdentifierIO),
            (_JanusGraphP, JanusGraphPSerializer),
        ]
        super().__init__(janusgraph_serializers)
43+
44+
class JanusGraphBinaryTypeIO(_GraphBinaryTypeIO):
    """Base (de)serializer for JanusGraph types transported as GraphBinary custom types."""

    # Registry of JanusGraph-specific types available for deserialization:
    # graphbinary_type_id -> (graphbinary_type_name, deserializer class).
    # Shared by this class and all subclasses.
    io_registry = {}

    @classmethod
    def register_deserializer(cls, type_class):
        """
        Register a deserializer for a JanusGraph-specific type.

        ``type_class`` must expose ``graphbinary_type_id`` and
        ``graphbinary_type_name``. Registering a second class under the same
        id replaces the earlier entry.
        """
        cls.io_registry[type_class.graphbinary_type_id] = (
            type_class.graphbinary_type_name,
            type_class,
        )

    @classmethod
    def objectify(cls, buff, reader, nullable=True):
        """
        Deserialize a JanusGraph-specific type from ``buff``.

        Delegates to ``cls.is_null`` and passes ``_read_data`` as the
        callback that reads the actual payload.
        """
        return cls.is_null(buff, reader, cls._read_data, nullable)

    @classmethod
    def _read_data(cls, b, r):
        """
        Identify the JanusGraph-specific type found in ``b`` and delegate to
        the deserializer class registered for it.

        Returns None when the first byte read is not the custom type code.
        Raises NotImplementedError when no deserializer is registered for the
        type id, or when the registered name differs from the name read.
        """
        # check if first byte is custom type code byte
        if uint8_unpack(b.read(1)) != DataType.custom.value:
            return None

        # custom type name: 2-byte length followed by that many bytes
        custom_type_name_length = uint16_unpack(b.read(2))
        custom_type_name = b.read(custom_type_name_length).decode()

        # custom type id: 4 bytes
        custom_type_id = uint32_unpack(b.read(4))

        # look up the deserializer by id, then cross-check the name
        custom_serializer = cls.io_registry.get(custom_type_id)
        if custom_serializer is None:
            raise NotImplementedError(f"No deserializer found for JanusGraph type with id: {custom_type_id}")

        registered_name, deserializer = custom_serializer
        if registered_name != custom_type_name:
            raise NotImplementedError(f"No deserializer found for JanusGraph type with name: {custom_type_name}")

        return deserializer.objectify(b, r)

    @classmethod
    def prefix_bytes_custom_type(cls, writer, to_extend, as_value=False):
        """
        Write the custom-type prefix of the JanusGraph-specific type ``cls``.

        The prefix consists of the custom type code (unless ``as_value``),
        the type name, the 4-byte type id and the custom type code again
        (unless ``as_value``).

        Returns the buffer that was written to. When ``to_extend`` is None a
        new bytearray is created, and the return value is the only way for
        the caller to obtain it.
        """
        if to_extend is None:
            to_extend = bytearray()

        # use the custom type code
        if not as_value:
            to_extend += uint8_pack(DataType.custom.value)

        # add the name of the custom JanusGraph type
        StringIO.dictify(cls.graphbinary_type_name, writer, to_extend, True, False)

        # add the id of the custom JanusGraph type
        to_extend += uint32_pack(cls.graphbinary_type_id)

        # use the custom type code
        if not as_value:
            to_extend += uint8_pack(DataType.custom.value)

        # Previously nothing was returned, so a buffer allocated above for
        # to_extend=None was silently dropped.
        return to_extend
112+
113+
class JanusGraphPSerializer(JanusGraphBinaryTypeIO):
    """Serializer for JanusGraph-specific text predicates (``_JanusGraphP``)."""

    # id and name written into the custom-type prefix
    graphbinary_type_id = 0x1002
    graphbinary_type_name = "janusgraph.P"
    python_type = _JanusGraphP

    @classmethod
    def dictify(cls, obj, writer, to_extend, as_value=False, nullable=True):
        """
        Serialize a JanusGraph-specific text predicate.

        Writes the custom-type prefix, then the predicate's operator as a
        string, then the predicate's value via ``writer.to_dict``.
        ``nullable`` is accepted for signature compatibility and not used.

        Returns the buffer written to; a new bytearray is created when
        ``to_extend`` is None.
        """
        # Allocate here rather than relying on the helpers: a buffer they
        # create for to_extend=None would not be visible to this method and
        # None would be returned.
        if to_extend is None:
            to_extend = bytearray()

        cls.prefix_bytes_custom_type(writer, to_extend, as_value)

        # serialize the custom JanusGraph operator
        StringIO.dictify(obj.operator, writer, to_extend, True, False)

        # serialize the value
        writer.to_dict(obj.value, to_extend)

        return to_extend
132+
133+
class JanusGraphRelationIdentifierIO(JanusGraphBinaryTypeIO):
    """Serializer and deserializer for JanusGraph's RelationIdentifier."""

    # id and name written into the custom-type prefix
    graphbinary_type_id = 0x1001
    graphbinary_type_name = "janusgraph.RelationIdentifier"
    python_type = RelationIdentifier

    # one-byte marker written in front of each vertex ID to tell the two
    # encodings apart
    long_marker = 0
    string_marker = 1

    @classmethod
    def dictify(cls, obj, writer, to_extend, as_value=False, nullable=True):
        """
        Serialize a JanusGraph-specific RelationIdentifier.

        Layout after the custom-type prefix: out vertex ID (marker + value),
        edge type ID (8 bytes), relation ID (8 bytes), in vertex ID
        (marker + value). A missing in vertex ID is written as long 0.
        ``nullable`` is accepted for signature compatibility and not used.

        Returns the buffer written to; a new bytearray is created when
        ``to_extend`` is None.
        """
        # Allocate here so the buffer is not lost (and None returned) when
        # the caller passes to_extend=None.
        if to_extend is None:
            to_extend = bytearray()

        cls.prefix_bytes_custom_type(writer, to_extend, as_value)

        # serialize out vertex ID
        if isinstance(obj.out_vertex_id, int):
            to_extend += uint8_pack(cls.long_marker)
            to_extend += uint64_pack(obj.out_vertex_id)
        else:
            to_extend += uint8_pack(cls.string_marker)
            cls._write_string(obj.out_vertex_id, writer, to_extend)

        # serialize edge type ID and relation ID
        to_extend += uint64_pack(obj.type_id)
        to_extend += uint64_pack(obj.relation_id)

        # serialize in vertex ID
        if obj.in_vertex_id is None:
            to_extend += uint8_pack(cls.long_marker)
            to_extend += uint64_pack(0)
        elif isinstance(obj.in_vertex_id, int):
            to_extend += uint8_pack(cls.long_marker)
            to_extend += uint64_pack(obj.in_vertex_id)
        else:
            to_extend += uint8_pack(cls.string_marker)
            cls._write_string(obj.in_vertex_id, writer, to_extend)

        return to_extend

    @classmethod
    def objectify(cls, b, r):
        """
        Deserialize a JanusGraph-specific RelationIdentifier.

        Expects ``b`` to be positioned right after the custom type id. The
        next byte must be the custom type code, otherwise an Exception is
        raised. A long in vertex ID of 0 is mapped back to None.
        """
        if uint8_unpack(b.read(1)) != DataType.custom.value:
            raise Exception("Unexpected type while deserializing JanusGraph RelationIdentifier")

        # read the next byte that shows if the out vertex id is string or long
        out_vertex_id_marker = uint8_unpack(b.read(1))

        # deserialize out vertex ID
        if out_vertex_id_marker == cls.string_marker:
            out_vertex_id = cls._read_string(b)
        else:
            out_vertex_id = uint64_unpack(b.read(8))

        # deserialize edge type ID and relation ID
        type_id = uint64_unpack(b.read(8))
        relation_id = uint64_unpack(b.read(8))

        # deserialize in vertex ID
        in_vertex_id_marker = uint8_unpack(b.read(1))
        if in_vertex_id_marker == cls.string_marker:
            in_vertex_id = cls._read_string(b)
        else:
            in_vertex_id = uint64_unpack(b.read(8))
            # 0 is what dictify writes for a missing in vertex ID
            if in_vertex_id == 0:
                in_vertex_id = None

        return RelationIdentifier.from_ids(out_vertex_id, type_id, relation_id, in_vertex_id)

    @classmethod
    def _read_string(cls, buff):
        """
        Helper method to read a string represented as byte array.
        The length of the string is not known upfront so the byte
        array needs to be read until a byte occurs that is marked
        with a special end marker (its highest bit is set).
        """
        # Collect the 7-bit payload of every byte and decode once at the end
        # instead of growing a str one character at a time.
        raw = bytearray()
        while True:
            c = uint8_unpack(buff.read(1))
            raw.append(c & 0x7F)

            # check if the character is marked with end marker
            # if yes that is the end of the string
            if c & 0x80:
                break

        return raw.decode('ascii')

    @classmethod
    def _write_string(cls, string, writer, to_extend):
        """
        Helper method to create a byte array from a string and
        mark the string's last character with special end marker.

        The end marker is the highest bit of the last byte, so only 7-bit
        (ASCII) characters can be represented and the string must contain at
        least one character. ``writer`` is not used.

        Raises TypeError if ``string`` is not a str and ValueError if it is
        empty or contains non-ASCII characters (UnicodeEncodeError is a
        ValueError).
        """
        if not isinstance(string, str):
            raise TypeError(f"String vertex ID must be a str, got {type(string).__name__}")
        if not string:
            # there is no last character that could carry the end marker
            raise ValueError("String vertex ID must not be empty")

        # Characters above 0x7F would collide with the end marker bit, so
        # reject them instead of writing bytes that decode to something else.
        encoded = bytearray(string.encode('ascii'))

        # add end marker to the last character
        encoded[-1] |= 0x80

        to_extend += encoded
238+
239+
# register the JanusGraph-specific RelationIdentifier as deserializer
240+
# NOTE: runs at import time, so the registry is populated as soon as this
# module is loaded. RelationIdentifier is the only type registered for
# deserialization here; JanusGraphPSerializer is not registered.
JanusGraphBinaryTypeIO.register_deserializer(JanusGraphRelationIdentifierIO)

tests/integration/RelationIdentifier_test.py

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,39 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
from pytest import mark, param
1516
from janusgraph_python.process.traversal import RelationIdentifier
1617

1718
class _RelationIdentifierSerializer(object):
1819
# g is expected to be set once this class is inherited
1920
g = None
2021

21-
def test_RelationIdentifier_as_edge_id(self):
22-
edge_id = self.g.E().id_().next()
22+
@mark.parametrize(
23+
'vertex_id,edge_type',
24+
[
25+
param(1280, 'mother', id='long in and out vertex ID'),
26+
param('jupiter', 'lives', id='long in vertex ID, string out vertex ID'),
27+
param('jupiter', 'brother', id='string in and out vertex ID'),
28+
param(1024, 'father', id='string in vertex ID, long out vertex ID'),
29+
]
30+
)
31+
def test_RelationIdentifier_as_edge_id(self, vertex_id, edge_type):
32+
edge_id = self.g.V(vertex_id).both_e(edge_type).id_().next()
2333

2434
count = self.g.E(edge_id).count().next()
2535
assert count == 1
2636

27-
def test_Edge(self):
28-
edge = self.g.E().next()
37+
@mark.parametrize(
38+
'vertex_id,edge_type',
39+
[
40+
param(1280, 'mother', id='long in and out vertex ID'),
41+
param('jupiter', 'lives', id='long in vertex ID, string out vertex ID'),
42+
param('jupiter', 'brother', id='string in and out vertex ID'),
43+
param(1024, 'father', id='string in vertex ID, long out vertex ID'),
44+
]
45+
)
46+
def test_Edge(self, vertex_id, edge_type):
47+
edge = self.g.V(vertex_id).both_e(edge_type).next()
2948

3049
count = self.g.E(edge).count().next()
3150
assert count == 1
@@ -34,12 +53,30 @@ class _RelationIdentifierDeserializer(object):
3453
# g is expected to be set once this class is inherited
3554
g = None
3655

37-
def test_valid_RelationIdentifier(self):
38-
relation_identifier = self.g.V().has('demigod', 'name', 'hercules').out_e('father').id_().next()
56+
@mark.parametrize(
57+
'vertex_id,edge_type',
58+
[
59+
param(1280, 'mother', id='long in and out vertex ID'),
60+
param('jupiter', 'lives', id='long in vertex ID, string out vertex ID'),
61+
param('jupiter', 'brother', id='string in and out vertex ID'),
62+
param(1024, 'father', id='string in vertex ID, long out vertex ID'),
63+
]
64+
)
65+
def test_valid_RelationIdentifier(self, vertex_id, edge_type):
66+
relation_identifier = self.g.V(vertex_id).both_e(edge_type).id_().next()
3967

4068
assert type(relation_identifier) is RelationIdentifier
4169

42-
def test_Edge(self):
43-
edge = self.g.V().has('demigod', 'name', 'hercules').out_e('father').next()
70+
@mark.parametrize(
71+
'vertex_id,edge_type',
72+
[
73+
param(1280, 'mother', id='long in and out vertex ID'),
74+
param('jupiter', 'lives', id='long in vertex ID, string out vertex ID'),
75+
param('jupiter', 'brother', id='string in and out vertex ID'),
76+
param(1024, 'father', id='string in vertex ID, long out vertex ID'),
77+
]
78+
)
79+
def test_Edge(self, vertex_id, edge_type):
80+
edge = self.g.V(vertex_id).both_e(edge_type).next()
4481

4582
assert type(edge.id) is RelationIdentifier

0 commit comments

Comments
 (0)