Skip to content

Commit 88ce837

Browse files
committed
Add binarize & debinarize fns
1 parent 37e48ba commit 88ce837

File tree

1 file changed

+31
-1
lines changed

1 file changed

+31
-1
lines changed

supar/utils/fn.py

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,18 @@
11
# -*- coding: utf-8 -*-
22

3+
import mmap
34
import os
5+
import pickle
46
import sys
57
import unicodedata
68
import urllib
79
import zipfile
8-
from typing import Dict, List, Tuple, Union
10+
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
911

1012
import torch
1113
from omegaconf import DictConfig, OmegaConf
1214
from supar.utils.common import CACHE
15+
from supar.utils.logging import progress_bar
1316

1417

1518
def ispunct(token: str) -> bool:
@@ -265,6 +268,33 @@ def download(url: str, reload: bool = False) -> str:
265268
return path
266269

267270

271+
def binarize(data: Iterable, fbin: str = None) -> None:
    r"""
    Pickle every item of ``data`` into a single binary file.

    The file consists of three consecutive sections:

    1. the pickled items, in iteration order;
    2. a pickled integer tensor of shape ``[n, 2]`` whose rows are the
       ``(start, end)`` byte offsets of each item;
    3. a pickled 2-element tensor with the ``(start, end)`` byte offsets of
       section 2, so a reader can locate the offset table from the file tail.

    Args:
        data (Iterable):
            The objects to serialize. Each one must be picklable.
        fbin (str):
            Path of the file to write. Despite the ``None`` default, a real
            path is required because the value is passed directly to ``open``.
    """
    start, spans = 0, []
    with open(fbin, 'wb') as f:
        for s in progress_bar(data):
            payload = pickle.dumps(s)
            f.write(payload)
            end = start + len(payload)
            spans.append((start, end))
            start = end
        # force an integer table of shape [n, 2]: without this, empty `data`
        # would yield a float tensor of shape [0] instead of an empty table
        table = torch.tensor(spans, dtype=torch.long).view(len(spans), 2)
        meta = pickle.dumps(table)
        # append the meta data to the end of the bin file
        f.write(meta)
        # record the positions of the meta data
        f.write(pickle.dumps(torch.tensor((start, start + len(meta)))))
285+
286+
287+
def debinarize(fbin: str, offset: Optional[int] = 0, length: Optional[int] = 0, meta: bool = False) -> Any:
    r"""
    Load one pickled object from a binary file through a read-only memory map.

    Args:
        fbin (str):
            Path of the binary file to read.
        offset (Optional[int]):
            Byte position at which the pickled object starts.
            Ignored if ``meta`` is ``True``. Default: 0.
        length (Optional[int]):
            Number of bytes to read starting from ``offset``.
            Ignored if ``meta`` is ``True``. Default: 0.
        meta (bool):
            If ``True``, locate and return the meta data appended to the end of the file
            instead of the object at ``offset``. Default: ``False``.

    Returns:
        The unpickled object, or the unpickled meta data if ``meta`` is ``True``.

    Note:
        The file content is passed to ``pickle.loads``, which can execute arbitrary code.
        Only open files that come from a trusted source.
    """
    with open(fbin, 'rb') as f, mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mm:
        if meta:
            # The file ends with a pickled 2-element integer tensor that records where the
            # meta data lives. Measure its size with a constant probe rather than with the
            # caller's `offset`/`length`, which are irrelevant here and may be `None`.
            # NOTE(review): this relies on torch producing equally sized pickles for any
            # 2-element integer tensor -- confirm across torch versions.
            size = len(pickle.dumps(torch.tensor((0, 0))))
            mm.seek(-size, os.SEEK_END)
            # the record holds (start, end) byte offsets, so the span length is end - start
            offset, end = pickle.loads(mm.read(size)).tolist()
            length = end - offset
        mm.seek(offset)
        return pickle.loads(mm.read(length))
296+
297+
268298
def get_rng_state():
269299
state = {'rng_state': torch.get_rng_state()}
270300
if torch.cuda.is_available():

0 commit comments

Comments
 (0)