Skip to content

Commit 988fb70

Browse files
committed
Faster list-to-tree conversion
1 parent 89b1b99 commit 988fb70

File tree

1 file changed

+1 -15 lines changed

1 file changed

+1 -15 lines changed

supar/utils/transform.py

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
from __future__ import annotations
44

55
import os
6-
import re
76
import shutil
87
import tempfile
98
from collections.abc import Iterable
@@ -472,7 +471,6 @@ def totree(
472471
cls,
473472
tokens: List[Union[str, Tuple]],
474473
root: str = '',
475-
special_tokens: Dict = {'(': '-LRB-', ')': '-RRB-'}
476474
) -> nltk.Tree:
477475
r"""
478476
Converts a list of tokens to a :class:`nltk.tree.Tree`.
@@ -483,9 +481,6 @@ def totree(
483481
This can be either a list of words or word/pos pairs.
484482
root (str):
485483
The root label of the tree. Default: ''.
486-
special_tokens (dict):
487-
A dict for normalizing some special tokens to avoid tree construction crash.
488-
Default: {'(': '-LRB-', ')': '-RRB-'}.
489484
490485
Returns:
491486
A :class:`nltk.tree.Tree` object.
@@ -497,16 +492,7 @@ def totree(
497492

498493
if isinstance(tokens[0], str):
499494
tokens = [(token, '_') for token in tokens]
500-
mapped, pattern = [], re.compile(f'[{"".join(special_tokens)}]')
501-
for i, (word, pos) in enumerate(tokens):
502-
match = re.search(pattern, word)
503-
if match:
504-
tokens[i] = (pattern.sub(lambda m: special_tokens[m[0]], word), pos)
505-
mapped.append((i, word))
506-
tree = nltk.Tree.fromstring(f"({root} {' '.join([f'( ({pos} {word}))' for word, pos in tokens])})")
507-
for i, word in mapped:
508-
tree[i][0][0] = word
509-
return tree
495+
return nltk.Tree(root, [nltk.Tree('', [nltk.Tree(pos, [word])]) for word, pos in tokens])
510496

511497
@classmethod
512498
def binarize(cls, tree: nltk.Tree) -> nltk.Tree:

0 commit comments

Comments (0)