3
3
from __future__ import annotations
4
4
5
5
import os
6
- import re
7
6
import shutil
8
7
import tempfile
9
8
from collections .abc import Iterable
@@ -472,7 +471,6 @@ def totree(
472
471
cls ,
473
472
tokens : List [Union [str , Tuple ]],
474
473
root : str = '' ,
475
- special_tokens : Dict = {'(' : '-LRB-' , ')' : '-RRB-' }
476
474
) -> nltk .Tree :
477
475
r"""
478
476
Converts a list of tokens to a :class:`nltk.tree.Tree`.
@@ -483,9 +481,6 @@ def totree(
483
481
This can be either a list of words or word/pos pairs.
484
482
root (str):
485
483
The root label of the tree. Default: ''.
486
- special_tokens (dict):
487
- A dict for normalizing some special tokens to avoid tree construction crash.
488
- Default: {'(': '-LRB-', ')': '-RRB-'}.
489
484
490
485
Returns:
491
486
A :class:`nltk.tree.Tree` object.
@@ -497,16 +492,7 @@ def totree(
497
492
498
493
if isinstance (tokens [0 ], str ):
499
494
tokens = [(token , '_' ) for token in tokens ]
500
- mapped , pattern = [], re .compile (f'[{ "" .join (special_tokens )} ]' )
501
- for i , (word , pos ) in enumerate (tokens ):
502
- match = re .search (pattern , word )
503
- if match :
504
- tokens [i ] = (pattern .sub (lambda m : special_tokens [m [0 ]], word ), pos )
505
- mapped .append ((i , word ))
506
- tree = nltk .Tree .fromstring (f"({ root } { ' ' .join ([f'( ({ pos } { word } ))' for word , pos in tokens ])} )" )
507
- for i , word in mapped :
508
- tree [i ][0 ][0 ] = word
509
- return tree
495
+ return nltk .Tree (root , [nltk .Tree ('' , [nltk .Tree (pos , [word ])]) for word , pos in tokens ])
510
496
511
497
@classmethod
512
498
def binarize (cls , tree : nltk .Tree ) -> nltk .Tree :
0 commit comments