@@ -590,20 +590,34 @@ async def readuntil(self, separator=b'\n'):
590
590
If the data cannot be read because of over limit, a
591
591
LimitOverrunError exception will be raised, and the data
592
592
will be left in the internal buffer, so it can be read again.
593
+
594
+ The ``separator`` may also be an iterable of separators. In this
595
+ case the return value will be the shortest possible that has any
596
+ separator as the suffix. For the purposes of LimitOverrunError,
597
+ the shortest possible separator is considered to be the one that
598
+ matched.
593
599
"""
594
- seplen = len (separator )
595
- if seplen == 0 :
600
+ if isinstance (separator , bytes ):
601
+ separator = [separator ]
602
+ else :
603
+ # Makes sure the shortest match wins, and supports arbitrary iterables
604
+ separator = sorted (separator , key = len )
605
+ if not separator :
606
+ raise ValueError ('Separator should contain at least one element' )
607
+ min_seplen = len (separator [0 ])
608
+ max_seplen = len (separator [- 1 ])
609
+ if min_seplen == 0 :
596
610
raise ValueError ('Separator should be at least one-byte string' )
597
611
598
612
if self ._exception is not None :
599
613
raise self ._exception
600
614
601
615
# Consume whole buffer except last bytes, whose length is
602
- # one less than seplen . Let's check corner cases with
603
- # separator='SEPARATOR':
616
+ # one less than max_seplen . Let's check corner cases with
617
+ # separator[-1] ='SEPARATOR':
604
618
# * we have received almost complete separator (without last
605
619
# byte), i.e. buffer='some textSEPARATO'. In this case we
606
- # can safely consume len(separator) - 1 bytes.
620
+ # can safely consume max_seplen - 1 bytes.
607
621
# * last byte of buffer is first byte of separator, i.e.
608
622
# buffer='abcdefghijklmnopqrS'. We may safely consume
609
623
# everything except that last byte, but this requires us to
@@ -616,26 +630,35 @@ async def readuntil(self, separator=b'\n'):
616
630
# messages :)
617
631
618
632
# `offset` is the number of bytes from the beginning of the buffer
619
- # where there is no occurrence of `separator`.
633
+ # where there is no occurrence of any `separator`.
620
634
offset = 0
621
635
622
- # Loop until we find `separator` in the buffer, exceed the buffer size,
636
+ # Loop until we find a `separator` in the buffer, exceed the buffer size,
623
637
# or an EOF has happened.
624
638
while True :
625
639
buflen = len (self ._buffer )
626
640
627
- # Check if we now have enough data in the buffer for `separator` to
628
- # fit.
629
- if buflen - offset >= seplen :
630
- isep = self ._buffer .find (separator , offset )
631
-
632
- if isep != - 1 :
633
- # `separator` is in the buffer. `isep` will be used later
634
- # to retrieve the data.
641
+ # Check if we now have enough data in the buffer for shortest
642
+ # separator to fit.
643
+ if buflen - offset >= min_seplen :
644
+ match_start = None
645
+ match_end = None
646
+ for sep in separator :
647
+ isep = self ._buffer .find (sep , offset )
648
+
649
+ if isep != - 1 :
650
+ # `sep` is in the buffer. `match_start` and
651
+ # `match_end` will be used later to retrieve the
652
+ # data.
653
+ end = isep + len (sep )
654
+ if match_end is None or end < match_end :
655
+ match_end = end
656
+ match_start = isep
657
+ if match_end is not None :
635
658
break
636
659
637
660
# see upper comment for explanation.
638
- offset = buflen + 1 - seplen
661
+ offset = max ( 0 , buflen + 1 - max_seplen )
639
662
if offset > self ._limit :
640
663
raise exceptions .LimitOverrunError (
641
664
'Separator is not found, and chunk exceed the limit' ,
@@ -644,7 +667,7 @@ async def readuntil(self, separator=b'\n'):
644
667
# Complete message (with full separator) may be present in buffer
645
668
# even when EOF flag is set. This may happen when the last chunk
646
669
# adds data which makes separator be found. That's why we check for
647
- # EOF *ater * inspecting the buffer.
670
+ # EOF *after * inspecting the buffer.
648
671
if self ._eof :
649
672
chunk = bytes (self ._buffer )
650
673
self ._buffer .clear ()
@@ -653,12 +676,12 @@ async def readuntil(self, separator=b'\n'):
653
676
# _wait_for_data() will resume reading if stream was paused.
654
677
await self ._wait_for_data ('readuntil' )
655
678
656
- if isep > self ._limit :
679
+ if match_start > self ._limit :
657
680
raise exceptions .LimitOverrunError (
658
- 'Separator is found, but chunk is longer than limit' , isep )
681
+ 'Separator is found, but chunk is longer than limit' , match_start )
659
682
660
- chunk = self ._buffer [:isep + seplen ]
661
- del self ._buffer [:isep + seplen ]
683
+ chunk = self ._buffer [:match_end ]
684
+ del self ._buffer [:match_end ]
662
685
self ._maybe_resume_transport ()
663
686
return bytes (chunk )
664
687
0 commit comments