Skip to content
This repository was archived by the owner on Mar 22, 2024. It is now read-only.

Commit 4416158

Browse files
committed
fix multiple bugs; pass tests
1 parent 7f0dad7 commit 4416158

File tree

1 file changed

+70
-27
lines changed

1 file changed

+70
-27
lines changed

interp.rs

Lines changed: 70 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ pub(crate) struct State<'a> {
2525
pub string_position: usize,
2626
popped_context: Option<MatchContext>,
2727
pub has_matched: Option<bool>,
28+
pub match_all: bool,
2829
}
2930

3031
impl<'a> State<'a> {
@@ -51,6 +52,7 @@ impl<'a> State<'a> {
5152
string_position: start,
5253
popped_context: None,
5354
has_matched: None,
55+
match_all: false,
5456
}
5557
}
5658

@@ -105,6 +107,7 @@ impl<'a> State<'a> {
105107
string_offset: self.string.offset(0, self.start),
106108
code_position: 0,
107109
has_matched: None,
110+
toplevel: true,
108111
};
109112
self.context_stack.push(ctx);
110113

@@ -132,6 +135,7 @@ impl<'a> State<'a> {
132135
pub fn search(mut self) -> Self {
133136
// TODO: optimize by op info and skip prefix
134137
while self.start <= self.end {
138+
self.match_all = false;
135139
self = self.pymatch();
136140

137141
if self.has_matched == Some(true) {
@@ -232,12 +236,13 @@ impl<'a> StrDrive<'a> {
232236
}
233237
}
234238

235-
#[derive(Debug, Copy, Clone)]
239+
#[derive(Debug, Clone, Copy)]
236240
struct MatchContext {
237241
string_position: usize,
238242
string_offset: usize,
239243
code_position: usize,
240244
has_matched: Option<bool>,
245+
toplevel: bool,
241246
}
242247

243248
trait MatchContextDrive {
@@ -463,8 +468,12 @@ impl OpcodeDispatcher {
463468
drive.ctx_mut().has_matched = Some(false);
464469
}),
465470
SreOpcode::SUCCESS => once(|drive| {
466-
drive.state.string_position = drive.ctx().string_position;
467-
drive.ctx_mut().has_matched = Some(true);
471+
if drive.ctx().toplevel && drive.state.match_all && !drive.at_end() {
472+
drive.ctx_mut().has_matched = Some(false);
473+
} else {
474+
drive.state.string_position = drive.ctx().string_position;
475+
drive.ctx_mut().has_matched = Some(true);
476+
}
468477
}),
469478
SreOpcode::ANY => once(|drive| {
470479
if drive.at_end() || drive.at_linebreak() {
@@ -482,15 +491,19 @@ impl OpcodeDispatcher {
482491
drive.skip_char(1);
483492
}
484493
}),
494+
/* assert subpattern */
495+
/* <ASSERT> <skip> <back> <pattern> */
485496
SreOpcode::ASSERT => twice(
486497
|drive| {
487498
let back = drive.peek_code(2) as usize;
488-
if back > drive.ctx().string_position {
499+
let passed = drive.ctx().string_position - drive.state.start;
500+
if passed < back {
489501
drive.ctx_mut().has_matched = Some(false);
490502
return;
491503
}
492504
drive.state.string_position = drive.ctx().string_position - back;
493505
drive.push_new_context(3);
506+
drive.state.context_stack.last_mut().unwrap().toplevel = false;
494507
},
495508
|drive| {
496509
let child_ctx = drive.state.popped_context.unwrap();
@@ -504,12 +517,14 @@ impl OpcodeDispatcher {
504517
SreOpcode::ASSERT_NOT => twice(
505518
|drive| {
506519
let back = drive.peek_code(2) as usize;
507-
if back > drive.ctx().string_position {
520+
let passed = drive.ctx().string_position - drive.state.start;
521+
if passed < back {
508522
drive.skip_code(drive.peek_code(1) as usize + 1);
509523
return;
510524
}
511525
drive.state.string_position = drive.ctx().string_position - back;
512526
drive.push_new_context(3);
527+
drive.state.context_stack.last_mut().unwrap().toplevel = false;
513528
},
514529
|drive| {
515530
let child_ctx = drive.state.popped_context.unwrap();
@@ -770,17 +785,17 @@ fn charset(set: &[u32], ch: u32) -> bool {
770785
}
771786
SreOpcode::CHARSET => {
772787
/* <CHARSET> <bitmap> */
773-
let set = &set[1..];
788+
let set = &set[i + 1..];
774789
if ch < 256 && ((set[(ch >> 5) as usize] & (1u32 << (ch & 31))) != 0) {
775790
return ok;
776791
}
777-
i += 8;
792+
i += 1 + 8;
778793
}
779794
SreOpcode::BIGCHARSET => {
780795
/* <BIGCHARSET> <blockcount> <256 blockindices> <blocks> */
781796
let count = set[i + 1] as usize;
782797
if ch < 0x10000 {
783-
let set = &set[2..];
798+
let set = &set[i + 2..];
784799
let block_index = ch >> 8;
785800
let (_, blockindices, _) = unsafe { set.align_to::<u8>() };
786801
let blocks = &set[64..];
@@ -1085,6 +1100,7 @@ struct OpMinRepeatOne {
10851100
count: usize,
10861101
}
10871102
impl OpcodeExecutor for OpMinRepeatOne {
1103+
/* <MIN_REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
10881104
fn next(&mut self, drive: &mut StackDrive) -> Option<()> {
10891105
match self.jump_id {
10901106
0 => {
@@ -1110,7 +1126,11 @@ impl OpcodeExecutor for OpMinRepeatOne {
11101126
count
11111127
};
11121128

1113-
if drive.peek_code(drive.peek_code(1) as usize + 1) == SreOpcode::SUCCESS as u32 {
1129+
let next_code = drive.peek_code(drive.peek_code(1) as usize + 1);
1130+
if next_code == SreOpcode::SUCCESS as u32
1131+
&& !(drive.ctx().toplevel && drive.state.match_all && !drive.at_end())
1132+
{
1133+
// tail is empty. we're finished
11141134
drive.state.string_position = drive.ctx().string_position;
11151135
drive.ctx_mut().has_matched = Some(true);
11161136
return None;
@@ -1377,9 +1397,18 @@ struct OpRepeatOne {
13771397
jump_id: usize,
13781398
mincount: usize,
13791399
maxcount: usize,
1380-
count: isize,
1400+
count: usize,
1401+
following_literal: Option<u32>,
13811402
}
13821403
impl OpcodeExecutor for OpRepeatOne {
1404+
/* match repeated sequence (maximizing regexp) */
1405+
1406+
/* this operator only works if the repeated item is
1407+
exactly one character wide, and we're not already
1408+
collecting backtracking points. for other cases,
1409+
use the MAX_REPEAT operator */
1410+
1411+
/* <REPEAT_ONE> <skip> <1=min> <2=max> item <SUCCESS> tail */
13831412
fn next(&mut self, drive: &mut StackDrive) -> Option<()> {
13841413
match self.jump_id {
13851414
0 => {
@@ -1388,62 +1417,76 @@ impl OpcodeExecutor for OpRepeatOne {
13881417

13891418
if drive.remaining_chars() < self.mincount {
13901419
drive.ctx_mut().has_matched = Some(false);
1420+
return None;
13911421
}
1422+
13921423
drive.state.string_position = drive.ctx().string_position;
1393-
self.count = count(drive, self.maxcount) as isize;
1394-
drive.skip_char(self.count as usize);
1395-
if self.count < self.mincount as isize {
1424+
1425+
self.count = count(drive, self.maxcount);
1426+
drive.skip_char(self.count);
1427+
if self.count < self.mincount {
13961428
drive.ctx_mut().has_matched = Some(false);
13971429
return None;
13981430
}
13991431

14001432
let next_code = drive.peek_code(drive.peek_code(1) as usize + 1);
1401-
if next_code == SreOpcode::SUCCESS as u32 {
1433+
if next_code == SreOpcode::SUCCESS as u32 && drive.at_end() && !drive.ctx().toplevel
1434+
{
14021435
// tail is empty. we're finished
14031436
drive.state.string_position = drive.ctx().string_position;
14041437
drive.ctx_mut().has_matched = Some(true);
14051438
return None;
14061439
}
14071440

14081441
drive.state.marks_push();
1409-
// TODO:
1442+
14101443
// Special case: Tail starts with a literal. Skip positions where
14111444
// the rest of the pattern cannot possibly match.
1445+
if next_code == SreOpcode::LITERAL as u32 {
1446+
self.following_literal = Some(drive.peek_code(drive.peek_code(1) as usize + 2))
1447+
}
1448+
14121449
self.jump_id = 1;
14131450
self.next(drive)
14141451
}
14151452
1 => {
1416-
// General case: backtracking
1417-
if self.count >= self.mincount as isize {
1418-
drive.state.string_position = drive.ctx().string_position;
1419-
drive.push_new_context(drive.peek_code(1) as usize + 1);
1420-
self.jump_id = 2;
1421-
return Some(());
1453+
if let Some(c) = self.following_literal {
1454+
while drive.at_end() || drive.peek_char() != c {
1455+
if self.count <= self.mincount {
1456+
drive.state.marks_pop_discard();
1457+
drive.ctx_mut().has_matched = Some(false);
1458+
return None;
1459+
}
1460+
drive.back_skip_char(1);
1461+
self.count -= 1;
1462+
}
14221463
}
14231464

1424-
drive.state.marks_pop_discard();
1425-
drive.ctx_mut().has_matched = Some(false);
1426-
None
1465+
// General case: backtracking
1466+
drive.state.string_position = drive.ctx().string_position;
1467+
drive.push_new_context(drive.peek_code(1) as usize + 1);
1468+
self.jump_id = 2;
1469+
Some(())
14271470
}
14281471
2 => {
14291472
let child_ctx = drive.state.popped_context.unwrap();
14301473
if child_ctx.has_matched == Some(true) {
14311474
drive.ctx_mut().has_matched = Some(true);
14321475
return None;
14331476
}
1434-
if self.count <= self.mincount as isize {
1477+
if self.count <= self.mincount {
14351478
drive.state.marks_pop_discard();
14361479
drive.ctx_mut().has_matched = Some(false);
14371480
return None;
14381481
}
14391482

1440-
// TODO: unnesscary double check
14411483
drive.back_skip_char(1);
14421484
self.count -= 1;
1485+
14431486
drive.state.marks_pop_keep();
14441487

14451488
self.jump_id = 1;
1446-
Some(())
1489+
self.next(drive)
14471490
}
14481491
_ => unreachable!(),
14491492
}

0 commit comments

Comments
 (0)