6
6
#![ allow( rustdoc:: private_intra_doc_links) ]
7
7
8
8
use std:: cmp:: Ordering ;
9
- use std:: io:: { self , BufReader } ;
9
+ use std:: io:: { self , BufReader , ErrorKind } ;
10
10
use std:: {
11
11
fs:: { File , remove_file} ,
12
12
io:: { BufRead , BufWriter , Write } ,
@@ -71,6 +71,35 @@ impl CsplitOptions {
71
71
}
72
72
}
73
73
74
/// Line iterator over a [`BufRead`] source that keeps each line's terminator.
///
/// Unlike [`BufRead::lines`], every yielded string still ends with the `\n`
/// (and any `\r` before it) that was read from the input. Only the last line
/// of the input may come back without a trailing newline.
///
/// The `BufRead` requirement lives on the `impl` blocks rather than on the
/// struct itself, so the type can be named without repeating the bound.
pub struct LinesWithNewlines<T> {
    /// Underlying reader the lines are pulled from.
    inner: T,
}

impl<T: BufRead> LinesWithNewlines<T> {
    /// Wraps the reader `s`. Nothing is read until the iterator is advanced.
    fn new(s: T) -> Self {
        Self { inner: s }
    }
}

impl<T: BufRead> Iterator for LinesWithNewlines<T> {
    type Item = io::Result<String>;

    /// Reads up to and including the next `\n` from the wrapped reader.
    ///
    /// Returns `None` once the reader reports that zero bytes were read.
    /// An I/O error from the reader is forwarded unchanged, and bytes that
    /// are not valid UTF-8 are reported as [`ErrorKind::InvalidData`].
    fn next(&mut self) -> Option<Self::Item> {
        // A fresh buffer per call: ownership moves into the returned `String`.
        let mut buf = Vec::new();
        match self.inner.read_until(b'\n', &mut buf) {
            Ok(0) => None,
            Ok(_) => Some(String::from_utf8(buf).map_err(|_| {
                io::Error::new(ErrorKind::InvalidData, "stream did not contain valid UTF-8")
            })),
            Err(e) => Some(Err(e)),
        }
    }
}
102
+
74
103
/// Splits a file into several files according to the command line patterns.
75
104
///
76
105
/// # Errors
@@ -87,8 +116,7 @@ pub fn csplit<T>(options: &CsplitOptions, patterns: &[String], input: T) -> Resu
87
116
where
88
117
T : BufRead ,
89
118
{
90
- let enumerated_input_lines = input
91
- . lines ( )
119
+ let enumerated_input_lines = LinesWithNewlines :: new ( input)
92
120
. map ( |line| line. map_err_context ( || "read error" . to_string ( ) ) )
93
121
. enumerate ( ) ;
94
122
let mut input_iter = InputSplitter :: new ( enumerated_input_lines) ;
@@ -243,7 +271,7 @@ impl SplitWriter<'_> {
243
271
self . dev_null = true ;
244
272
}
245
273
246
- /// Writes the line to the current split, appending a newline character .
274
+ /// Writes the line to the current split.
247
275
/// If [`self.dev_null`] is true, then the line is discarded.
248
276
///
249
277
/// # Errors
@@ -255,8 +283,7 @@ impl SplitWriter<'_> {
255
283
Some ( ref mut current_writer) => {
256
284
let bytes = line. as_bytes ( ) ;
257
285
current_writer. write_all ( bytes) ?;
258
- current_writer. write_all ( b"\n " ) ?;
259
- self . size += bytes. len ( ) + 1 ;
286
+ self . size += bytes. len ( ) ;
260
287
}
261
288
None => panic ! ( "trying to write to a split that was not created" ) ,
262
289
}
@@ -321,11 +348,11 @@ impl SplitWriter<'_> {
321
348
322
349
let mut ret = Err ( CsplitError :: LineOutOfRange ( pattern_as_str. to_string ( ) ) ) ;
323
350
while let Some ( ( ln, line) ) = input_iter. next ( ) {
324
- let l = line?;
351
+ let line = line?;
325
352
match n. cmp ( & ( & ln + 1 ) ) {
326
353
Ordering :: Less => {
327
354
assert ! (
328
- input_iter. add_line_to_buffer( ln, l ) . is_none( ) ,
355
+ input_iter. add_line_to_buffer( ln, line ) . is_none( ) ,
329
356
"the buffer is big enough to contain 1 line"
330
357
) ;
331
358
ret = Ok ( ( ) ) ;
@@ -334,15 +361,15 @@ impl SplitWriter<'_> {
334
361
Ordering :: Equal => {
335
362
assert ! (
336
363
self . options. suppress_matched
337
- || input_iter. add_line_to_buffer( ln, l ) . is_none( ) ,
364
+ || input_iter. add_line_to_buffer( ln, line ) . is_none( ) ,
338
365
"the buffer is big enough to contain 1 line"
339
366
) ;
340
367
ret = Ok ( ( ) ) ;
341
368
break ;
342
369
}
343
370
Ordering :: Greater => ( ) ,
344
371
}
345
- self . writeln ( & l ) ?;
372
+ self . writeln ( & line ) ?;
346
373
}
347
374
self . finish_split ( ) ;
348
375
ret
@@ -379,23 +406,33 @@ impl SplitWriter<'_> {
379
406
input_iter. set_size_of_buffer ( 1 ) ;
380
407
381
408
while let Some ( ( ln, line) ) = input_iter. next ( ) {
382
- let l = line?;
409
+ let line = line?;
410
+ let l = if let Some ( l) = line. strip_suffix ( '\n' ) {
411
+ if let Some ( l) = l. strip_suffix ( '\r' ) {
412
+ l
413
+ } else {
414
+ l
415
+ }
416
+ } else {
417
+ & line
418
+ }
419
+ . to_string ( ) ;
383
420
if regex. is_match ( & l) {
384
421
let mut next_line_suppress_matched = false ;
385
422
match ( self . options . suppress_matched , offset) {
386
423
// no offset, add the line to the next split
387
424
( false , 0 ) => {
388
425
assert ! (
389
- input_iter. add_line_to_buffer( ln, l ) . is_none( ) ,
426
+ input_iter. add_line_to_buffer( ln, line ) . is_none( ) ,
390
427
"the buffer is big enough to contain 1 line"
391
428
) ;
392
429
}
393
430
// a positive offset, some more lines need to be added to the current split
394
- ( false , _) => self . writeln ( & l ) ?,
431
+ ( false , _) => self . writeln ( & line ) ?,
395
432
// suppress matched option true, but there is a positive offset, so the line is printed
396
433
( true , 1 ..) => {
397
434
next_line_suppress_matched = true ;
398
- self . writeln ( & l ) ?;
435
+ self . writeln ( & line ) ?;
399
436
}
400
437
_ => ( ) ,
401
438
} ;
@@ -424,7 +461,7 @@ impl SplitWriter<'_> {
424
461
}
425
462
return Ok ( ( ) ) ;
426
463
}
427
- self . writeln ( & l ) ?;
464
+ self . writeln ( & line ) ?;
428
465
}
429
466
} else {
430
467
// With a negative offset we use a buffer to keep the lines within the offset.
@@ -435,7 +472,17 @@ impl SplitWriter<'_> {
435
472
let offset_usize = -offset as usize ;
436
473
input_iter. set_size_of_buffer ( offset_usize) ;
437
474
while let Some ( ( ln, line) ) = input_iter. next ( ) {
438
- let l = line?;
475
+ let line = line?;
476
+ let l = if let Some ( l) = line. strip_suffix ( '\n' ) {
477
+ if let Some ( l) = l. strip_suffix ( '\r' ) {
478
+ l
479
+ } else {
480
+ l
481
+ }
482
+ } else {
483
+ & line
484
+ }
485
+ . to_string ( ) ;
439
486
if regex. is_match ( & l) {
440
487
for line in input_iter. shrink_buffer_to_size ( ) {
441
488
self . writeln ( & line) ?;
@@ -444,12 +491,12 @@ impl SplitWriter<'_> {
444
491
// since offset_usize is for sure greater than 0
445
492
// the first element of the buffer should be removed and this
446
493
// line inserted to be coherent with GNU implementation
447
- input_iter. add_line_to_buffer ( ln, l ) ;
494
+ input_iter. add_line_to_buffer ( ln, line ) ;
448
495
} else {
449
496
// add 1 to the buffer size to make place for the matched line
450
497
input_iter. set_size_of_buffer ( offset_usize + 1 ) ;
451
498
assert ! (
452
- input_iter. add_line_to_buffer( ln, l ) . is_none( ) ,
499
+ input_iter. add_line_to_buffer( ln, line ) . is_none( ) ,
453
500
"should be big enough to hold every lines"
454
501
) ;
455
502
}
@@ -460,7 +507,7 @@ impl SplitWriter<'_> {
460
507
}
461
508
return Ok ( ( ) ) ;
462
509
}
463
- if let Some ( line) = input_iter. add_line_to_buffer ( ln, l ) {
510
+ if let Some ( line) = input_iter. add_line_to_buffer ( ln, line ) {
464
511
self . writeln ( & line) ?;
465
512
}
466
513
}
0 commit comments