|
17 | 17 | * pending read. When that isn't possible, the existing pending read is sent
|
18 | 18 | * to StartReadBuffers() so that a new one can begin to form.
|
19 | 19 | *
|
20 |
| - * The algorithm for controlling the look-ahead distance tries to classify the |
21 |
| - * stream into three ideal behaviors: |
22 |
| - * |
23 |
| - * A) No I/O is necessary, because the requested blocks are fully cached |
24 |
| - * already. There is no benefit to looking ahead more than one block, so |
25 |
| - * distance is 1. This is the default initial assumption. |
26 |
| - * |
27 |
| - * B) I/O is necessary, but read-ahead advice is undesirable because the |
28 |
| - * access is sequential and we can rely on the kernel's read-ahead heuristics, |
29 |
| - * or impossible because direct I/O is enabled, or the system doesn't support |
30 |
| - * read-ahead advice. There is no benefit in looking ahead more than |
31 |
| - * io_combine_limit, because in this case the only goal is larger read system |
32 |
| - * calls. Looking further ahead would pin many buffers and perform |
33 |
| - * speculative work for no benefit. |
34 |
| - * |
35 |
| - * C) I/O is necessary, it appears to be random, and this system supports |
36 |
| - * read-ahead advice. We'll look further ahead in order to reach the |
37 |
| - * configured level of I/O concurrency. |
38 |
| - * |
39 |
| - * The distance increases rapidly and decays slowly, so that it moves towards |
40 |
| - * those levels as different I/O patterns are discovered. For example, a |
41 |
| - * sequential scan of fully cached data doesn't bother looking ahead, but a |
42 |
| - * sequential scan that hits a region of uncached blocks will start issuing |
43 |
| - * increasingly wide read calls until it plateaus at io_combine_limit. |
| 20 | + * The algorithm for controlling the look-ahead distance is based on recent |
| 21 | + * cache hit and miss history. When no I/O is necessary, there is no benefit |
| 22 | + * in looking ahead more than one block. This is the default initial |
| 23 | + * assumption, but when blocks needing I/O are streamed, the distance is |
| 24 | + * increased rapidly to try to benefit from I/O combining and concurrency. It |
| 25 | + * is reduced gradually when cached blocks are streamed. |
44 | 26 | *
|
45 | 27 | * The main data structure is a circular queue of buffers of size
|
46 | 28 | * max_pinned_buffers plus some extra space for technical reasons, ready to be
|
@@ -333,7 +315,7 @@ read_stream_start_pending_read(ReadStream *stream)
|
333 | 315 | /* Remember whether we need to wait before returning this buffer. */
|
334 | 316 | if (!need_wait)
|
335 | 317 | {
|
336 |
| - /* Look-ahead distance decays, no I/O necessary (behavior A). */ |
| 318 | + /* Look-ahead distance decays, no I/O necessary. */ |
337 | 319 | if (stream->distance > 1)
|
338 | 320 | stream->distance--;
|
339 | 321 | }
|
@@ -634,7 +616,7 @@ read_stream_begin_impl(int flags,
|
634 | 616 | /*
|
635 | 617 | * Skip the initial ramp-up phase if the caller says we're going to be
|
636 | 618 | * reading the whole relation. This way we start out assuming we'll be
|
637 |
| - * doing full io_combine_limit sized reads (behavior B). |
| 619 | + * doing full io_combine_limit sized reads. |
638 | 620 | */
|
639 | 621 | if (flags & READ_STREAM_FULL)
|
640 | 622 | stream->distance = Min(max_pinned_buffers, stream->io_combine_limit);
|
@@ -725,10 +707,10 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
|
725 | 707 | #ifndef READ_STREAM_DISABLE_FAST_PATH
|
726 | 708 |
|
727 | 709 | /*
|
728 |
| - * A fast path for all-cached scans (behavior A). This is the same as the |
729 |
| - * usual algorithm, but it is specialized for no I/O and no per-buffer |
730 |
| - * data, so we can skip the queue management code, stay in the same buffer |
731 |
| - * slot and use singular StartReadBuffer(). |
| 710 | + * A fast path for all-cached scans. This is the same as the usual |
| 711 | + * algorithm, but it is specialized for no I/O and no per-buffer data, so |
| 712 | + * we can skip the queue management code, stay in the same buffer slot and |
| 713 | + * use singular StartReadBuffer(). |
732 | 714 | */
|
733 | 715 | if (likely(stream->fast_path))
|
734 | 716 | {
|
@@ -848,28 +830,10 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
|
848 | 830 | if (++stream->oldest_io_index == stream->max_ios)
|
849 | 831 | stream->oldest_io_index = 0;
|
850 | 832 |
|
851 |
| - if (stream->ios[io_index].op.flags & READ_BUFFERS_ISSUE_ADVICE) |
852 |
| - { |
853 |
| - /* Distance ramps up fast (behavior C). */ |
854 |
| - distance = stream->distance * 2; |
855 |
| - distance = Min(distance, stream->max_pinned_buffers); |
856 |
| - stream->distance = distance; |
857 |
| - } |
858 |
| - else |
859 |
| - { |
860 |
| - /* No advice; move towards io_combine_limit (behavior B). */ |
861 |
| - if (stream->distance > stream->io_combine_limit) |
862 |
| - { |
863 |
| - stream->distance--; |
864 |
| - } |
865 |
| - else |
866 |
| - { |
867 |
| - distance = stream->distance * 2; |
868 |
| - distance = Min(distance, stream->io_combine_limit); |
869 |
| - distance = Min(distance, stream->max_pinned_buffers); |
870 |
| - stream->distance = distance; |
871 |
| - } |
872 |
| - } |
| 833 | + /* Look-ahead distance ramps up rapidly after we do I/O. */ |
| 834 | + distance = stream->distance * 2; |
| 835 | + distance = Min(distance, stream->max_pinned_buffers); |
| 836 | + stream->distance = distance; |
873 | 837 |
|
874 | 838 | /*
|
875 | 839 | * If we've reached the first block of a sequential region we're
|
|
0 commit comments