Skip to content

Commit c24dcd0

Browse files
committed
Use pg_pread() and pg_pwrite() for data files and WAL.
Cut down on system calls by doing random I/O using offset-based OS routines where available. Remove the code for tracking the 'virtual' seek position. The only reason left to call FileSeek() was to get the file's size, so provide a new function FileSize() instead.

Author: Oskari Saarenmaa, Thomas Munro
Reviewed-by: Thomas Munro, Jesper Pedersen, Tom Lane, Alvaro Herrera
Discussion: https://postgr.es/m/CAEepm=02rapCpPR3ZGF2vW=SBHSdFYO_bz_f-wwWJonmA3APgw@mail.gmail.com
Discussion: https://postgr.es/m/b8748d39-0b19-0514-a1b9-4e5a28e6a208%40gmail.com
Discussion: https://postgr.es/m/a86bd200-ebbe-d829-e3ca-0c4474b2fcb7%40ohmu.fi
1 parent 3fd2a79 commit c24dcd0

File tree

6 files changed: 42 additions and 288 deletions.

src/backend/access/heap/rewriteheap.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -935,7 +935,7 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
935935
* Note that we deviate from the usual WAL coding practices here,
936936
* check the above "Logical rewrite support" comment for reasoning.
937937
*/
938-
written = FileWrite(src->vfd, waldata_start, len,
938+
written = FileWrite(src->vfd, waldata_start, len, src->off,
939939
WAIT_EVENT_LOGICAL_REWRITE_WRITE);
940940
if (written != len)
941941
ereport(ERROR,

src/backend/access/transam/xlog.c

Lines changed: 3 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2478,18 +2478,6 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
24782478
Size nleft;
24792479
int written;
24802480

2481-
/* Need to seek in the file? */
2482-
if (openLogOff != startoffset)
2483-
{
2484-
if (lseek(openLogFile, (off_t) startoffset, SEEK_SET) < 0)
2485-
ereport(PANIC,
2486-
(errcode_for_file_access(),
2487-
errmsg("could not seek in log file %s to offset %u: %m",
2488-
XLogFileNameP(ThisTimeLineID, openLogSegNo),
2489-
startoffset)));
2490-
openLogOff = startoffset;
2491-
}
2492-
24932481
/* OK to write the page(s) */
24942482
from = XLogCtl->pages + startidx * (Size) XLOG_BLCKSZ;
24952483
nbytes = npages * (Size) XLOG_BLCKSZ;
@@ -2498,7 +2486,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
24982486
{
24992487
errno = 0;
25002488
pgstat_report_wait_start(WAIT_EVENT_WAL_WRITE);
2501-
written = write(openLogFile, from, nleft);
2489+
written = pg_pwrite(openLogFile, from, nleft, startoffset);
25022490
pgstat_report_wait_end();
25032491
if (written <= 0)
25042492
{
@@ -2513,6 +2501,7 @@ XLogWrite(XLogwrtRqst WriteRqst, bool flexible)
25132501
}
25142502
nleft -= written;
25152503
from += written;
2504+
startoffset += written;
25162505
} while (nleft > 0);
25172506

25182507
/* Update state for write */
@@ -11821,22 +11810,9 @@ XLogPageRead(XLogReaderState *xlogreader, XLogRecPtr targetPagePtr, int reqLen,
1182111810

1182211811
/* Read the requested page */
1182311812
readOff = targetPageOff;
11824-
if (lseek(readFile, (off_t) readOff, SEEK_SET) < 0)
11825-
{
11826-
char fname[MAXFNAMELEN];
11827-
int save_errno = errno;
11828-
11829-
XLogFileName(fname, curFileTLI, readSegNo, wal_segment_size);
11830-
errno = save_errno;
11831-
ereport(emode_for_corrupt_record(emode, targetPagePtr + reqLen),
11832-
(errcode_for_file_access(),
11833-
errmsg("could not seek in log segment %s to offset %u: %m",
11834-
fname, readOff)));
11835-
goto next_record_is_invalid;
11836-
}
1183711813

1183811814
pgstat_report_wait_start(WAIT_EVENT_WAL_READ);
11839-
r = read(readFile, readBuf, XLOG_BLCKSZ);
11815+
r = pg_pread(readFile, readBuf, XLOG_BLCKSZ, (off_t) readOff);
1184011816
if (r != XLOG_BLCKSZ)
1184111817
{
1184211818
char fname[MAXFNAMELEN];

src/backend/storage/file/buffile.c

Lines changed: 5 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -67,12 +67,6 @@ struct BufFile
6767
int numFiles; /* number of physical files in set */
6868
/* all files except the last have length exactly MAX_PHYSICAL_FILESIZE */
6969
File *files; /* palloc'd array with numFiles entries */
70-
off_t *offsets; /* palloc'd array with numFiles entries */
71-
72-
/*
73-
* offsets[i] is the current seek position of files[i]. We use this to
74-
* avoid making redundant FileSeek calls.
75-
*/
7670

7771
bool isInterXact; /* keep open over transactions? */
7872
bool dirty; /* does buffer need to be written? */
@@ -116,7 +110,6 @@ makeBufFileCommon(int nfiles)
116110
BufFile *file = (BufFile *) palloc(sizeof(BufFile));
117111

118112
file->numFiles = nfiles;
119-
file->offsets = (off_t *) palloc0(sizeof(off_t) * nfiles);
120113
file->isInterXact = false;
121114
file->dirty = false;
122115
file->resowner = CurrentResourceOwner;
@@ -170,10 +163,7 @@ extendBufFile(BufFile *file)
170163

171164
file->files = (File *) repalloc(file->files,
172165
(file->numFiles + 1) * sizeof(File));
173-
file->offsets = (off_t *) repalloc(file->offsets,
174-
(file->numFiles + 1) * sizeof(off_t));
175166
file->files[file->numFiles] = pfile;
176-
file->offsets[file->numFiles] = 0L;
177167
file->numFiles++;
178168
}
179169

@@ -396,7 +386,6 @@ BufFileClose(BufFile *file)
396386
FileClose(file->files[i]);
397387
/* release the buffer space */
398388
pfree(file->files);
399-
pfree(file->offsets);
400389
pfree(file);
401390
}
402391

@@ -422,27 +411,17 @@ BufFileLoadBuffer(BufFile *file)
422411
file->curOffset = 0L;
423412
}
424413

425-
/*
426-
* May need to reposition physical file.
427-
*/
428-
thisfile = file->files[file->curFile];
429-
if (file->curOffset != file->offsets[file->curFile])
430-
{
431-
if (FileSeek(thisfile, file->curOffset, SEEK_SET) != file->curOffset)
432-
return; /* seek failed, read nothing */
433-
file->offsets[file->curFile] = file->curOffset;
434-
}
435-
436414
/*
437415
* Read whatever we can get, up to a full bufferload.
438416
*/
417+
thisfile = file->files[file->curFile];
439418
file->nbytes = FileRead(thisfile,
440419
file->buffer.data,
441420
sizeof(file->buffer),
421+
file->curOffset,
442422
WAIT_EVENT_BUFFILE_READ);
443423
if (file->nbytes < 0)
444424
file->nbytes = 0;
445-
file->offsets[file->curFile] += file->nbytes;
446425
/* we choose not to advance curOffset here */
447426

448427
if (file->nbytes > 0)
@@ -491,23 +470,14 @@ BufFileDumpBuffer(BufFile *file)
491470
if ((off_t) bytestowrite > availbytes)
492471
bytestowrite = (int) availbytes;
493472

494-
/*
495-
* May need to reposition physical file.
496-
*/
497473
thisfile = file->files[file->curFile];
498-
if (file->curOffset != file->offsets[file->curFile])
499-
{
500-
if (FileSeek(thisfile, file->curOffset, SEEK_SET) != file->curOffset)
501-
return; /* seek failed, give up */
502-
file->offsets[file->curFile] = file->curOffset;
503-
}
504474
bytestowrite = FileWrite(thisfile,
505475
file->buffer.data + wpos,
506476
bytestowrite,
477+
file->curOffset,
507478
WAIT_EVENT_BUFFILE_WRITE);
508479
if (bytestowrite <= 0)
509480
return; /* failed to write */
510-
file->offsets[file->curFile] += bytestowrite;
511481
file->curOffset += bytestowrite;
512482
wpos += bytestowrite;
513483

@@ -803,11 +773,10 @@ BufFileSize(BufFile *file)
803773
{
804774
off_t lastFileSize;
805775

806-
/* Get the size of the last physical file by seeking to end. */
807-
lastFileSize = FileSeek(file->files[file->numFiles - 1], 0, SEEK_END);
776+
/* Get the size of the last physical file. */
777+
lastFileSize = FileSize(file->files[file->numFiles - 1]);
808778
if (lastFileSize < 0)
809779
return -1;
810-
file->offsets[file->numFiles - 1] = lastFileSize;
811780

812781
return ((file->numFiles - 1) * (off_t) MAX_PHYSICAL_FILESIZE) +
813782
lastFileSize;
@@ -849,13 +818,8 @@ BufFileAppend(BufFile *target, BufFile *source)
849818

850819
target->files = (File *)
851820
repalloc(target->files, sizeof(File) * newNumFiles);
852-
target->offsets = (off_t *)
853-
repalloc(target->offsets, sizeof(off_t) * newNumFiles);
854821
for (i = target->numFiles; i < newNumFiles; i++)
855-
{
856822
target->files[i] = source->files[i - target->numFiles];
857-
target->offsets[i] = source->offsets[i - target->numFiles];
858-
}
859823
target->numFiles = newNumFiles;
860824

861825
return startBlock;

0 commit comments

Comments
 (0)