core.fsyncmethod: add writeout-only mode
This commit introduces the `core.fsyncMethod` configuration knob, which can currently be set to `fsync` or `writeout-only`.

The new writeout-only mode attempts to tell the operating system to flush its in-memory page cache to the storage hardware without issuing a CACHE_FLUSH command to the storage controller.

Writeout-only fsync is significantly faster than a vanilla fsync on common hardware, since data is written to a disk-side cache rather than all the way to a durable medium. Later changes in this patch series will take advantage of this primitive to implement batching of hardware flushes.

When git_fsync is called with FSYNC_WRITEOUT_ONLY, it may fail and the caller is expected to do an ordinary fsync as needed.

On Apple platforms, the fsync system call does not issue a CACHE_FLUSH directive to the storage controller. This change updates fsync to do fcntl(F_FULLFSYNC) to make fsync actually durable. We maintain parity with existing behavior on Apple platforms by setting the default value of the new core.fsyncMethod option.

Signed-off-by: Neeraj Singh <neerajsi@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
This commit is contained in:
parent
19d3f228c8
commit
abf38abec2
@ -547,6 +547,15 @@ core.whitespace::
|
|||||||
is relevant for `indent-with-non-tab` and when Git fixes `tab-in-indent`
|
is relevant for `indent-with-non-tab` and when Git fixes `tab-in-indent`
|
||||||
errors. The default tab width is 8. Allowed values are 1 to 63.
|
errors. The default tab width is 8. Allowed values are 1 to 63.
|
||||||
|
|
||||||
|
core.fsyncMethod::
|
||||||
|
A value indicating the strategy Git will use to harden repository data
|
||||||
|
using fsync and related primitives.
|
||||||
|
+
|
||||||
|
* `fsync` uses the fsync() system call or platform equivalents.
|
||||||
|
* `writeout-only` issues pagecache writeback requests, but depending on the
|
||||||
|
filesystem and storage hardware, data added to the repository may not be
|
||||||
|
durable in the event of a system crash. This is the default mode on macOS.
|
||||||
|
|
||||||
core.fsyncObjectFiles::
|
core.fsyncObjectFiles::
|
||||||
This boolean will enable 'fsync()' when writing object files.
|
This boolean will enable 'fsync()' when writing object files.
|
||||||
+
|
+
|
||||||
|
6
Makefile
6
Makefile
@ -411,6 +411,8 @@ all::
|
|||||||
#
|
#
|
||||||
# Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
|
# Define HAVE_CLOCK_MONOTONIC if your platform has CLOCK_MONOTONIC.
|
||||||
#
|
#
|
||||||
|
# Define HAVE_SYNC_FILE_RANGE if your platform has sync_file_range.
|
||||||
|
#
|
||||||
# Define NEEDS_LIBRT if your platform requires linking with librt (glibc version
|
# Define NEEDS_LIBRT if your platform requires linking with librt (glibc version
|
||||||
# before 2.17) for clock_gettime and CLOCK_MONOTONIC.
|
# before 2.17) for clock_gettime and CLOCK_MONOTONIC.
|
||||||
#
|
#
|
||||||
@ -1897,6 +1899,10 @@ ifdef HAVE_CLOCK_MONOTONIC
|
|||||||
BASIC_CFLAGS += -DHAVE_CLOCK_MONOTONIC
|
BASIC_CFLAGS += -DHAVE_CLOCK_MONOTONIC
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifdef HAVE_SYNC_FILE_RANGE
|
||||||
|
BASIC_CFLAGS += -DHAVE_SYNC_FILE_RANGE
|
||||||
|
endif
|
||||||
|
|
||||||
ifdef NEEDS_LIBRT
|
ifdef NEEDS_LIBRT
|
||||||
EXTLIBS += -lrt
|
EXTLIBS += -lrt
|
||||||
endif
|
endif
|
||||||
|
7
cache.h
7
cache.h
@ -995,6 +995,13 @@ extern char *git_replace_ref_base;
|
|||||||
|
|
||||||
extern int fsync_object_files;
|
extern int fsync_object_files;
|
||||||
extern int use_fsync;
|
extern int use_fsync;
|
||||||
|
|
||||||
|
/*
 * Strategy used to harden repository data, chosen with the
 * `core.fsyncMethod` configuration variable.
 */
enum fsync_method {
	/* Use the fsync() system call or a platform equivalent. */
	FSYNC_METHOD_FSYNC = 0,
	/*
	 * Only request pagecache writeback; depending on the filesystem and
	 * storage hardware the data may not survive a system crash.
	 */
	FSYNC_METHOD_WRITEOUT_ONLY = 1
};

/* The method currently in effect. */
extern enum fsync_method fsync_method;
|
||||||
extern int core_preload_index;
|
extern int core_preload_index;
|
||||||
extern int precomposed_unicode;
|
extern int precomposed_unicode;
|
||||||
extern int protect_hfs;
|
extern int protect_hfs;
|
||||||
|
@ -329,6 +329,9 @@ int mingw_getpagesize(void);
|
|||||||
#define getpagesize mingw_getpagesize
|
#define getpagesize mingw_getpagesize
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
int win32_fsync_no_flush(int fd);
|
||||||
|
#define fsync_no_flush win32_fsync_no_flush
|
||||||
|
|
||||||
struct rlimit {
|
struct rlimit {
|
||||||
unsigned int rlim_cur;
|
unsigned int rlim_cur;
|
||||||
};
|
};
|
||||||
|
28
compat/win32/flush.c
Normal file
28
compat/win32/flush.c
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
#include "git-compat-util.h"
|
||||||
|
#include <winternl.h>
|
||||||
|
#include "lazyload.h"
|
||||||
|
|
||||||
|
int win32_fsync_no_flush(int fd)
|
||||||
|
{
|
||||||
|
IO_STATUS_BLOCK io_status;
|
||||||
|
|
||||||
|
#define FLUSH_FLAGS_FILE_DATA_ONLY 1
|
||||||
|
|
||||||
|
DECLARE_PROC_ADDR(ntdll.dll, NTSTATUS, NTAPI, NtFlushBuffersFileEx,
|
||||||
|
HANDLE FileHandle, ULONG Flags, PVOID Parameters, ULONG ParameterSize,
|
||||||
|
PIO_STATUS_BLOCK IoStatusBlock);
|
||||||
|
|
||||||
|
if (!INIT_PROC_ADDR(NtFlushBuffersFileEx)) {
|
||||||
|
errno = ENOSYS;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
memset(&io_status, 0, sizeof(io_status));
|
||||||
|
if (NtFlushBuffersFileEx((HANDLE)_get_osfhandle(fd), FLUSH_FLAGS_FILE_DATA_ONLY,
|
||||||
|
NULL, 0, &io_status)) {
|
||||||
|
errno = EINVAL;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
12
config.c
12
config.c
@ -1600,6 +1600,18 @@ static int git_default_core_config(const char *var, const char *value, void *cb)
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!strcmp(var, "core.fsyncmethod")) {
|
||||||
|
if (!value)
|
||||||
|
return config_error_nonbool(var);
|
||||||
|
if (!strcmp(value, "fsync"))
|
||||||
|
fsync_method = FSYNC_METHOD_FSYNC;
|
||||||
|
else if (!strcmp(value, "writeout-only"))
|
||||||
|
fsync_method = FSYNC_METHOD_WRITEOUT_ONLY;
|
||||||
|
else
|
||||||
|
warning(_("ignoring unknown core.fsyncMethod value '%s'"), value);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
if (!strcmp(var, "core.fsyncobjectfiles")) {
|
if (!strcmp(var, "core.fsyncobjectfiles")) {
|
||||||
fsync_object_files = git_config_bool(var, value);
|
fsync_object_files = git_config_bool(var, value);
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -57,6 +57,7 @@ ifeq ($(uname_S),Linux)
|
|||||||
HAVE_CLOCK_MONOTONIC = YesPlease
|
HAVE_CLOCK_MONOTONIC = YesPlease
|
||||||
# -lrt is needed for clock_gettime on glibc <= 2.16
|
# -lrt is needed for clock_gettime on glibc <= 2.16
|
||||||
NEEDS_LIBRT = YesPlease
|
NEEDS_LIBRT = YesPlease
|
||||||
|
HAVE_SYNC_FILE_RANGE = YesPlease
|
||||||
HAVE_GETDELIM = YesPlease
|
HAVE_GETDELIM = YesPlease
|
||||||
FREAD_READS_DIRECTORIES = UnfortunatelyYes
|
FREAD_READS_DIRECTORIES = UnfortunatelyYes
|
||||||
BASIC_CFLAGS += -DHAVE_SYSINFO
|
BASIC_CFLAGS += -DHAVE_SYSINFO
|
||||||
@ -463,6 +464,7 @@ endif
|
|||||||
CFLAGS =
|
CFLAGS =
|
||||||
BASIC_CFLAGS = -nologo -I. -Icompat/vcbuild/include -DWIN32 -D_CONSOLE -DHAVE_STRING_H -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE
|
BASIC_CFLAGS = -nologo -I. -Icompat/vcbuild/include -DWIN32 -D_CONSOLE -DHAVE_STRING_H -D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_DEPRECATE
|
||||||
COMPAT_OBJS = compat/msvc.o compat/winansi.o \
|
COMPAT_OBJS = compat/msvc.o compat/winansi.o \
|
||||||
|
compat/win32/flush.o \
|
||||||
compat/win32/path-utils.o \
|
compat/win32/path-utils.o \
|
||||||
compat/win32/pthread.o compat/win32/syslog.o \
|
compat/win32/pthread.o compat/win32/syslog.o \
|
||||||
compat/win32/trace2_win32_process_info.o \
|
compat/win32/trace2_win32_process_info.o \
|
||||||
@ -640,6 +642,7 @@ ifeq ($(uname_S),MINGW)
|
|||||||
COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\"
|
COMPAT_CFLAGS += -DSTRIP_EXTENSION=\".exe\"
|
||||||
COMPAT_OBJS += compat/mingw.o compat/winansi.o \
|
COMPAT_OBJS += compat/mingw.o compat/winansi.o \
|
||||||
compat/win32/trace2_win32_process_info.o \
|
compat/win32/trace2_win32_process_info.o \
|
||||||
|
compat/win32/flush.o \
|
||||||
compat/win32/path-utils.o \
|
compat/win32/path-utils.o \
|
||||||
compat/win32/pthread.o compat/win32/syslog.o \
|
compat/win32/pthread.o compat/win32/syslog.o \
|
||||||
compat/win32/dirent.o
|
compat/win32/dirent.o
|
||||||
|
@ -1082,6 +1082,14 @@ AC_COMPILE_IFELSE([CLOCK_MONOTONIC_SRC],
|
|||||||
[AC_MSG_RESULT([no])
|
[AC_MSG_RESULT([no])
|
||||||
HAVE_CLOCK_MONOTONIC=])
|
HAVE_CLOCK_MONOTONIC=])
|
||||||
GIT_CONF_SUBST([HAVE_CLOCK_MONOTONIC])
|
GIT_CONF_SUBST([HAVE_CLOCK_MONOTONIC])
|
||||||
|
|
||||||
|
#
|
||||||
|
# Define HAVE_SYNC_FILE_RANGE=YesPlease if sync_file_range is available.
|
||||||
|
# The "not found" action has to be an assignment that clears the variable;
# a bare word in that position would be run by configure as a shell command.
GIT_CHECK_FUNC(sync_file_range,
	[HAVE_SYNC_FILE_RANGE=YesPlease],
	[HAVE_SYNC_FILE_RANGE=])
|
||||||
|
GIT_CONF_SUBST([HAVE_SYNC_FILE_RANGE])
|
||||||
|
|
||||||
#
|
#
|
||||||
# Define NO_SETITIMER if you don't have setitimer.
|
# Define NO_SETITIMER if you don't have setitimer.
|
||||||
GIT_CHECK_FUNC(setitimer,
|
GIT_CHECK_FUNC(setitimer,
|
||||||
|
@ -261,10 +261,18 @@ if(CMAKE_SYSTEM_NAME STREQUAL "Windows")
|
|||||||
NOGDI OBJECT_CREATION_MODE=1 __USE_MINGW_ANSI_STDIO=0
|
NOGDI OBJECT_CREATION_MODE=1 __USE_MINGW_ANSI_STDIO=0
|
||||||
USE_NED_ALLOCATOR OVERRIDE_STRDUP MMAP_PREVENTS_DELETE USE_WIN32_MMAP
|
USE_NED_ALLOCATOR OVERRIDE_STRDUP MMAP_PREVENTS_DELETE USE_WIN32_MMAP
|
||||||
UNICODE _UNICODE HAVE_WPGMPTR ENSURE_MSYSTEM_IS_SET HAVE_RTLGENRANDOM)
|
UNICODE _UNICODE HAVE_WPGMPTR ENSURE_MSYSTEM_IS_SET HAVE_RTLGENRANDOM)
|
||||||
list(APPEND compat_SOURCES compat/mingw.c compat/winansi.c compat/win32/path-utils.c
|
list(APPEND compat_SOURCES
|
||||||
compat/win32/pthread.c compat/win32mmap.c compat/win32/syslog.c
|
compat/mingw.c
|
||||||
compat/win32/trace2_win32_process_info.c compat/win32/dirent.c
|
compat/winansi.c
|
||||||
compat/nedmalloc/nedmalloc.c compat/strdup.c)
|
compat/win32/flush.c
|
||||||
|
compat/win32/path-utils.c
|
||||||
|
compat/win32/pthread.c
|
||||||
|
compat/win32mmap.c
|
||||||
|
compat/win32/syslog.c
|
||||||
|
compat/win32/trace2_win32_process_info.c
|
||||||
|
compat/win32/dirent.c
|
||||||
|
compat/nedmalloc/nedmalloc.c
|
||||||
|
compat/strdup.c)
|
||||||
set(NO_UNIX_SOCKETS 1)
|
set(NO_UNIX_SOCKETS 1)
|
||||||
|
|
||||||
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
|
||||||
|
@ -44,6 +44,7 @@ int zlib_compression_level = Z_BEST_SPEED;
|
|||||||
int pack_compression_level = Z_DEFAULT_COMPRESSION;
|
int pack_compression_level = Z_DEFAULT_COMPRESSION;
|
||||||
int fsync_object_files;
|
int fsync_object_files;
|
||||||
int use_fsync = -1;
|
int use_fsync = -1;
|
||||||
|
enum fsync_method fsync_method = FSYNC_METHOD_DEFAULT;
|
||||||
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
|
size_t packed_git_window_size = DEFAULT_PACKED_GIT_WINDOW_SIZE;
|
||||||
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
|
size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
|
||||||
size_t delta_base_cache_limit = 96 * 1024 * 1024;
|
size_t delta_base_cache_limit = 96 * 1024 * 1024;
|
||||||
|
@ -1271,6 +1271,30 @@ __attribute__((format (printf, 1, 2))) NORETURN
|
|||||||
void BUG(const char *fmt, ...);
|
void BUG(const char *fmt, ...);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
 * Initial value of the fsync_method setting: writeout-only when building for
 * Apple platforms, a plain fsync everywhere else.
 */
#if defined(__APPLE__)
#define FSYNC_METHOD_DEFAULT FSYNC_METHOD_WRITEOUT_ONLY
#else
#define FSYNC_METHOD_DEFAULT FSYNC_METHOD_FSYNC
#endif

/* What a single git_fsync() call is asked to do. */
enum fsync_action {
	/* Write back OS caches without a storage-controller flush. */
	FSYNC_WRITEOUT_ONLY = 0,
	/* Write back OS caches and flush the hardware so changes are durable. */
	FSYNC_HARDWARE_FLUSH = 1
};

/*
 * Syncs the file referred to by `fd` in the manner selected by `action`.
 *
 * FSYNC_WRITEOUT_ONLY tries the interfaces that some operating systems
 * offer for flushing the OS cache without sending a flush command to the
 * storage controller. Where no such interface exists the call fails with
 * ENOSYS.
 *
 * FSYNC_HARDWARE_FLUSH performs an OS writeout followed by a hardware flush
 * so that changes are durable. It is not expected to fail.
 */
int git_fsync(int fd, enum fsync_action action);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Preserves errno, prints a message, but gives no warning for ENOENT.
|
* Preserves errno, prints a message, but gives no warning for ENOENT.
|
||||||
* Returns 0 on success, which includes trying to unlink an object that does
|
* Returns 0 on success, which includes trying to unlink an object that does
|
||||||
|
64
wrapper.c
64
wrapper.c
@ -546,6 +546,70 @@ int xmkstemp_mode(char *filename_template, int mode)
|
|||||||
return fd;
|
return fd;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * fsync() can fail with EINTR on some platforms. Because it is sometimes
 * reached through a wrapper that dies on any failure, retry here rather
 * than letting EINTR reach the caller.
 *
 * Returns the result of the first fsync() call that either succeeds or
 * fails with an errno other than EINTR.
 */
static int fsync_loop(int fd)
{
	for (;;) {
		int rc = fsync(fd);

		if (rc >= 0 || errno != EINTR)
			return rc;
	}
}
|
||||||
|
|
||||||
|
int git_fsync(int fd, enum fsync_action action)
|
||||||
|
{
|
||||||
|
switch (action) {
|
||||||
|
case FSYNC_WRITEOUT_ONLY:
|
||||||
|
|
||||||
|
#ifdef __APPLE__
|
||||||
|
/*
|
||||||
|
* On macOS, fsync just causes filesystem cache writeback but
|
||||||
|
* does not flush hardware caches.
|
||||||
|
*/
|
||||||
|
return fsync_loop(fd);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_SYNC_FILE_RANGE
|
||||||
|
/*
|
||||||
|
* On linux 2.6.17 and above, sync_file_range is the way to
|
||||||
|
* issue a writeback without a hardware flush. An offset of
|
||||||
|
* 0 and size of 0 indicates writeout of the entire file and the
|
||||||
|
* wait flags ensure that all dirty data is written to the disk
|
||||||
|
* (potentially in a disk-side cache) before we continue.
|
||||||
|
*/
|
||||||
|
|
||||||
|
return sync_file_range(fd, 0, 0, SYNC_FILE_RANGE_WAIT_BEFORE |
|
||||||
|
SYNC_FILE_RANGE_WRITE |
|
||||||
|
SYNC_FILE_RANGE_WAIT_AFTER);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef fsync_no_flush
|
||||||
|
return fsync_no_flush(fd);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
errno = ENOSYS;
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
case FSYNC_HARDWARE_FLUSH:
|
||||||
|
/*
|
||||||
|
* On macOS, a special fcntl is required to really flush the
|
||||||
|
* caches within the storage controller. As of this writing,
|
||||||
|
* this is a very expensive operation on Apple SSDs.
|
||||||
|
*/
|
||||||
|
#ifdef __APPLE__
|
||||||
|
return fcntl(fd, F_FULLFSYNC);
|
||||||
|
#else
|
||||||
|
return fsync_loop(fd);
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
BUG("unexpected git_fsync(%d) call", action);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static int warn_if_unremovable(const char *op, const char *file, int rc)
|
static int warn_if_unremovable(const char *op, const char *file, int rc)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
|
@ -62,10 +62,13 @@ void fsync_or_die(int fd, const char *msg)
|
|||||||
use_fsync = git_env_bool("GIT_TEST_FSYNC", 1);
|
use_fsync = git_env_bool("GIT_TEST_FSYNC", 1);
|
||||||
if (!use_fsync)
|
if (!use_fsync)
|
||||||
return;
|
return;
|
||||||
while (fsync(fd) < 0) {
|
|
||||||
if (errno != EINTR)
|
if (fsync_method == FSYNC_METHOD_WRITEOUT_ONLY &&
|
||||||
die_errno("fsync error on '%s'", msg);
|
git_fsync(fd, FSYNC_WRITEOUT_ONLY) >= 0)
|
||||||
}
|
return;
|
||||||
|
|
||||||
|
if (git_fsync(fd, FSYNC_HARDWARE_FLUSH) < 0)
|
||||||
|
die_errno("fsync error on '%s'", msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
void write_or_die(int fd, const void *buf, size_t count)
|
void write_or_die(int fd, const void *buf, size_t count)
|
||||||
|
Loading…
Reference in New Issue
Block a user