2013-04-16 23:57:39 -04:00
|
|
|
#include <toku_portability.h>
|
2013-04-16 23:57:30 -04:00
|
|
|
#include <stdio.h>
|
2013-04-16 23:58:56 -04:00
|
|
|
#include <toku_assert.h>
|
2013-04-16 23:57:30 -04:00
|
|
|
#include <stdint.h>
|
|
|
|
#include <unistd.h>
|
|
|
|
#include <windows.h>
|
2013-04-16 23:58:58 -04:00
|
|
|
#include <toku_atomic.h>
|
|
|
|
#include <toku_time.h>
|
2013-04-16 23:59:01 -04:00
|
|
|
#include <fcntl.h>
|
2013-04-16 23:57:30 -04:00
|
|
|
|
|
|
|
int64_t
|
|
|
|
pread(int fildes, void *buf, size_t nbyte, int64_t offset) {
|
2013-04-16 23:57:31 -04:00
|
|
|
HANDLE filehandle;
|
|
|
|
OVERLAPPED win_offset = {0};
|
|
|
|
filehandle = (HANDLE)_get_osfhandle(fildes);
|
|
|
|
int64_t r;
|
|
|
|
if (filehandle==INVALID_HANDLE_VALUE) {
|
|
|
|
r = errno; assert(r!=0);
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
win_offset.Offset = offset % (1LL<<32LL);
|
|
|
|
win_offset.OffsetHigh = offset / (1LL<<32LL);
|
|
|
|
|
|
|
|
DWORD bytes_read;
|
|
|
|
r = ReadFile(filehandle, buf, nbyte, &bytes_read, &win_offset);
|
|
|
|
if (!r) {
|
|
|
|
r = GetLastError();
|
|
|
|
if (r==ERROR_HANDLE_EOF) r = bytes_read;
|
2013-04-16 23:57:34 -04:00
|
|
|
else {
|
|
|
|
errno = r;
|
|
|
|
r = -1;
|
|
|
|
}
|
2013-04-16 23:57:31 -04:00
|
|
|
}
|
2013-04-16 23:57:31 -04:00
|
|
|
else r = bytes_read;
|
|
|
|
|
2013-04-16 23:57:30 -04:00
|
|
|
// printf("%s: %d %p %u %I64d %I64d\n", __FUNCTION__, fildes, buf, nbyte, offset, r); fflush(stdout);
|
2013-04-16 23:57:31 -04:00
|
|
|
cleanup:
|
2013-04-16 23:57:30 -04:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
int64_t
|
|
|
|
pwrite(int fildes, const void *buf, size_t nbyte, int64_t offset) {
|
2013-04-16 23:57:31 -04:00
|
|
|
HANDLE filehandle;
|
|
|
|
OVERLAPPED win_offset = {0};
|
|
|
|
filehandle = (HANDLE)_get_osfhandle(fildes);
|
|
|
|
int64_t r;
|
|
|
|
if (filehandle==INVALID_HANDLE_VALUE) {
|
2013-04-16 23:58:56 -04:00
|
|
|
r = -1;
|
|
|
|
assert(errno!=0);
|
2013-04-16 23:57:31 -04:00
|
|
|
goto cleanup;
|
2013-04-16 23:57:31 -04:00
|
|
|
}
|
2013-04-16 23:57:31 -04:00
|
|
|
win_offset.Offset = offset % (1LL<<32LL);
|
|
|
|
win_offset.OffsetHigh = offset / (1LL<<32LL);
|
|
|
|
|
|
|
|
DWORD bytes_written;
|
|
|
|
r = WriteFile(filehandle, buf, nbyte, &bytes_written, &win_offset);
|
2013-04-16 23:57:34 -04:00
|
|
|
if (!r) {
|
|
|
|
errno = GetLastError();
|
2013-04-16 23:58:06 -04:00
|
|
|
if (errno == ERROR_HANDLE_DISK_FULL ||
|
|
|
|
errno == ERROR_DISK_FULL) {
|
2013-04-16 23:58:56 -04:00
|
|
|
errno = ENOSPC;
|
2013-04-16 23:58:06 -04:00
|
|
|
}
|
2013-04-16 23:57:34 -04:00
|
|
|
r = -1;
|
|
|
|
}
|
2013-04-16 23:57:31 -04:00
|
|
|
else r = bytes_written;
|
|
|
|
|
2013-04-16 23:57:30 -04:00
|
|
|
// printf("%s: %d %p %u %I64d %I64d\n", __FUNCTION__, fildes, buf, nbyte, offset, r); fflush(stdout);
|
2013-04-16 23:57:31 -04:00
|
|
|
cleanup:
|
2013-04-16 23:57:30 -04:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
// fsync() emulation: delegates to the CRT _commit() call and passes its
// return value through unchanged.
int
fsync(int fd) {
    return _commit(fd);
}
|
|
|
|
|
|
|
|
int
|
2013-04-16 23:59:02 -04:00
|
|
|
ftruncate(int fd, toku_off_t offset) {
|
2013-04-16 23:57:31 -04:00
|
|
|
int r = _chsize_s(fd, offset);
|
2013-04-16 23:59:02 -04:00
|
|
|
if (r!=0) {
|
|
|
|
r = -1;
|
|
|
|
assert(errno!=0);
|
|
|
|
}
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
truncate(const char *path, toku_off_t length) {
|
|
|
|
int r;
|
|
|
|
int saved_errno;
|
|
|
|
int fd = open(path, _O_BINARY|_O_RDWR, _S_IREAD|_S_IWRITE);
|
|
|
|
if (fd<0) {
|
|
|
|
r = -1;
|
|
|
|
goto done;
|
|
|
|
}
|
|
|
|
r = ftruncate(fd, length);
|
|
|
|
saved_errno = errno;
|
|
|
|
if (r!=0) {
|
|
|
|
r = -1;
|
|
|
|
assert(errno!=0);
|
|
|
|
}
|
|
|
|
int r2 = close(fd);
|
|
|
|
if (r==0) {
|
|
|
|
r = r2;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
errno = saved_errno;
|
|
|
|
}
|
|
|
|
done:
|
2013-04-16 23:57:31 -04:00
|
|
|
return r;
|
2013-04-16 23:57:30 -04:00
|
|
|
}
|
|
|
|
|
2013-04-16 23:57:42 -04:00
|
|
|
// Optional replacements for pwrite() and write(). While a pointer is NULL the
// toku_os_* write helpers call the real function instead.
static ssize_t (*t_pwrite)(int, const void *, size_t, toku_off_t) = NULL;
static ssize_t (*t_write)(int, const void *, size_t) = NULL;

// Installs pwrite_fun as the function toku_os_full_pwrite() uses in place of
// pwrite(). Always returns 0.
int
toku_set_func_pwrite (ssize_t (*pwrite_fun)(int, const void *, size_t, toku_off_t)) {
    t_pwrite = pwrite_fun;
    return 0;
}
|
|
|
|
|
|
|
|
int toku_set_func_write (ssize_t (*write_fun)(int, const void *, size_t)) {
|
|
|
|
t_write = write_fun;
|
2013-04-16 23:57:39 -04:00
|
|
|
t_write = t_write; //So far unused
|
2013-04-16 23:57:39 -04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2013-04-16 23:58:58 -04:00
|
|
|
// Handle a failed write of len bytes to fd.
//
// Must be called with r_write < 0 and errno still set by the failed call.
// Prints a diagnostic on stderr for the errors it knows about, then:
//   - EINTR: returns normally, so the caller may retry the write.
//   - ENOSPC or any other error: aborts through assert().
// Returns nothing; returning at all means "try again". errno is restored to
// the value it had on entry before returning.
static void
try_again_after_handling_write_error(int fd, size_t len, ssize_t r_write) {
    int try_again = 0;

    assert(r_write < 0);
    int errno_write = errno;
    assert(errno_write != 0);

    switch (errno_write) {
    case EINTR: { //The call was interrupted by a signal before any data was written; see signal(7).
        char err_msg[sizeof("Write of [] bytes to fd=[] interrupted. Retrying.") + 20+10]; //64 bit is 20 chars, 32 bit is 10 chars
        snprintf(err_msg, sizeof(err_msg), "Write of [%" PRIu64 "] bytes to fd=[%d] interrupted. Retrying.", (uint64_t)len, fd);
        perror(err_msg);
        fflush(stderr);
        try_again = 1;
        break;
    }
    case ENOSPC: {
        char err_msg[sizeof("Failed write of [] bytes to fd=[].") + 20+10]; //64 bit is 20 chars, 32 bit is 10 chars
        snprintf(err_msg, sizeof(err_msg), "Failed write of [%" PRIu64 "] bytes to fd=[%d].", (uint64_t)len, fd);
        perror(err_msg);
        fflush(stderr);
        int out_of_disk_space = 1;
        assert(!out_of_disk_space); //Give an error message that might be useful if this is the only one that survives.
        // Only reached when assert() is compiled out; do not fall through.
        break;
    }
    default:
        break;
    }

    // Every error other than EINTR is fatal.
    assert(try_again);
    errno = errno_write;
}
|
2013-04-16 23:57:39 -04:00
|
|
|
|
2013-04-16 23:58:55 -04:00
|
|
|
void
|
2013-04-16 23:58:58 -04:00
|
|
|
toku_os_full_pwrite (int fd, const void *org_buf, size_t len, toku_off_t off)
|
|
|
|
{
|
|
|
|
const uint8_t *buf = org_buf;
|
|
|
|
while (len > 0) {
|
|
|
|
ssize_t r;
|
|
|
|
if (t_pwrite) {
|
|
|
|
r = t_pwrite(fd, buf, len, off);
|
|
|
|
} else {
|
|
|
|
r = pwrite(fd, buf, len, off);
|
|
|
|
}
|
|
|
|
if (r > 0) {
|
|
|
|
len -= r;
|
|
|
|
buf += r;
|
|
|
|
off += r;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
try_again_after_handling_write_error(fd, len, r);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(len == 0);
|
|
|
|
}
|
|
|
|
/*
|
2013-04-16 23:57:39 -04:00
|
|
|
{
|
2013-04-16 23:58:55 -04:00
|
|
|
ssize_t r;
|
2013-04-16 23:57:39 -04:00
|
|
|
if (t_pwrite) {
|
2013-04-16 23:58:55 -04:00
|
|
|
r = t_pwrite(fd, buf, len, off);
|
2013-04-16 23:57:39 -04:00
|
|
|
} else {
|
2013-04-16 23:58:55 -04:00
|
|
|
r = pwrite(fd, buf, len, off);
|
2013-04-16 23:57:39 -04:00
|
|
|
}
|
2013-04-16 23:58:56 -04:00
|
|
|
if (r==-1 && errno==ENOSPC) {
|
|
|
|
char err_msg[sizeof("Failed write of [] bytes to fd=[].") + 20+10]; //64 bit is 20 chars, 32 bit is 10 chars
|
|
|
|
snprintf(err_msg, sizeof(err_msg), "Failed write of [%"PRIu64"] bytes to fd=[%d].", len, fd);
|
|
|
|
perror(err_msg);
|
|
|
|
fflush(stderr);
|
|
|
|
int out_of_disk_space = 1;
|
|
|
|
assert(!out_of_disk_space); //Give an error message that might be useful if this is the only one that survives.
|
|
|
|
}
|
2013-04-16 23:58:55 -04:00
|
|
|
assert(r==len);
|
2013-04-16 23:57:39 -04:00
|
|
|
}
|
2013-04-16 23:58:58 -04:00
|
|
|
*/
|
|
|
|
void
|
|
|
|
toku_os_full_write (int fd, const void *org_buf, size_t len) {
|
|
|
|
const uint8_t *buf = org_buf;
|
|
|
|
while (len > 0) {
|
|
|
|
ssize_t r;
|
|
|
|
if (t_write) {
|
|
|
|
r = t_write(fd, buf, len);
|
|
|
|
} else {
|
|
|
|
r = write(fd, buf, len);
|
|
|
|
}
|
|
|
|
if (r > 0) {
|
|
|
|
len -= r;
|
|
|
|
buf += r;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
try_again_after_handling_write_error(fd, len, r);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
assert(len == 0);
|
|
|
|
}
|
2013-04-16 23:57:39 -04:00
|
|
|
|
2013-04-16 23:59:01 -04:00
|
|
|
int
|
|
|
|
toku_os_write (int fd, const void *org_buf, size_t len) {
|
|
|
|
const uint8_t *buf = org_buf;
|
|
|
|
while (len > 0) {
|
|
|
|
ssize_t r;
|
|
|
|
if (t_write) {
|
|
|
|
r = t_write(fd, buf, len);
|
|
|
|
} else {
|
|
|
|
r = write(fd, buf, len);
|
|
|
|
}
|
|
|
|
if (r < 0)
|
|
|
|
return errno;
|
|
|
|
len -= r;
|
|
|
|
buf += r;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2013-04-16 23:58:58 -04:00
|
|
|
// t_fsync exists for testing purposes only
|
2013-04-16 23:58:52 -04:00
|
|
|
static int (*t_fsync)(int) = 0;
|
2013-04-16 23:58:52 -04:00
|
|
|
static uint64_t toku_fsync_count;
|
|
|
|
static uint64_t toku_fsync_time;
|
2013-04-16 23:58:52 -04:00
|
|
|
|
2013-04-16 23:58:59 -04:00
|
|
|
#if !TOKU_WINDOWS_HAS_ATOMIC_64
|
2013-04-16 23:58:58 -04:00
|
|
|
static toku_pthread_mutex_t fsync_lock;
|
|
|
|
#endif
|
|
|
|
|
|
|
|
int
|
|
|
|
toku_fsync_init(void) {
|
|
|
|
int r = 0;
|
2013-04-16 23:58:59 -04:00
|
|
|
#if !TOKU_WINDOWS_HAS_ATOMIC_64
|
2013-04-16 23:58:58 -04:00
|
|
|
r = toku_pthread_mutex_init(&fsync_lock, NULL); assert(r == 0);
|
|
|
|
#endif
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
toku_fsync_destroy(void) {
|
|
|
|
int r = 0;
|
2013-04-16 23:58:59 -04:00
|
|
|
#if !TOKU_WINDOWS_HAS_ATOMIC_64
|
2013-04-16 23:58:58 -04:00
|
|
|
r = toku_pthread_mutex_destroy(&fsync_lock); assert(r == 0);
|
|
|
|
#endif
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
2013-04-16 23:58:52 -04:00
|
|
|
int
|
|
|
|
toku_set_func_fsync(int (*fsync_function)(int)) {
|
|
|
|
t_fsync = fsync_function;
|
|
|
|
return 0;
|
|
|
|
}
|
2013-04-16 23:58:58 -04:00
|
|
|
// Returns the current gettimeofday() reading expressed in microseconds.
// Asserts that gettimeofday() succeeds.
static uint64_t get_tnow(void) {
    struct timeval now;
    int rc = gettimeofday(&now, NULL);
    assert(rc == 0);
    uint64_t seconds_as_usec = now.tv_sec * 1000000ULL;
    return seconds_as_usec + now.tv_usec;
}
|
2013-04-16 23:58:52 -04:00
|
|
|
|
2013-04-16 23:58:58 -04:00
|
|
|
// keep trying if fsync fails because of EINTR
|
2013-04-16 23:58:52 -04:00
|
|
|
int
|
2013-04-16 23:59:02 -04:00
|
|
|
toku_file_fsync_without_accounting (int fd) {
|
2013-04-16 23:58:58 -04:00
|
|
|
int r = -1;
|
|
|
|
while (r != 0) {
|
|
|
|
if (t_fsync)
|
|
|
|
r = t_fsync(fd);
|
|
|
|
else
|
|
|
|
r = fsync(fd);
|
|
|
|
if (r)
|
|
|
|
assert(errno==EINTR);
|
|
|
|
}
|
2013-04-16 23:59:02 -04:00
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
toku_file_fsync(int fd) {
|
|
|
|
uint64_t tstart = get_tnow();
|
|
|
|
int r = toku_file_fsync_without_accounting(fd);
|
2013-04-16 23:58:59 -04:00
|
|
|
#if TOKU_WINDOWS_HAS_ATOMIC_64
|
2013-04-16 23:58:58 -04:00
|
|
|
toku_sync_fetch_and_increment_uint64(&toku_fsync_count);
|
|
|
|
toku_sync_fetch_and_add_uint64(&toku_fsync_time, get_tnow() - tstart);
|
|
|
|
#else
|
|
|
|
//These two need to be fully 64 bit and atomic.
|
|
|
|
//The windows atomic add 64 bit is not available.
|
|
|
|
//toku_sync_fetch_and_add_uint64 (and increment) treat it as 32 bit, and
|
|
|
|
//would overflow.
|
|
|
|
//Even on 32 bit machines, aligned 64 bit writes/writes are atomic, so we just
|
|
|
|
//need to make sure there's only one writer for these two variables.
|
|
|
|
//Protect with a mutex. Fsync is rare/slow enough that this should be ok.
|
|
|
|
int r_mutex;
|
|
|
|
r_mutex = toku_pthread_mutex_lock(&fsync_lock); assert(r_mutex == 0);
|
|
|
|
toku_fsync_count++;
|
|
|
|
toku_fsync_time += get_tnow() - tstart;
|
|
|
|
r_mutex = toku_pthread_mutex_unlock(&fsync_lock); assert(r_mutex == 0);
|
|
|
|
#endif
|
2013-04-16 23:58:52 -04:00
|
|
|
return r;
|
|
|
|
}
|
2013-04-16 23:58:52 -04:00
|
|
|
|
|
|
|
void
|
|
|
|
toku_get_fsync_times(uint64_t *fsync_count, uint64_t *fsync_time) {
|
|
|
|
*fsync_count = toku_fsync_count;
|
|
|
|
*fsync_time = toku_fsync_time;
|
|
|
|
}
|
|
|
|
|
2013-04-16 23:59:01 -04:00
|
|
|
// Mutex held by mkstemp() around its _mktemp_s() + open() sequence.
// Initialized by toku_mkstemp_init() and destroyed by toku_mkstemp_destroy().
static toku_pthread_mutex_t mkstemp_lock;
|
|
|
|
|
|
|
|
int
|
|
|
|
toku_mkstemp_init(void) {
|
|
|
|
int r = 0;
|
|
|
|
r = toku_pthread_mutex_init(&mkstemp_lock, NULL); assert(r == 0);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
toku_mkstemp_destroy(void) {
|
|
|
|
int r = 0;
|
|
|
|
r = toku_pthread_mutex_destroy(&mkstemp_lock); assert(r == 0);
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
|
|
|
|
int mkstemp (char * template) {
|
|
|
|
int fd;
|
|
|
|
int r_mutex;
|
|
|
|
r_mutex = toku_pthread_mutex_lock(&mkstemp_lock);
|
|
|
|
assert(r_mutex == 0);
|
|
|
|
errno_t err = _mktemp_s(template, strlen(template)+1);
|
|
|
|
if (err!=0) {
|
|
|
|
fd = -1;
|
|
|
|
errno = err;
|
|
|
|
goto cleanup;
|
|
|
|
}
|
|
|
|
assert(err==0);
|
|
|
|
fd = open(template, _O_BINARY|_O_CREAT|_O_SHORT_LIVED|_O_EXCL|_O_RDWR, _S_IREAD|_S_IWRITE);
|
|
|
|
cleanup:
|
|
|
|
r_mutex = toku_pthread_mutex_unlock(&mkstemp_lock);
|
|
|
|
assert(r_mutex == 0);
|
|
|
|
return fd;
|
|
|
|
}
|
|
|
|
|
2013-04-16 23:59:02 -04:00
|
|
|
toku_off_t
|
|
|
|
ftello(FILE *stream) {
|
|
|
|
toku_off_t offset = _ftelli64(stream);
|
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
|