 5aec7231c8
			
		
	
	5aec7231c8
	
	
	
		
			
			Code to write out reftable has seen some optimization and simplification. * ps/reftable-write-optim: reftable/block: reuse compressed array reftable/block: reuse zstream when writing log blocks reftable/writer: reset `last_key` instead of releasing it reftable/writer: unify releasing memory reftable/writer: refactorings for `writer_flush_nonempty_block()` reftable/writer: refactorings for `writer_add_record()` refs/reftable: don't recompute committer ident reftable: remove name checks refs/reftable: skip duplicate name checks refs/reftable: perform explicit D/F check when writing symrefs refs/reftable: fix D/F conflict error message on ref copy
		
			
				
	
	
		
			552 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			552 lines
		
	
	
		
			14 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
| Copyright 2020 Google LLC
 | |
| 
 | |
| Use of this source code is governed by a BSD-style
 | |
| license that can be found in the LICENSE file or at
 | |
| https://developers.google.com/open-source/licenses/bsd
 | |
| */
 | |
| 
 | |
| #include "block.h"
 | |
| 
 | |
| #include "blocksource.h"
 | |
| #include "constants.h"
 | |
| #include "record.h"
 | |
| #include "reftable-error.h"
 | |
| #include "system.h"
 | |
| #include <zlib.h>
 | |
| 
 | |
/* Size in bytes of the reftable file header for the given format version. */
int header_size(int version)
{
	if (version == 1)
		return 24;
	if (version == 2)
		return 28;
	/* Callers must only pass supported versions. */
	abort();
}
 | |
| 
 | |
/* Size in bytes of the reftable file footer for the given format version. */
int footer_size(int version)
{
	if (version == 1)
		return 68;
	if (version == 2)
		return 72;
	/* Callers must only pass supported versions. */
	abort();
}
 | |
| 
 | |
/*
 * Account for a record of `n` encoded bytes that is about to be committed to
 * the block. If `is_restart` is set, additionally record the record's offset
 * as a restart point (unless the restart table is already full). Returns -1
 * if the record plus the grown restart table would overflow the block, and
 * 0 on success, in which case `key` is remembered as the new `last_key` for
 * prefix compression of the following record.
 */
static int block_writer_register_restart(struct block_writer *w, int n,
					 int is_restart, struct strbuf *key)
{
	int rlen = w->restart_len;
	/* The restart count is serialized in 16 bits; cap the table size. */
	if (rlen >= MAX_RESTARTS) {
		is_restart = 0;
	}

	if (is_restart) {
		rlen++;
	}
	/*
	 * Trailer layout: 3 bytes per restart offset followed by a 2-byte
	 * restart count. Reject the record if it would not leave room for
	 * that trailer.
	 */
	if (2 + 3 * rlen + n > w->block_size - w->next)
		return -1;
	if (is_restart) {
		REFTABLE_ALLOC_GROW(w->restarts, w->restart_len + 1, w->restart_cap);
		w->restarts[w->restart_len++] = w->next;
	}

	w->next += n;

	/* Remember this key so the next record can be prefix-compressed. */
	strbuf_reset(&w->last_key);
	strbuf_addbuf(&w->last_key, key);
	w->entries++;
	return 0;
}
 | |
| 
 | |
/*
 * Prepare `bw` for writing a block of type `typ` into the caller-owned
 * buffer `buf`. The type byte is placed at `header_off` and records start
 * 4 bytes after it. The zlib stream is allocated lazily here and kept
 * alive across blocks so it can be reused via deflateReset().
 */
void block_writer_init(struct block_writer *bw, uint8_t typ, uint8_t *buf,
		       uint32_t block_size, uint32_t header_off, int hash_size)
{
	bw->buf = buf;
	bw->hash_size = hash_size;
	bw->block_size = block_size;
	bw->header_off = header_off;
	bw->buf[header_off] = typ;
	/* First record goes right after the 4-byte block header. */
	bw->next = header_off + 4;
	bw->restart_interval = 16;
	bw->entries = 0;
	bw->restart_len = 0;
	bw->last_key.len = 0;
	if (!bw->zstream) {
		REFTABLE_CALLOC_ARRAY(bw->zstream, 1);
		/* NOTE(review): deflateInit() result is unchecked — could fail on OOM. */
		deflateInit(bw->zstream, 9);
	}
}
 | |
| 
 | |
| uint8_t block_writer_type(struct block_writer *bw)
 | |
| {
 | |
| 	return bw->buf[bw->header_off];
 | |
| }
 | |
| 
 | |
/* Adds the reftable_record to the block. Returns -1 if it does not fit, 0 on
   success. Returns REFTABLE_API_ERROR if attempting to write a record with
   empty key. */
int block_writer_add(struct block_writer *w, struct reftable_record *rec)
{
	struct strbuf empty = STRBUF_INIT;
	/*
	 * Every `restart_interval`-th record is written without prefix
	 * compression: encoding against an empty "previous key" forces a
	 * full key, which makes the record a valid restart point.
	 */
	struct strbuf last =
		w->entries % w->restart_interval == 0 ? empty : w->last_key;
	/* Remaining free space in the block buffer. */
	struct string_view out = {
		.buf = w->buf + w->next,
		.len = w->block_size - w->next,
	};

	struct string_view start = out;

	int is_restart = 0;
	struct strbuf key = STRBUF_INIT;
	int n = 0;
	int err = -1;

	reftable_record_key(rec, &key);
	if (!key.len) {
		err = REFTABLE_API_ERROR;
		goto done;
	}

	/* Write the (possibly prefix-compressed) key... */
	n = reftable_encode_key(&is_restart, out, last, key,
				reftable_record_val_type(rec));
	if (n < 0)
		goto done;
	string_view_consume(&out, n);

	/* ...followed by the record payload. */
	n = reftable_record_encode(rec, out, w->hash_size);
	if (n < 0)
		goto done;
	string_view_consume(&out, n);

	/*
	 * Commit the record: `start.len - out.len` is the number of bytes
	 * actually written above.
	 */
	err = block_writer_register_restart(w, start.len - out.len, is_restart,
					    &key);
done:
	strbuf_release(&key);
	return err;
}
 | |
| 
 | |
/*
 * Finalize the block: append the restart table and restart count, patch the
 * block length into the header and, for log blocks, zlib-compress the
 * payload in place. Returns the number of bytes used in the block buffer,
 * or REFTABLE_ZLIB_ERROR on compression failure.
 */
int block_writer_finish(struct block_writer *w)
{
	int i;
	/* Serialize restart offsets as 24-bit big-endian values. */
	for (i = 0; i < w->restart_len; i++) {
		put_be24(w->buf + w->next, w->restarts[i]);
		w->next += 3;
	}

	/* 16-bit restart count, then record the block length in the header. */
	put_be16(w->buf + w->next, w->restart_len);
	w->next += 2;
	put_be24(w->buf + 1 + w->header_off, w->next);

	/*
	 * Log records are stored zlib-compressed. Note that the compression
	 * also spans over the restart points we have just written.
	 */
	if (block_writer_type(w) == BLOCK_TYPE_LOG) {
		int block_header_skip = 4 + w->header_off;
		uLongf src_len = w->next - block_header_skip, compressed_len;
		int ret;

		/* Reuse the deflate stream set up by block_writer_init(). */
		ret = deflateReset(w->zstream);
		if (ret != Z_OK)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Precompute the upper bound of how many bytes the compressed
		 * data may end up with. Combined with `Z_FINISH`, `deflate()`
		 * is guaranteed to return `Z_STREAM_END`.
		 */
		compressed_len = deflateBound(w->zstream, src_len);
		REFTABLE_ALLOC_GROW(w->compressed, compressed_len, w->compressed_cap);

		w->zstream->next_out = w->compressed;
		w->zstream->avail_out = compressed_len;
		w->zstream->next_in = w->buf + block_header_skip;
		w->zstream->avail_in = src_len;

		/*
		 * We want to perform the compression in a single step, which
		 * is why we can pass Z_FINISH here. As we have precomputed the
		 * deflated buffer's size via `deflateBound()` this function is
		 * guaranteed to succeed according to the zlib documentation.
		 */
		ret = deflate(w->zstream, Z_FINISH);
		if (ret != Z_STREAM_END)
			return REFTABLE_ZLIB_ERROR;

		/*
		 * Overwrite the uncompressed data we have already written and
		 * adjust the `next` pointer to point right after the
		 * compressed data.
		 */
		memcpy(w->buf + block_header_skip, w->compressed,
		       w->zstream->total_out);
		w->next = w->zstream->total_out + block_header_skip;
	}

	return w->next;
}
 | |
| 
 | |
/*
 * Initialize `br` from `block`, taking ownership of the block's data on
 * success. Log blocks are transparently decompressed into a reader-owned
 * buffer that is reused across calls. Returns 0 on success, or
 * REFTABLE_FORMAT_ERROR / REFTABLE_ZLIB_ERROR on malformed input.
 */
int block_reader_init(struct block_reader *br, struct reftable_block *block,
		      uint32_t header_off, uint32_t table_block_size,
		      int hash_size)
{
	uint32_t full_block_size = table_block_size;
	uint8_t typ = block->data[header_off];
	/* The 24-bit length stored right after the type byte. */
	uint32_t sz = get_be24(block->data + header_off + 1);
	int err = 0;
	uint16_t restart_count = 0;
	uint32_t restart_start = 0;
	uint8_t *restart_bytes = NULL;

	/* Drop whatever block the reader held before. */
	reftable_block_done(&br->block);

	if (!reftable_is_block_type(typ)) {
		err =  REFTABLE_FORMAT_ERROR;
		goto done;
	}

	if (typ == BLOCK_TYPE_LOG) {
		uint32_t block_header_skip = 4 + header_off;
		uLong dst_len = sz - block_header_skip;
		uLong src_len = block->len - block_header_skip;

		/* Log blocks specify the *uncompressed* size in their header. */
		REFTABLE_ALLOC_GROW(br->uncompressed_data, sz,
				    br->uncompressed_cap);

		/* Copy over the block header verbatim. It's not compressed. */
		memcpy(br->uncompressed_data, block->data, block_header_skip);

		/* Allocate the inflate stream once; reset it on reuse. */
		if (!br->zstream) {
			REFTABLE_CALLOC_ARRAY(br->zstream, 1);
			err = inflateInit(br->zstream);
		} else {
			err = inflateReset(br->zstream);
		}
		if (err != Z_OK) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}

		br->zstream->next_in = block->data + block_header_skip;
		br->zstream->avail_in = src_len;
		br->zstream->next_out = br->uncompressed_data + block_header_skip;
		br->zstream->avail_out = dst_len;

		/*
		 * We know both input as well as output size, and we know that
		 * the sizes should never be bigger than `uInt_MAX` because
		 * blocks can at most be 16MB large. We can thus use `Z_FINISH`
		 * here to instruct zlib to inflate the data in one go, which
		 * is more efficient than using `Z_NO_FLUSH`.
		 */
		err = inflate(br->zstream, Z_FINISH);
		if (err != Z_STREAM_END) {
			err = REFTABLE_ZLIB_ERROR;
			goto done;
		}
		err = 0;

		/* The inflated payload must match the advertised size exactly. */
		if (br->zstream->total_out + block_header_skip != sz) {
			err = REFTABLE_FORMAT_ERROR;
			goto done;
		}

		/* We're done with the input data. */
		reftable_block_done(block);
		block->data = br->uncompressed_data;
		block->len = sz;
		/* On-disk size of this block: header plus consumed compressed bytes. */
		full_block_size = src_len + block_header_skip - br->zstream->avail_in;
	} else if (full_block_size == 0) {
		full_block_size = sz;
	} else if (sz < full_block_size && sz < block->len &&
		   block->data[sz] != 0) {
		/* If the block is smaller than the full block size, it is
		   padded (data followed by '\0') or the next block is
		   unaligned. */
		full_block_size = sz;
	}

	/* The block ends with a 16-bit restart count preceded by the offsets. */
	restart_count = get_be16(block->data + sz - 2);
	restart_start = sz - 2 - 3 * restart_count;
	restart_bytes = block->data + restart_start;

	/* transfer ownership. */
	br->block = *block;
	block->data = NULL;
	block->len = 0;

	br->hash_size = hash_size;
	br->block_len = restart_start;
	br->full_block_size = full_block_size;
	br->header_off = header_off;
	br->restart_count = restart_count;
	br->restart_bytes = restart_bytes;

done:
	return err;
}
 | |
| 
 | |
/*
 * Release all resources owned by the reader. The zlib stream must be torn
 * down with inflateEnd() before its storage is freed.
 */
void block_reader_release(struct block_reader *br)
{
	inflateEnd(br->zstream);
	reftable_free(br->zstream);
	reftable_free(br->uncompressed_data);
	reftable_block_done(&br->block);
}
 | |
| 
 | |
| uint8_t block_reader_type(const struct block_reader *r)
 | |
| {
 | |
| 	return r->block.data[r->header_off];
 | |
| }
 | |
| 
 | |
| int block_reader_first_key(const struct block_reader *br, struct strbuf *key)
 | |
| {
 | |
| 	int off = br->header_off + 4, n;
 | |
| 	struct string_view in = {
 | |
| 		.buf = br->block.data + off,
 | |
| 		.len = br->block_len - off,
 | |
| 	};
 | |
| 	uint8_t extra = 0;
 | |
| 
 | |
| 	strbuf_reset(key);
 | |
| 
 | |
| 	n = reftable_decode_key(key, &extra, in);
 | |
| 	if (n < 0)
 | |
| 		return n;
 | |
| 	if (!key->len)
 | |
| 		return REFTABLE_FORMAT_ERROR;
 | |
| 
 | |
| 	return 0;
 | |
| }
 | |
| 
 | |
| static uint32_t block_reader_restart_offset(const struct block_reader *br, int i)
 | |
| {
 | |
| 	return get_be24(br->restart_bytes + 3 * i);
 | |
| }
 | |
| 
 | |
| void block_iter_seek_start(struct block_iter *it, const struct block_reader *br)
 | |
| {
 | |
| 	it->block = br->block.data;
 | |
| 	it->block_len = br->block_len;
 | |
| 	it->hash_size = br->hash_size;
 | |
| 	strbuf_reset(&it->last_key);
 | |
| 	it->next_off = br->header_off + 4;
 | |
| }
 | |
| 
 | |
struct restart_needle_less_args {
	/* Set to 1 when a restart record could not be decoded. */
	int error;
	/* The key we are searching for. */
	struct strbuf needle;
	const struct block_reader *reader;
};

/*
 * binsearch() predicate: returns 1 when the needle sorts strictly before
 * the key stored at restart point `idx`, 0 otherwise. On decode failure it
 * sets `args->error` and returns -1.
 */
static int restart_needle_less(size_t idx, void *_args)
{
	struct restart_needle_less_args *args = _args;
	uint32_t off = block_reader_restart_offset(args->reader, idx);
	struct string_view in = {
		.buf = args->reader->block.data + off,
		.len = args->reader->block_len - off,
	};
	uint64_t prefix_len, suffix_len;
	uint8_t extra;
	int n;

	/*
	 * Records at restart points are stored without prefix compression, so
	 * there is no need to fully decode the record key here. This removes
	 * the need for allocating memory.
	 */
	n = reftable_decode_keylen(in, &prefix_len, &suffix_len, &extra);
	if (n < 0 || prefix_len) {
		args->error = 1;
		return -1;
	}

	string_view_consume(&in, n);
	/* The full key must fit in the remaining block data. */
	if (suffix_len > in.len) {
		args->error = 1;
		return -1;
	}

	/* Compare up to the shorter length; ties break on length. */
	n = memcmp(args->needle.buf, in.buf,
		   args->needle.len < suffix_len ? args->needle.len : suffix_len);
	if (n)
		return n < 0;
	return args->needle.len < suffix_len;
}
 | |
| 
 | |
/*
 * Decode the next record into `rec` and advance the iterator. Returns 0 on
 * success, 1 at end of block, -1 on decode failure and
 * REFTABLE_FORMAT_ERROR when a record has an empty key.
 */
int block_iter_next(struct block_iter *it, struct reftable_record *rec)
{
	struct string_view in = {
		.buf = (unsigned char *) it->block + it->next_off,
		.len = it->block_len - it->next_off,
	};
	struct string_view start = in;
	uint8_t extra = 0;
	int n = 0;

	if (it->next_off >= it->block_len)
		return 1;

	/*
	 * Keys are prefix-compressed against the previous record's key, so
	 * decoding updates `last_key` in place.
	 */
	n = reftable_decode_key(&it->last_key, &extra, in);
	if (n < 0)
		return -1;
	if (!it->last_key.len)
		return REFTABLE_FORMAT_ERROR;

	string_view_consume(&in, n);
	n = reftable_record_decode(rec, it->last_key, extra, in, it->hash_size,
				   &it->scratch);
	if (n < 0)
		return -1;
	string_view_consume(&in, n);

	/* Advance by the total number of bytes consumed for this record. */
	it->next_off += start.len - in.len;
	return 0;
}
 | |
| 
 | |
| void block_iter_reset(struct block_iter *it)
 | |
| {
 | |
| 	strbuf_reset(&it->last_key);
 | |
| 	it->next_off = 0;
 | |
| 	it->block = NULL;
 | |
| 	it->block_len = 0;
 | |
| 	it->hash_size = 0;
 | |
| }
 | |
| 
 | |
| void block_iter_close(struct block_iter *it)
 | |
| {
 | |
| 	strbuf_release(&it->last_key);
 | |
| 	strbuf_release(&it->scratch);
 | |
| }
 | |
| 
 | |
/*
 * Position the iterator such that the next call to `block_iter_next()`
 * yields the first record whose key is greater or equal to `want`. Returns
 * 0 on success and a negative reftable error code on failure.
 */
int block_iter_seek_key(struct block_iter *it, const struct block_reader *br,
			struct strbuf *want)
{
	struct restart_needle_less_args args = {
		.needle = *want,
		.reader = br,
	};
	struct reftable_record rec;
	int err = 0;
	size_t i;

	/*
	 * Perform a binary search over the block's restart points, which
	 * avoids doing a linear scan over the whole block. Like this, we
	 * identify the section of the block that should contain our key.
	 *
	 * Note that we explicitly search for the first restart point _greater_
	 * than the sought-after record, not _greater or equal_ to it. In case
	 * the sought-after record is located directly at the restart point we
	 * would otherwise start doing the linear search at the preceding
	 * restart point. While that works alright, we would end up scanning
	 * too many records.
	 */
	i = binsearch(br->restart_count, &restart_needle_less, &args);
	if (args.error) {
		err = REFTABLE_FORMAT_ERROR;
		goto done;
	}

	/*
	 * Now there are multiple cases:
	 *
	 *   - `i == 0`: The wanted record is smaller than the record found at
	 *     the first restart point. As the first restart point is the first
	 *     record in the block, our wanted record cannot be located in this
	 *     block at all. We still need to position the iterator so that the
	 *     next call to `block_iter_next()` will yield an end-of-iterator
	 *     signal.
	 *
	 *   - `i == restart_count`: The wanted record was not found at any of
	 *     the restart points. As there is no restart point at the end of
	 *     the section the record may thus be contained in the last block.
	 *
	 *   - `i > 0`: The wanted record must be contained in the section
	 *     before the found restart point. We thus do a linear search
	 *     starting from the preceding restart point.
	 */
	if (i > 0)
		it->next_off = block_reader_restart_offset(br, i - 1);
	else
		it->next_off = br->header_off + 4;
	it->block = br->block.data;
	it->block_len = br->block_len;
	it->hash_size = br->hash_size;

	reftable_record_init(&rec, block_reader_type(br));

	/*
	 * We're looking for the last entry less than the wanted key so that
	 * the next call to `block_reader_next()` would yield the wanted
	 * record. We thus don't want to position our reader at the sought
	 * after record, but one before. To do so, we have to go one entry too
	 * far and then back up.
	 */
	while (1) {
		size_t prev_off = it->next_off;

		err = block_iter_next(it, &rec);
		if (err < 0)
			goto done;
		if (err > 0) {
			/* End of block: rewind so the caller sees EOF, too. */
			it->next_off = prev_off;
			err = 0;
			goto done;
		}

		/*
		 * Check whether the current key is greater or equal to the
		 * sought-after key. In case it is greater we know that the
		 * record does not exist in the block and can thus abort early.
		 * In case it is equal to the sought-after key we have found
		 * the desired record.
		 *
		 * Note that we store the next record's key record directly in
		 * `last_key` without restoring the key of the preceding record
		 * in case we need to go one record back. This is safe to do as
		 * `block_iter_next()` would return the ref whose key is equal
		 * to `last_key` now, and naturally all keys share a prefix
		 * with themselves.
		 */
		reftable_record_key(&rec, &it->last_key);
		if (strbuf_cmp(&it->last_key, want) >= 0) {
			it->next_off = prev_off;
			goto done;
		}
	}

done:
	reftable_record_release(&rec);
	return err;
}
 | |
| 
 | |
/*
 * Release all memory owned by the writer. The zlib stream must be torn
 * down with deflateEnd() before its storage is freed.
 */
void block_writer_release(struct block_writer *bw)
{
	deflateEnd(bw->zstream);
	FREE_AND_NULL(bw->zstream);
	FREE_AND_NULL(bw->restarts);
	FREE_AND_NULL(bw->compressed);
	strbuf_release(&bw->last_key);
	/* the block is not owned. */
}
 | |
| 
 | |
| void reftable_block_done(struct reftable_block *blockp)
 | |
| {
 | |
| 	struct reftable_block_source source = blockp->source;
 | |
| 	if (blockp && source.ops)
 | |
| 		source.ops->return_block(source.arg, blockp);
 | |
| 	blockp->data = NULL;
 | |
| 	blockp->len = 0;
 | |
| 	blockp->source.ops = NULL;
 | |
| 	blockp->source.arg = NULL;
 | |
| }
 |