chunk-format: create read chunk API
Add the capability to read the table of contents, then pair the chunks
with necessary logic using read_chunk_fn pointers. Callers will be added
in future changes, but the typical outline will be:
 1. initialize a 'struct chunkfile' with init_chunkfile(NULL).
 2. call read_table_of_contents().
 3. for each chunk to parse,
    a. call pair_chunk() to assign a pointer with the chunk position, or
    b. call read_chunk() to run a callback on the chunk start and size.
 4. call free_chunkfile() to clear the 'struct chunkfile' data.
We are re-using the anonymous 'struct chunkfile' data, as it is internal
to the chunk-format API. This gives it essentially two modes: write and
read. If the same struct instance was used for both reads and writes,
then there would be failures.
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
			
			
This commit is contained in:
		
				
					committed by
					
						
						Junio C Hamano
					
				
			
			
				
	
			
			
			
						parent
						
							63a8f0e9b9
						
					
				
				
					commit
					5f0879f54b
				
			@ -11,6 +11,8 @@ struct chunk_info {
 | 
			
		||||
	uint32_t id;
 | 
			
		||||
	uint64_t size;
 | 
			
		||||
	chunk_write_fn write_fn;
 | 
			
		||||
 | 
			
		||||
	const void *start;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
struct chunkfile {
 | 
			
		||||
@ -88,3 +90,81 @@ int write_chunkfile(struct chunkfile *cf, void *data)
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int read_table_of_contents(struct chunkfile *cf,
 | 
			
		||||
			   const unsigned char *mfile,
 | 
			
		||||
			   size_t mfile_size,
 | 
			
		||||
			   uint64_t toc_offset,
 | 
			
		||||
			   int toc_length)
 | 
			
		||||
{
 | 
			
		||||
	uint32_t chunk_id;
 | 
			
		||||
	const unsigned char *table_of_contents = mfile + toc_offset;
 | 
			
		||||
 | 
			
		||||
	ALLOC_GROW(cf->chunks, toc_length, cf->chunks_alloc);
 | 
			
		||||
 | 
			
		||||
	while (toc_length--) {
 | 
			
		||||
		uint64_t chunk_offset, next_chunk_offset;
 | 
			
		||||
 | 
			
		||||
		chunk_id = get_be32(table_of_contents);
 | 
			
		||||
		chunk_offset = get_be64(table_of_contents + 4);
 | 
			
		||||
 | 
			
		||||
		if (!chunk_id) {
 | 
			
		||||
			error(_("terminating chunk id appears earlier than expected"));
 | 
			
		||||
			return 1;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		table_of_contents += CHUNK_TOC_ENTRY_SIZE;
 | 
			
		||||
		next_chunk_offset = get_be64(table_of_contents + 4);
 | 
			
		||||
 | 
			
		||||
		if (next_chunk_offset < chunk_offset ||
 | 
			
		||||
		    next_chunk_offset > mfile_size - the_hash_algo->rawsz) {
 | 
			
		||||
			error(_("improper chunk offset(s) %"PRIx64" and %"PRIx64""),
 | 
			
		||||
			      chunk_offset, next_chunk_offset);
 | 
			
		||||
			return -1;
 | 
			
		||||
		}
 | 
			
		||||
 | 
			
		||||
		cf->chunks[cf->chunks_nr].id = chunk_id;
 | 
			
		||||
		cf->chunks[cf->chunks_nr].start = mfile + chunk_offset;
 | 
			
		||||
		cf->chunks[cf->chunks_nr].size = next_chunk_offset - chunk_offset;
 | 
			
		||||
		cf->chunks_nr++;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	chunk_id = get_be32(table_of_contents);
 | 
			
		||||
	if (chunk_id) {
 | 
			
		||||
		error(_("final chunk has non-zero id %"PRIx32""), chunk_id);
 | 
			
		||||
		return -1;
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return 0;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 * Chunk callback used by pair_chunk(): store the address where the
 * chunk begins into the pointer that 'data' refers to. The chunk size
 * is not needed and is ignored. Always returns 0.
 */
static int pair_chunk_fn(const unsigned char *chunk_start,
			 size_t chunk_size,
			 void *data)
{
	/* 'data' carries the caller's 'const unsigned char **'. */
	*(const unsigned char **)data = chunk_start;
	return 0;
}
 | 
			
		||||
 | 
			
		||||
int pair_chunk(struct chunkfile *cf,
 | 
			
		||||
	       uint32_t chunk_id,
 | 
			
		||||
	       const unsigned char **p)
 | 
			
		||||
{
 | 
			
		||||
	return read_chunk(cf, chunk_id, pair_chunk_fn, p);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int read_chunk(struct chunkfile *cf,
 | 
			
		||||
	       uint32_t chunk_id,
 | 
			
		||||
	       chunk_read_fn fn,
 | 
			
		||||
	       void *data)
 | 
			
		||||
{
 | 
			
		||||
	int i;
 | 
			
		||||
 | 
			
		||||
	for (i = 0; i < cf->chunks_nr; i++) {
 | 
			
		||||
		if (cf->chunks[i].id == chunk_id)
 | 
			
		||||
			return fn(cf->chunks[i].start, cf->chunks[i].size, data);
 | 
			
		||||
	}
 | 
			
		||||
 | 
			
		||||
	return CHUNK_NOT_FOUND;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -8,6 +8,20 @@ struct chunkfile;
 | 
			
		||||
 | 
			
		||||
#define CHUNK_TOC_ENTRY_SIZE (sizeof(uint32_t) + sizeof(uint64_t))
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Initialize a 'struct chunkfile' for writing _or_ reading a file
 | 
			
		||||
 * with the chunk format.
 | 
			
		||||
 *
 | 
			
		||||
 * If writing a file, supply a non-NULL 'struct hashfile *' that will
 | 
			
		||||
 * be used to write.
 | 
			
		||||
 *
 | 
			
		||||
 * If reading a file, use a NULL 'struct hashfile *' and then call
 | 
			
		||||
 * read_table_of_contents() with the memory-mapped file data. Then use
 | 
			
		||||
 * pair_chunk() or read_chunk() to access each chunk, as appropriate.
 | 
			
		||||
 *
 | 
			
		||||
 * DO NOT MIX THESE MODES. Use different 'struct chunkfile' instances
 | 
			
		||||
 * for reading and writing.
 | 
			
		||||
 */
 | 
			
		||||
struct chunkfile *init_chunkfile(struct hashfile *f);
 | 
			
		||||
void free_chunkfile(struct chunkfile *cf);
 | 
			
		||||
int get_num_chunks(struct chunkfile *cf);
 | 
			
		||||
@ -18,4 +32,37 @@ void add_chunk(struct chunkfile *cf,
 | 
			
		||||
	       chunk_write_fn fn);
 | 
			
		||||
int write_chunkfile(struct chunkfile *cf, void *data);
 | 
			
		||||
 | 
			
		||||
/*
 * Read the table of contents of a chunk-format file into 'cf'.
 *
 * 'mfile' is the start of the file data, 'mfile_size' its length in
 * bytes, 'toc_offset' the position of the table within 'mfile', and
 * 'toc_length' the number of chunk entries expected before the
 * terminating entry (whose id is zero).
 *
 * Returns 0 on success; otherwise an error is reported and a non-zero
 * value is returned.
 */
int read_table_of_contents(struct chunkfile *cf,
 | 
			
		||||
			   const unsigned char *mfile,
 | 
			
		||||
			   size_t mfile_size,
 | 
			
		||||
			   uint64_t toc_offset,
 | 
			
		||||
			   int toc_length);
 | 
			
		||||
 | 
			
		||||
#define CHUNK_NOT_FOUND (-2)
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Find 'chunk_id' in the given chunkfile and assign the
 | 
			
		||||
 * given pointer to the position in the mmap'd file where
 | 
			
		||||
 * that chunk begins.
 | 
			
		||||
 *
 | 
			
		||||
 * Returns CHUNK_NOT_FOUND if the chunk does not exist.
 | 
			
		||||
 */
 | 
			
		||||
int pair_chunk(struct chunkfile *cf,
 | 
			
		||||
	       uint32_t chunk_id,
 | 
			
		||||
	       const unsigned char **p);
 | 
			
		||||
 | 
			
		||||
typedef int (*chunk_read_fn)(const unsigned char *chunk_start,
 | 
			
		||||
			     size_t chunk_size, void *data);
 | 
			
		||||
/*
 | 
			
		||||
 * Find 'chunk_id' in the given chunkfile and call the
 | 
			
		||||
 * given chunk_read_fn method with the information for
 | 
			
		||||
 * that chunk.
 | 
			
		||||
 *
 | 
			
		||||
 * Returns CHUNK_NOT_FOUND if the chunk does not exist.
 | 
			
		||||
 */
 | 
			
		||||
int read_chunk(struct chunkfile *cf,
 | 
			
		||||
	       uint32_t chunk_id,
 | 
			
		||||
	       chunk_read_fn fn,
 | 
			
		||||
	       void *data);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user