The function git_wcwidth() returns, for a given Unicode code point, its width on the display: -1 for control characters, 0 for combining or other non-visible code points, 1 for e.g. ASCII, and 2 for double-width code points. This table was originally extracted for one Unicode version, probably 3.2. These days we use two tables, one for zero-width and another for double-width code points. Make it easier to update these tables to a later version of Unicode by factoring them out of utf8.c into unicode_width.h and adding the script update_unicode.sh, which regenerates the tables from the latest Unicode specification files. Thanks to Peter Krefting <peter@softwolves.pp.se> and Kevin Bracey <kevin@bracey.fi> for helping with their Unicode knowledge. Signed-off-by: Torsten Bögershausen <tboegi@web.de> Signed-off-by: Junio C Hamano <gitster@pobox.com>
		
			
				
	
	
		
			38 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			38 lines
		
	
	
		
			1.1 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
#!/bin/sh
#
# Regenerate unicode_width.h (the zero_width[] and double_width[] interval
# tables) from the Unicode Character Database.
#
# Run from the directory that should receive unicode_width.h.  The script
# creates a scratch directory ./unicode, downloads UnicodeData.txt and
# EastAsianWidth.txt into it (only if they are not already there), clones
# and builds the "uniset" tool inside it, and then overwrites
# ../unicode_width.h relative to that scratch directory.
#
# Requires: wget, git, autoreconf, make and a C compiler.
#
#See http://www.unicode.org/reports/tr44/
#
#Me Enclosing_Mark  an enclosing combining mark
#Mn Nonspacing_Mark a nonspacing combining mark (zero advance width)
#Cf Format          a format control character
#

# Resolved after "cd unicode" below, so this names a file in the directory
# the script was started from.
UNICODEWIDTH_H=../unicode_width.h

if ! test -d unicode; then
	mkdir unicode
fi &&
( cd unicode &&
	# The data files are fetched only once; remove them by hand to pick up
	# a newer Unicode release.
	if ! test -f UnicodeData.txt; then
		wget http://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt
	fi &&
	if ! test -f EastAsianWidth.txt; then
		wget http://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
	fi &&
	if ! test -d uniset; then
		git clone https://github.com/depp/uniset.git
	fi &&
	(
		cd uniset &&
		# Configure only when no executable has been built yet; "make" is
		# always run so an existing checkout is brought up to date.
		if ! test -x uniset; then
			autoreconf -i &&
			./configure --enable-warnings=-Werror CFLAGS='-O0 -ggdb'
		fi &&
		make
	) &&
	# zero_width[]: general categories Me, Mn and Cf, plus U+1160..U+11FF
	# (part of the Hangul Jamo block), minus U+00AD (SOFT HYPHEN).
	echo "static const struct interval zero_width[] = {" >"$UNICODEWIDTH_H" &&
	UNICODE_DIR=. ./uniset/uniset --32 cat:Me,Mn,Cf + U+1160..U+11FF - U+00AD |
	grep -v plane >>"$UNICODEWIDTH_H" &&
	echo "};" >>"$UNICODEWIDTH_H" &&
	# double_width[]: East Asian Width classes F (Fullwidth) and W (Wide).
	# The "plane" lines that uniset emits are stripped here exactly as for
	# zero_width[] above, so that both tables contain nothing but
	# code-point intervals.
	# NOTE(review): the consumer in utf8.c presumably searches these
	# tables as sorted interval lists, which stray plane entries would
	# break -- confirm against utf8.c.
	echo "static const struct interval double_width[] = {" >>"$UNICODEWIDTH_H" &&
	UNICODE_DIR=. ./uniset/uniset --32 eaw:F,W |
	grep -v plane >>"$UNICODEWIDTH_H" &&
	echo "};" >>"$UNICODEWIDTH_H"
)