Merge branch 'ls/p4-lfs'

Teach "git p4" to send large blobs outside the repository by
talking to Git LFS.

* ls/p4-lfs:
  git-p4: add Git LFS backend for large file system
  git-p4: add support for large file systems
  git-p4: check free space during streaming
  git-p4: add file streaming progress in verbose mode
  git-p4: return an empty list if a list config has no values
  git-p4: add gitConfigInt reader
  git-p4: add optional type specifier to gitConfig reader
This commit is contained in:
Junio C Hamano
2015-10-15 15:43:52 -07:00
4 changed files with 766 additions and 16 deletions

270
git-p4.py
View File

@ -22,6 +22,8 @@ import platform
import re
import shutil
import stat
import zipfile
import zlib
try:
from subprocess import CalledProcessError
@ -104,6 +106,16 @@ def chdir(path, is_client_path=False):
path = os.getcwd()
os.environ['PWD'] = path
def calcDiskFree():
    """Return the free space in bytes on the disk that holds the current
       working directory."""
    if platform.system() == 'Windows':
        # ctypes is only needed for the Windows API call below. Import it
        # locally so this function does not rely on a module-level import.
        import ctypes
        free_bytes = ctypes.c_ulonglong(0)
        ctypes.windll.kernel32.GetDiskFreeSpaceExW(ctypes.c_wchar_p(os.getcwd()), None, None, ctypes.pointer(free_bytes))
        return free_bytes.value
    else:
        # Blocks available to the caller multiplied by the fragment size.
        st = os.statvfs(os.getcwd())
        return st.f_bavail * st.f_frsize
def die(msg):
if verbose:
raise Exception(msg)
@ -602,9 +614,12 @@ def gitBranchExists(branch):
_gitConfig = {}
def gitConfig(key, typeSpecifier=None):
    """Return the value of the git config setting "key" as a stripped
       string.

       If typeSpecifier is given (e.g. '--bool' or '--int') it is passed to
       "git config" in front of the key. Results are cached in _gitConfig
       under the key alone, so typeSpecifier only takes effect on the first
       lookup of a key. Errors from "git config" are ignored."""
    if key not in _gitConfig:
        cmd = [ "git", "config" ]
        if typeSpecifier:
            cmd += [ typeSpecifier ]
        cmd += [ key ]
        s = read_pipe(cmd, ignore_error=True)
        _gitConfig[key] = s.strip()
    return _gitConfig[key]
@ -615,16 +630,26 @@ def gitConfigBool(key):
in the config."""
if not _gitConfig.has_key(key):
cmd = [ "git", "config", "--bool", key ]
_gitConfig[key] = gitConfig(key, '--bool') == "true"
return _gitConfig[key]
def gitConfigInt(key):
    """Return the git config setting "key" as an integer.

       The value is read through gitConfig with the '--int' specifier.
       None is cached and returned when the value cannot be converted to
       an integer (for example when the setting is empty)."""
    if key not in _gitConfig:
        # gitConfig already runs "git config --int"; no separate read_pipe
        # call is needed here.
        try:
            _gitConfig[key] = int(gitConfig(key, '--int'))
        except ValueError:
            _gitConfig[key] = None
    return _gitConfig[key]
def gitConfigList(key):
    """Return all values of the git config setting "key" as a list of
       strings, one entry per output line of "git config --get-all".

       An empty list is returned when the setting has no values. The
       result is cached in _gitConfig."""
    if key not in _gitConfig:
        s = read_pipe(["git", "config", "--get-all", key], ignore_error=True)
        # splitlines() accepts any line terminator, so the result does not
        # depend on os.linesep matching what git printed.
        _gitConfig[key] = s.strip().splitlines()
        if _gitConfig[key] == ['']:
            _gitConfig[key] = []
    return _gitConfig[key]
def p4BranchesInGit(branchesAreInRemotes=True):
@ -907,6 +932,182 @@ def wildcard_present(path):
m = re.search("[*#@%]", path)
return m is not None
class LargeFileSystem(object):
    """Base class for large file system support."""

    def __init__(self, writeToGitStream):
        # Repository-relative paths that are stored in the large file system.
        self.largeFiles = set()
        # Callable taking (git_mode, relPath, contents).
        self.writeToGitStream = writeToGitStream

    def generatePointer(self, cloneDestination, contentFile):
        """Return the content of a pointer file that is stored in Git instead of
           the actual content."""
        # NOTE(review): processContent below and the subclasses in this file
        # use a one-argument form, generatePointer(contentFile).
        assert False, "Method 'generatePointer' required in " + self.__class__.__name__

    def pushFile(self, localLargeFile):
        """Push the actual content which is not stored in the Git repository to
           a server."""
        assert False, "Method 'pushFile' required in " + self.__class__.__name__

    def hasLargeFileExtension(self, relPath):
        """Return True if relPath ends with one of the extensions listed in
           git-p4.largeFileExtensions."""
        return any(
            relPath.endswith('.' + e)
            for e in gitConfigList('git-p4.largeFileExtensions')
        )

    def generateTempFile(self, contents):
        """Write the chunks in contents to a new temporary file and return
           its name. The caller is responsible for removing the file."""
        contentFile = tempfile.NamedTemporaryFile(prefix='git-p4-large-file', delete=False)
        try:
            for d in contents:
                contentFile.write(d)
        finally:
            contentFile.close()
        return contentFile.name

    def _compressedSize(self, contents):
        """Return the size in bytes of contents after deflate compression."""
        contentTempFile = self.generateTempFile(contents)
        try:
            compressedContentFile = tempfile.NamedTemporaryFile(prefix='git-p4-large-file', delete=False)
            # Only the name is needed; zipfile opens the path itself. Close
            # the handle so it is not leaked and the file can be removed.
            compressedContentFile.close()
            try:
                zf = zipfile.ZipFile(compressedContentFile.name, mode='w')
                try:
                    zf.write(contentTempFile, compress_type=zipfile.ZIP_DEFLATED)
                finally:
                    zf.close()
                return zf.infolist()[0].compress_size
            finally:
                os.remove(compressedContentFile.name)
        finally:
            os.remove(contentTempFile)

    def exceedsLargeFileThreshold(self, relPath, contents):
        """Return True if contents is larger than git-p4.largeFileThreshold,
           or if its compressed size is larger than
           git-p4.largeFileCompressedThreshold. Unset thresholds are not
           checked."""
        threshold = gitConfigInt('git-p4.largeFileThreshold')
        compressedThreshold = gitConfigInt('git-p4.largeFileCompressedThreshold')
        if not threshold and not compressedThreshold:
            return False
        contentsSize = sum(len(d) for d in contents)
        if threshold and contentsSize > threshold:
            return True
        if compressedThreshold:
            # Content that is small enough uncompressed is not compressed
            # at all; it is reported as not exceeding the threshold.
            if contentsSize <= compressedThreshold:
                return False
            return self._compressedSize(contents) > compressedThreshold
        return False

    def addLargeFile(self, relPath):
        """Record relPath as stored in the large file system."""
        self.largeFiles.add(relPath)

    def removeLargeFile(self, relPath):
        """Forget relPath; raises KeyError if it was not recorded."""
        self.largeFiles.remove(relPath)

    def isLargeFile(self, relPath):
        """Return True if relPath is recorded as a large file."""
        return relPath in self.largeFiles

    def processContent(self, git_mode, relPath, contents):
        """Processes the content of git fast import. This method decides if a
           file is stored in the large file system and handles all necessary
           steps. Returns the (git_mode, contents) pair to store in Git."""
        if self.exceedsLargeFileThreshold(relPath, contents) or self.hasLargeFileExtension(relPath):
            contentTempFile = self.generateTempFile(contents)
            (git_mode, contents, localLargeFile) = self.generatePointer(contentTempFile)

            # Move temp file to final location in large file system
            largeFileDir = os.path.dirname(localLargeFile)
            if not os.path.isdir(largeFileDir):
                os.makedirs(largeFileDir)
            shutil.move(contentTempFile, localLargeFile)
            self.addLargeFile(relPath)
            if gitConfigBool('git-p4.largeFilePush'):
                self.pushFile(localLargeFile)
            if verbose:
                sys.stderr.write("%s moved to large file system (%s)\n" % (relPath, localLargeFile))
        return (git_mode, contents)
class MockLFS(LargeFileSystem):
    """Mock large file system for testing."""

    def generatePointer(self, contentFile):
        """Build a mock pointer from the first line of contentFile.

           The pointer content is that line prefixed with "pointer-". The
           local storage filename is the same line without its last
           character, placed below .git/mock-storage/local in the current
           working directory.
        """
        with open(contentFile, 'r') as handle:
            firstLine = next(handle)
        storageDir = os.path.join(os.getcwd(), '.git', 'mock-storage', 'local')
        return ('100644', 'pointer-' + firstLine, os.path.join(storageDir, firstLine[:-1]))

    def pushFile(self, localLargeFile):
        """Copy localLargeFile into the sibling "remote" directory, which
           stands in for the remote storage. The directory is created when
           it does not exist yet.
        """
        remoteDir = os.path.join(os.path.dirname(localLargeFile), '..', 'remote')
        if not os.path.exists(remoteDir):
            os.makedirs(remoteDir)
        target = os.path.join(remoteDir, os.path.basename(localLargeFile))
        shutil.copyfile(localLargeFile, target)
class GitLFS(LargeFileSystem):
    """Git LFS as backend for the git-p4 large file system.
       See https://git-lfs.github.com/ for details."""

    def __init__(self, *args):
        LargeFileSystem.__init__(self, *args)
        # Content of the .gitattributes file as imported, before the LFS
        # entries are appended.
        self.baseGitAttributes = []

    def generatePointer(self, contentFile):
        """Generate a Git LFS pointer for the content. Return LFS Pointer file
           mode and content which is stored in the Git repository instead of
           the actual content. Return also the new location of the actual
           content.
        """
        pointerProcess = subprocess.Popen(
            ['git', 'lfs', 'pointer', '--file=' + contentFile],
            stdout=subprocess.PIPE
        )
        pointerFile = pointerProcess.stdout.read()
        if pointerProcess.wait():
            os.remove(contentFile)
            die('git-lfs pointer command failed. Did you install the extension?')

        # Some git-lfs versions print a "Git LFS pointer for <file>" preamble
        # and a blank line on stdout before the pointer, others (reportedly
        # 1.2.0 and later) do not. Strip it only when it is present instead
        # of dropping the first two lines unconditionally.
        if pointerFile.startswith('Git LFS pointer for'):
            pointerFile = re.sub(r'Git LFS pointer for.*\n\n', '', pointerFile, 1)

        # Locate the oid by its key rather than by line position.
        oidMatch = re.search(r'^oid \w+:(\w+)', pointerFile, re.MULTILINE)
        if oidMatch is None:
            os.remove(contentFile)
            die('git-lfs pointer command returned no oid: %r' % pointerFile)
        oid = oidMatch.group(1)

        pointerContents = [line + '\n' for line in pointerFile.splitlines()]
        localLargeFile = os.path.join(
            os.getcwd(),
            '.git', 'lfs', 'objects', oid[:2], oid[2:4],
            oid,
        )
        # LFS Spec states that pointer files should not have the executable bit set.
        gitMode = '100644'
        return (gitMode, pointerContents, localLargeFile)

    def pushFile(self, localLargeFile):
        """Upload the object named by the basename of localLargeFile to the
           'origin' remote with "git lfs push --object-id"."""
        uploadProcess = subprocess.Popen(
            ['git', 'lfs', 'push', '--object-id', 'origin', os.path.basename(localLargeFile)]
        )
        if uploadProcess.wait():
            die('git-lfs push command failed. Did you define a remote?')

    def generateGitAttributes(self):
        """Return the .gitattributes content as a list of lines: the base
           attributes, a Git LFS header, one pattern per configured large
           file extension and one entry per large file that is not already
           covered by an extension."""
        return (
            self.baseGitAttributes +
            [
                '\n',
                '#\n',
                '# Git LFS (see https://git-lfs.github.com/)\n',
                '#\n',
            ] +
            ['*.' + f.replace(' ', '[[:space:]]') + ' filter=lfs -text\n'
                for f in sorted(gitConfigList('git-p4.largeFileExtensions'))
            ] +
            ['/' + f.replace(' ', '[[:space:]]') + ' filter=lfs -text\n'
                for f in sorted(self.largeFiles) if not self.hasLargeFileExtension(f)
            ]
        )

    def addLargeFile(self, relPath):
        """Record relPath as a large file and rewrite .gitattributes."""
        LargeFileSystem.addLargeFile(self, relPath)
        self.writeToGitStream('100644', '.gitattributes', self.generateGitAttributes())

    def removeLargeFile(self, relPath):
        """Forget relPath as a large file and rewrite .gitattributes."""
        LargeFileSystem.removeLargeFile(self, relPath)
        self.writeToGitStream('100644', '.gitattributes', self.generateGitAttributes())

    def processContent(self, git_mode, relPath, contents):
        """Handle .gitattributes by remembering its content and returning it
           with the LFS entries appended. Every other path is delegated to
           LargeFileSystem.processContent."""
        if relPath == '.gitattributes':
            self.baseGitAttributes = contents
            return (git_mode, self.generateGitAttributes())
        else:
            return LargeFileSystem.processContent(self, git_mode, relPath, contents)
class Command:
def __init__(self):
self.usage = "usage: %prog [options]"
@ -1080,6 +1281,9 @@ class P4Submit(Command, P4UserMap):
self.p4HasMoveCommand = p4_has_move_command()
self.branch = None
if gitConfig('git-p4.largeFileSystem'):
die("Large file system not supported for git-p4 submit command. Please remove it from config.")
def check(self):
if len(p4CmdList("opened ...")) > 0:
die("You have files opened with perforce! Close them before starting the sync.")
@ -2030,6 +2234,13 @@ class P4Sync(Command, P4UserMap):
self.clientSpecDirs = None
self.tempBranches = []
self.tempBranchLocation = "git-p4-tmp"
self.largeFileSystem = None
if gitConfig('git-p4.largeFileSystem'):
largeFileSystemConstructor = globals()[gitConfig('git-p4.largeFileSystem')]
self.largeFileSystem = largeFileSystemConstructor(
lambda git_mode, relPath, contents: self.writeToGitStream(git_mode, relPath, contents)
)
if gitConfig("git-p4.syncFromOrigin") == "false":
self.syncWithOrigin = False
@ -2150,13 +2361,22 @@ class P4Sync(Command, P4UserMap):
return branches
def writeToGitStream(self, gitMode, relPath, contents):
    """Write one inline file modification to self.gitStream.

       Emits the 'M <mode> inline <path>' line, a 'data <length>' line
       holding the summed length of all chunks in contents, the chunks
       themselves and a terminating newline."""
    stream = self.gitStream
    stream.write('M %s inline %s\n' % (gitMode, relPath))
    totalLength = 0
    for chunk in contents:
        totalLength += len(chunk)
    stream.write('data %d\n' % totalLength)
    for chunk in contents:
        stream.write(chunk)
    stream.write('\n')
# output one file from the P4 stream
# - helper for streamP4Files
def streamOneP4File(self, file, contents):
relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes)
if verbose:
sys.stderr.write("%s\n" % relPath)
size = int(self.stream_file['fileSize'])
sys.stdout.write('\r%s --> %s (%i MB)\n' % (file['depotFile'], relPath, size/1024/1024))
sys.stdout.flush()
(type_base, type_mods) = split_p4_type(file["type"])
@ -2235,24 +2455,21 @@ class P4Sync(Command, P4UserMap):
if self.verbose:
print 'Path with non-ASCII characters detected. Used %s to encode: %s ' % (encoding, relPath)
self.gitStream.write("M %s inline %s\n" % (git_mode, relPath))
if self.largeFileSystem:
(git_mode, contents) = self.largeFileSystem.processContent(git_mode, relPath, contents)
# total length...
length = 0
for d in contents:
length = length + len(d)
self.gitStream.write("data %d\n" % length)
for d in contents:
self.gitStream.write(d)
self.gitStream.write("\n")
self.writeToGitStream(git_mode, relPath, contents)
# Write the deletion of one P4 file to the git fast-import stream and drop
# the path from the large file system when it is tracked there.
def streamOneP4Deletion(self, file):
# Repository-relative path derived from the P4 path of the deleted file.
relPath = self.stripRepoPath(file['path'], self.branchPrefixes)
if verbose:
# NOTE(review): the message is written to both stderr and stdout here;
# this looks like the pre- and post-change lines of a diff shown together
# -- confirm which one is intended.
sys.stderr.write("delete %s\n" % relPath)
sys.stdout.write("delete %s\n" % relPath)
sys.stdout.flush()
# "D <path>" is the line written to the fast-import stream for a deletion.
self.gitStream.write("D %s\n" % relPath)
# Only paths the large file system knows about are removed from it.
if self.largeFileSystem and self.largeFileSystem.isLargeFile(relPath):
self.largeFileSystem.removeLargeFile(relPath)
# handle another chunk of streaming data
def streamP4FilesCb(self, marshalled):
@ -2262,6 +2479,14 @@ class P4Sync(Command, P4UserMap):
if marshalled["code"] == "error":
if "data" in marshalled:
err = marshalled["data"].rstrip()
if not err and 'fileSize' in self.stream_file:
required_bytes = int((4 * int(self.stream_file["fileSize"])) - calcDiskFree())
if required_bytes > 0:
err = 'Not enough space left on %s! Free at least %i MB.' % (
os.getcwd(), required_bytes/1024/1024
)
if err:
f = None
if self.stream_have_file_info:
@ -2290,10 +2515,23 @@ class P4Sync(Command, P4UserMap):
# 'data' field we need to append to our array
for k in marshalled.keys():
if k == 'data':
if 'streamContentSize' not in self.stream_file:
self.stream_file['streamContentSize'] = 0
self.stream_file['streamContentSize'] += len(marshalled['data'])
self.stream_contents.append(marshalled['data'])
else:
self.stream_file[k] = marshalled[k]
if (verbose and
'streamContentSize' in self.stream_file and
'fileSize' in self.stream_file and
'depotFile' in self.stream_file):
size = int(self.stream_file["fileSize"])
if size > 0:
progress = 100*self.stream_file['streamContentSize']/size
sys.stdout.write('\r%s %d%% (%i MB)' % (self.stream_file['depotFile'], progress, int(size/1024/1024)))
sys.stdout.flush()
self.stream_have_file_info = True
# Stream directly from "p4 files" into "git fast-import"