简介

这个项目使用 Python 的 Request 库,借助 API 直接从仓库中拉取镜像,并保存为 TAR 文件。此工具不需要借助 docker 客户端。

保存下来的 Tar 文件可以直接使用 docker load -i 命令进行载入。由于去掉了对 Docker/Podman/xxxx 等的依赖,在实际工作中,例如对于文件传输或者 CICD 流程来说,这个脚本都有可能发挥很有意思的作用。

用法也是相当的简单粗暴:python3 docker_pull.py [image name],就完成任务了。

安装

该项目同样是个开源项目,地址为:
https://github.com/NotGlop/docker-drag

import os
import sys
import gzip
from io import BytesIO
import json
import hashlib
import shutil
import requests
import tarfile
import urllib3
urllib3.disable_warnings()

if len(sys.argv) != 2 :
	print('Usage:\n\tdocker_pull.py [registry/][repository/]image[:tag|@digest]\n')
	exit(1)

# Look for the Docker image to download
repo = 'library'
tag = 'latest'
imgparts = sys.argv[1].split('/')
try:
    img,tag = imgparts[-1].split('@')
except ValueError:
	try:
	    img,tag = imgparts[-1].split(':')
	except ValueError:
		img = imgparts[-1]
# Docker client doesn't seem to consider the first element as a potential registry unless there is a '.' or ':'
if len(imgparts) > 1 and ('.' in imgparts[0] or ':' in imgparts[0]):
	registry = imgparts[0]
	repo = '/'.join(imgparts[1:-1])
else:
	registry = 'registry-1.docker.io'
	if len(imgparts[:-1]) != 0:
		repo = '/'.join(imgparts[:-1])
	else:
		repo = 'library'
repository = '{}/{}'.format(repo, img)

# Get Docker authentication endpoint when it is required
auth_url='https://auth.docker.io/token'
reg_service='registry.docker.io'
resp = requests.get('https://{}/v2/'.format(registry), verify=False)
if resp.status_code == 401:
	auth_url = resp.headers['WWW-Authenticate'].split('"')[1]
	try:
		reg_service = resp.headers['WWW-Authenticate'].split('"')[3]
	except IndexError:
		reg_service = ""

# Get Docker token (this function is useless for unauthenticated registries like Microsoft)
def get_auth_head(type):
	resp = requests.get('{}?service={}&scope=repository:{}:pull'.format(auth_url, reg_service, repository), verify=False)
	access_token = resp.json()['token']
	auth_head = {'Authorization':'Bearer '+ access_token, 'Accept': type}
	return auth_head

# Docker style progress bar
def progress_bar(ublob, nb_traits):
	sys.stdout.write('\r' + ublob[7:19] + ': Downloading [')
	for i in range(0, nb_traits):
		if i == nb_traits - 1:
			sys.stdout.write('>')
		else:
			sys.stdout.write('=')
	for i in range(0, 49 - nb_traits):
		sys.stdout.write(' ')
	sys.stdout.write(']')
	sys.stdout.flush()

# Fetch manifest v2 and get image layer digests
auth_head = get_auth_head('application/vnd.docker.distribution.manifest.v2+json')
resp = requests.get('https://{}/v2/{}/manifests/{}'.format(registry, repository, tag), headers=auth_head, verify=False)
if (resp.status_code != 200):
	print('[-] Cannot fetch manifest for {} [HTTP {}]'.format(repository, resp.status_code))
	print(resp.content)
	auth_head = get_auth_head('application/vnd.docker.distribution.manifest.list.v2+json')
	resp = requests.get('https://{}/v2/{}/manifests/{}'.format(registry, repository, tag), headers=auth_head, verify=False)
	if (resp.status_code == 200):
		print('[+] Manifests found for this tag (use the @digest format to pull the corresponding image):')
		manifests = resp.json()['manifests']
		for manifest in manifests:
			for key, value in manifest["platform"].items():
				sys.stdout.write('{}: {}, '.format(key, value))
			print('digest: {}'.format(manifest["digest"]))
	exit(1)
layers = resp.json()['layers']

# Create tmp folder that will hold the image
imgdir = 'tmp_{}_{}'.format(img, tag.replace(':', '@'))
os.mkdir(imgdir)
print('Creating image structure in: ' + imgdir)

config = resp.json()['config']['digest']
confresp = requests.get('https://{}/v2/{}/blobs/{}'.format(registry, repository, config), headers=auth_head, verify=False)
file = open('{}/{}.json'.format(imgdir, config[7:]), 'wb')
file.write(confresp.content)
file.close()

content = [{
	'Config': config[7:] + '.json',
	'RepoTags': [ ],
	'Layers': [ ]
	}]
if len(imgparts[:-1]) != 0:
	content[0]['RepoTags'].append('/'.join(imgparts[:-1]) + '/' + img + ':' + tag)
else:
	content[0]['RepoTags'].append(img + ':' + tag)

empty_json = '{"created":"1970-01-01T00:00:00Z","container_config":{"Hostname":"","Domainname":"","User":"","AttachStdin":false, \
	"AttachStdout":false,"AttachStderr":false,"Tty":false,"OpenStdin":false, "StdinOnce":false,"Env":null,"Cmd":null,"Image":"", \
	"Volumes":null,"WorkingDir":"","Entrypoint":null,"OnBuild":null,"Labels":null}}'

# Build layer folders
parentid=''
for layer in layers:
	ublob = layer['digest']
	# FIXME: Creating fake layer ID. Don't know how Docker generates it
	fake_layerid = hashlib.sha256((parentid+'\n'+ublob+'\n').encode('utf-8')).hexdigest()
	layerdir = imgdir + '/' + fake_layerid
	os.mkdir(layerdir)

	# Creating VERSION file
	file = open(layerdir + '/VERSION', 'w')
	file.write('1.0')
	file.close()

	# Creating layer.tar file
	sys.stdout.write(ublob[7:19] + ': Downloading...')
	sys.stdout.flush()
	auth_head = get_auth_head('application/vnd.docker.distribution.manifest.v2+json') # refreshing token to avoid its expiration
	bresp = requests.get('https://{}/v2/{}/blobs/{}'.format(registry, repository, ublob), headers=auth_head, stream=True, verify=False)
	if (bresp.status_code != 200): # When the layer is located at a custom URL
		bresp = requests.get(layer['urls'][0], headers=auth_head, stream=True, verify=False)
		if (bresp.status_code != 200):
			print('\rERROR: Cannot download layer {} [HTTP {}]'.format(ublob[7:19], bresp.status_code, bresp.headers['Content-Length']))
			print(bresp.content)
			exit(1)
	# Stream download and follow the progress
	bresp.raise_for_status()
	unit = int(bresp.headers['Content-Length']) / 50
	acc = 0
	nb_traits = 0
	progress_bar(ublob, nb_traits)
	with open(layerdir + '/layer_gzip.tar', "wb") as file:
		for chunk in bresp.iter_content(chunk_size=8192): 
			if chunk:
				file.write(chunk)
				acc = acc + 8192
				if acc > unit:
					nb_traits = nb_traits + 1
					progress_bar(ublob, nb_traits)
					acc = 0
	sys.stdout.write("\r{}: Extracting...{}".format(ublob[7:19], " "*50)) # Ugly but works everywhere
	sys.stdout.flush()
	with open(layerdir + '/layer.tar', "wb") as file: # Decompress gzip response
		unzLayer = gzip.open(layerdir + '/layer_gzip.tar','rb')
		shutil.copyfileobj(unzLayer, file)
		unzLayer.close()
	os.remove(layerdir + '/layer_gzip.tar')
	print("\r{}: Pull complete [{}]".format(ublob[7:19], bresp.headers['Content-Length']))
	content[0]['Layers'].append(fake_layerid + '/layer.tar')
	
	# Creating json file
	file = open(layerdir + '/json', 'w')
	# last layer = config manifest - history - rootfs
	if layers[-1]['digest'] == layer['digest']:
		# FIXME: json.loads() automatically converts to unicode, thus decoding values whereas Docker doesn't
		json_obj = json.loads(confresp.content)
		del json_obj['history']
		try:
			del json_obj['rootfs']
		except: # Because Microsoft loves case insensitiveness
			del json_obj['rootfS']
	else: # other layers json are empty
		json_obj = json.loads(empty_json)
	json_obj['id'] = fake_layerid
	if parentid:
		json_obj['parent'] = parentid
	parentid = json_obj['id']
	file.write(json.dumps(json_obj))
	file.close()

file = open(imgdir + '/manifest.json', 'w')
file.write(json.dumps(content))
file.close()

if len(imgparts[:-1]) != 0:
	content = { '/'.join(imgparts[:-1]) + '/' + img : { tag : fake_layerid } }
else: # when pulling only an img (without repo and registry)
	content = { img : { tag : fake_layerid } }
file = open(imgdir + '/repositories', 'w')
file.write(json.dumps(content))
file.close()

# Create image tar and clean tmp folder
docker_tar = repo.replace('/', '_') + '_' + img + '.tar'
sys.stdout.write("Creating archive...")
sys.stdout.flush()
tar = tarfile.open(docker_tar, "w")
tar.add(imgdir, arcname=os.path.sep)
tar.close()
shutil.rmtree(imgdir)
print('\rDocker image pulled: ' + docker_tar)

使用

python docker_pull.py hello-world
python docker_pull.py mysql/mysql-server:8.0
python docker_pull.py mcr.microsoft.com/mssql-tools
python docker_pull.py consul@sha256:6ba4bfe1449ad8ac5a76cb29b6c3ff54489477a23786afb61ae30fb3b1ac0ae9

777661608da6f080703f11ea953cfd69978cb3bf.gif

$ python3 docker_pull.py nginx:alpine
Creating image structure in: tmp_nginx_alpine
89d9c30c1d48: Pull complete [2787134]
110ad692b782: Pull complete [5953615]
Docker image pulled: library_nginx.tar

$ ls library_nginx.tar
library_nginx.tar

或者放到镜像里:

FROM alpine:3.10.3
RUN apk add -u ca-certificates python3 \
    && pip3 install requests \
    && wget https://raw.githubusercontent.com/NotGlop/docker-drag/master/docker_pull.py