如何创建目录的zip存档?

如何在Python中创建目录结构的zip存档?

643635 次浏览

您可能想看看zipfile模块;其文档位于http://docs.python.org/library/zipfile.html。

您可能还需要os.walk()来索引目录结构。

正如其他人所指出的,您应该使用zipfile。文档告诉您哪些功能可用,但并没有真正解释如何使用它们来压缩整个目录。我认为用一些示例代码来解释最简单:

import os
import zipfile


def zipdir(path, ziph):
    """Add every file found under ``path`` to the open archive ``ziph``.

    Args:
        path: Directory to walk.
        ziph: Open, writable ``zipfile.ZipFile`` handle.

    Archive names are computed relative to the parent of ``path``, so the
    directory's own name becomes the top-level folder inside the archive.
    Only the file lists from ``os.walk`` are written; directories themselves
    get no entry.
    """
    anchor = os.path.join(path, '..')
    for folder, _subdirs, names in os.walk(path):
        for name in names:
            source = os.path.join(folder, name)
            ziph.write(source, os.path.relpath(source, anchor))
# Create Python.zip with DEFLATE compression and add the tmp/ tree to it.
with zipfile.ZipFile('Python.zip', mode='w', compression=zipfile.ZIP_DEFLATED) as zipf:
    zipdir('tmp/', zipf)

要将mydirectory的内容添加到新的zip文件中,包括所有文件和子目录:

import os
import zipfile

# The with-block closes the archive even when a write raises; ZipFile only
# writes its closing records when it is closed.
with zipfile.ZipFile("myzipfile.zip", "w") as zf:
    for dirname, subdirs, files in os.walk("mydirectory"):
        # Write the directory itself so that empty directories are kept.
        zf.write(dirname)
        for filename in files:
            zf.write(os.path.join(dirname, filename))

要将压缩添加到生成的zip文件中,请查看此链接

你需要改变:

# Before: no compression argument is passed.
zip = zipfile.ZipFile('Python.zip', 'w')

# After: entries are compressed with DEFLATE.
zip = zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED)

我对代码由Mark Byers提供做了一些修改。如果你有空目录,下面的函数也会添加它们。示例应该更清楚添加到zip中的路径是什么。

#!/usr/bin/env python
import os
import zipfile


def _archiveName(fsPath, basePath):
    """Return ``fsPath`` with a leading ``basePath`` removed, for use inside the archive."""
    if basePath and fsPath.startswith(basePath):
        remainder = fsPath[len(basePath):]
        # Strip only on a path-component boundary, so a base of "dir" does
        # not eat the start of "dir2/file".
        if not remainder or remainder[0] in "\\/":
            return remainder.lstrip("\\/")
    return fsPath.lstrip("\\/")


def addDirToZip(zipHandle, path, basePath=""):
    r"""Add the directory ``path`` (recursively) to the open zip file ``zipHandle``.

    Directories are written as their own entries, so empty directories are
    preserved. ``basePath`` is applied to directory entries and file entries
    alike.

    Args:
        zipHandle: Open, writable ``zipfile.ZipFile``.
        path: Directory to add.
        basePath: Leading path that is removed from ``path`` when building
            the names stored in the archive.

    Examples:
        # add whole "dir" to "test.zip"
        # (when you open "test.zip" you will see only "dir")
        zipHandle = zipfile.ZipFile('test.zip', 'w')
        addDirToZip(zipHandle, 'dir')
        zipHandle.close()

        # add contents of "dir" to "test.zip"
        # (when you open "test.zip" you will see only its contents)
        zipHandle = zipfile.ZipFile('test.zip', 'w')
        addDirToZip(zipHandle, 'dir', 'dir')
        zipHandle.close()

        # add contents of "dir/subdir" to "test.zip"
        # (when you open "test.zip" you will see only contents of "subdir")
        zipHandle = zipfile.ZipFile('test.zip', 'w')
        addDirToZip(zipHandle, 'dir/subdir', 'dir/subdir')
        zipHandle.close()

        # add whole "dir/subdir" to "test.zip"
        # (when you open "test.zip" you will see only "subdir")
        zipHandle = zipfile.ZipFile('test.zip', 'w')
        addDirToZip(zipHandle, 'dir/subdir', 'dir')
        zipHandle.close()

        # add whole "dir/subdir" with full path to "test.zip"
        # (when you open "test.zip" you will see only "dir" and inside it
        # only "subdir")
        zipHandle = zipfile.ZipFile('test.zip', 'w')
        addDirToZip(zipHandle, 'dir/subdir')
        zipHandle.close()

        # add whole "dir" and "otherDir" (with full path) to "test.zip"
        # (when you open "test.zip" you will see only "dir" and "otherDir")
        zipHandle = zipfile.ZipFile('test.zip', 'w')
        addDirToZip(zipHandle, 'dir')
        addDirToZip(zipHandle, 'otherDir')
        zipHandle.close()
    """
    basePath = basePath.rstrip("\\/")
    for root, dirs, files in os.walk(path):
        # Add the directory itself (needed for empty dirs). The directory
        # equal to basePath has an empty archive name and gets no entry.
        dirInZipPath = _archiveName(root, basePath)
        if dirInZipPath:
            zipHandle.write(root, dirInZipPath)
        # Add the files of this directory.
        for file in files:
            filePath = os.path.join(root, file)
            zipHandle.write(filePath, _archiveName(filePath, basePath))

上面是一个简单的函数,应该适用于简单的情况。你可以在我的Gist中找到更优雅的类:https://gist.github.com/Eccenux/17526123107ca0ac28e6

此函数将递归地压缩目录树,压缩文件,并在存档中记录正确的相对文件名。存档条目与zip -r output.zip source_dir生成的条目相同。

import os
import zipfile


def make_zipfile(output_filename, source_dir):
    """Recursively zip ``source_dir`` into ``output_filename`` (DEFLATE).

    Archive names are relative to the parent of ``source_dir``. Every walked
    directory is written as its own entry so empty directories survive, and
    only regular files are added.

    Args:
        output_filename: Path of the zip file to create.
        source_dir: Directory tree to archive.
    """
    relroot = os.path.abspath(os.path.join(source_dir, os.pardir))
    with zipfile.ZipFile(output_filename, "w", zipfile.ZIP_DEFLATED) as archive:
        for folder, _subdirs, names in os.walk(source_dir):
            folder_arcname = os.path.relpath(folder, relroot)
            # Directory entry first (needed for empty dirs).
            archive.write(folder, folder_arcname)
            for name in names:
                source = os.path.join(folder, name)
                if not os.path.isfile(source):
                    continue  # regular files only
                archive.write(source, os.path.join(folder_arcname, name))

以下是Nux给出的答案的变体,对我有效:

def WriteDirectoryToZipFile(zipHandle, srcPath, zipLocalPath="", zipOperation=zipfile.ZIP_DEFLATED):
    """Add ``srcPath`` and everything below it to the open archive ``zipHandle``.

    Args:
        zipHandle: Open, writable ``zipfile.ZipFile``.
        srcPath: Directory to add; relative paths and trailing separators
            are accepted.
        zipLocalPath: Folder inside the archive under which the directory is
            placed ("" puts it at the archive root).
        zipOperation: Compression constant passed to every ``write`` call.

    The last component of ``srcPath`` becomes the top-level folder of the
    added tree. Directories are written as entries of their own.
    """
    # Normalizing first makes dirname() reliable for inputs such as "dir" or
    # "a/dir/", where slicing by the length of the parent path is wrong.
    srcPath = os.path.abspath(srcPath)
    basePath = os.path.dirname(srcPath)
    for root, dirs, files in os.walk(srcPath):
        dirInZipPath = os.path.join(zipLocalPath, os.path.relpath(root, basePath))
        # add dir
        zipHandle.write(root, dirInZipPath, zipOperation)
        # add files
        for f in files:
            zipHandle.write(os.path.join(root, f), os.path.join(dirInZipPath, f), zipOperation)

最简单的方法是使用shutil.make_archive。它支持zip和tar格式。

import shutil

# Archive the dir_name tree in zip format under the base name output_filename.
shutil.make_archive(output_filename, format='zip', root_dir=dir_name)

如果您需要做一些比压缩整个目录更复杂的事情(例如跳过某些文件),那么您需要按照其他人的建议深入研究zipfile模块。

试试下面的,对我有用

import os
import zipfile

zipf = "compress.zip"


def main():
    """Zip the directory configured below into the archive named by ``zipf``."""
    directory = r"Filepath"
    toZip(directory)


def toZip(directory):
    """Write the entries of ``directory`` to the archive named by ``zipf``.

    Regular files are written under the path they were found at; every other
    entry is handed to ``addFolderToZip``.

    Args:
        directory: Directory whose entries are archived.
    """
    # The with-block closes the archive even if a write raises.
    with zipfile.ZipFile(zipf, "w", compression=zipfile.ZIP_DEFLATED) as zippedHelp:
        for entry in os.listdir(directory):
            file_name = os.path.join(directory, entry)
            if os.path.isfile(file_name):
                # print() with a single argument behaves the same on
                # Python 2 and Python 3.
                print(file_name)
                zippedHelp.write(file_name)
            else:
                addFolderToZip(zippedHelp, entry, directory)
                print("---------------Directory Found-----------------------")
def addFolderToZip(zippedHelp, folder, directory):
    """Recursively add the files of ``directory/folder`` to ``zippedHelp``.

    Args:
        zippedHelp: Open, writable ``zipfile.ZipFile``.
        folder: Name of the folder to add, relative to ``directory``.
        directory: Directory that contains ``folder``.
    """
    path = os.path.join(directory, folder)
    print(path)
    for file_name in os.listdir(path):
        file_path = os.path.join(path, file_name)
        if os.path.isfile(file_path):
            zippedHelp.write(file_path)
        # Test the joined path: the bare name would be resolved against the
        # current working directory and nested folders would be skipped.
        elif os.path.isdir(file_path):
            print("------------------sub directory found--------------------")
            addFolderToZip(zippedHelp, file_name, path)

# Run main() only when this file is executed as a script, not when imported.
if __name__=="__main__":main()

我有另一个代码示例可能会有所帮助,使用python3、path lib和zipfile。它可以在任何操作系统中运行。

from pathlib import Path
import zipfile
from datetime import datetime

# strftime pattern: two-digit year, month, day.
DATE_FORMAT = '%y%m%d'


def date_str():
    """Return today's date as a string formatted with ``DATE_FORMAT``."""
    today = datetime.now()
    return today.strftime(DATE_FORMAT)

def zip_name(path):
    """Return a zip filename (string) for ``path`` that does not exist yet.

    The name is ``<parent>/<dirname>_<date>.zip``; while that file exists a
    counter suffix ``_1``, ``_2``, ... is appended before ``.zip``.
    """
    target = Path(path).resolve()
    folder = target.parents[0]
    candidate = '{}/{}_{}.zip'.format(folder, target.name, date_str())
    suffix = 1
    while Path(candidate).exists():
        candidate = '{}/{}_{}_{}.zip'.format(folder, target.name, date_str(), suffix)
        suffix += 1
    return candidate

def all_files(path):
    """Yield every file and folder below ``path`` as a path string.

    Each entry is yielded before the entries found inside it.
    """
    for entry in Path(path).iterdir():
        yield str(entry)
        if not entry.is_dir():
            continue
        # Recurse into the sub-directory and pass its entries through.
        yield from (str(Path(nested)) for nested in all_files(str(entry)))

def zip_dir(path):
    """Create a zip archive of everything below ``path``.

    The archive name comes from ``zip_name(path)`` and the entries from
    ``all_files(path)``; progress is printed for each entry.
    """
    zip_filename = zip_name(path)
    # The with-block closes the archive even if one of the writes raises.
    with zipfile.ZipFile(zip_filename, 'w') as zip_file:
        print('create:', zip_filename)
        for file in all_files(path):
            print('adding... ', file)
            zip_file.write(file)

if __name__ == '__main__':
    # Script entry point: archive the current directory, then report completion.
    zip_dir('.')
    print('end!')

如果您想要任何常见图形文件管理器的压缩文件夹等功能,您可以使用以下代码,它使用zipfile模块。使用此代码,您将拥有路径为其根文件夹的zip文件。

import os
import zipfile


def zipdir(path, ziph):
    """Add the files below ``path`` to ``ziph`` with ``path`` as the archive root.

    A file directly inside ``path`` lands at the root of the archive; a file
    in a sub-folder keeps that sub-folder structure, e.g.
    ``folder1/folder2/file.txt``.

    Args:
        path: Directory to archive.
        ziph: Open, writable ``zipfile.ZipFile`` handle.
    """
    # Iterate all the directories and files
    for root, dirs, files in os.walk(path):
        # Folder structure of the current directory relative to path; empty
        # for path itself so its files sit at the archive root.
        rel_dir = os.path.relpath(root, path)
        prefix = '' if rel_dir == os.curdir else rel_dir
        for filename in files:
            actual_file_path = os.path.join(root, filename)
            zipped_file_path = os.path.join(prefix, filename)
            # Write through the handle that was passed in, not a global.
            ziph.write(actual_file_path, zipped_file_path)

# The with-block closes Python.zip even if zipdir() raises.
with zipfile.ZipFile('Python.zip', 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipdir('/tmp/justtest/', zipf)

如何在Python中创建目录结构的zip存档?

在Python脚本中

在Python 2.7+中,shutil有一个make_archive函数。

from shutil import make_archive

make_archive(
    'zipfile_name',
    'zip',           # the archive format - or tar, bztar, gztar
    root_dir=None,   # root for archive - current working dir if None
    base_dir=None,   # start archiving from here - cwd if None too
)

这里压缩的存档将被命名为zipfile_name.zip。如果base_dir位于root_dir之下更深的层级,它将排除不在base_dir中的文件,但仍将父目录中的文件存档到root_dir

我确实在Cygwin上用2.7测试了这个问题-它需要一个root_dir的参数,对于cwd:

# Pass root_dir explicitly; '.' is the current working directory.
make_archive('zipfile_name', 'zip', root_dir='.')

从shell中使用Python

您也可以使用zipfile模块从shell使用Python执行此操作:

$ python -m zipfile -c zipname sourcedir

其中zipname是您想要的目标文件的名称(如果需要,请自行添加.zip,它不会自动添加),sourcedir是目录的路径。

压缩Python(或者只是不想要父目录):

如果您尝试压缩带有__init__.py和__main__.py的python包,并且您不想要父目录,则使用

$ python -m zipfile -c zipname sourcedir/*

$ python zipname

将运行包。(请注意,您不能将子包作为压缩存档中的切入点运行。)

压缩Python应用程序:

如果您有python3.5+,并且特别想要压缩Python包,请使用zipapp

$ python -m zipapp myapp
$ python myapp.pyz

这是一个现代的方法,它使用了Pathlib和一个上下文管理器。将文件直接放在zip中,而不是放在子文件夹中。

def zip_dir(filename: str, dir_to_zip: pathlib.Path):
    """Zip the files below ``dir_to_zip`` directly into the archive root.

    Args:
        filename: Path of the zip file to create.
        dir_to_zip: Directory whose files are archived, at any depth.

    Archive names are relative to ``dir_to_zip``, so no enclosing folder is
    created. Only regular files are written.
    """
    with zipfile.ZipFile(filename, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # rglob('*') covers all subdirectories. glob('**') is avoided because
        # on Python 3.13+ it also yields files, which cannot be iterdir()'d.
        for file in sorted(dir_to_zip.rglob('*')):
            if not file.is_file():
                continue
            # relative_to() strips the whole dir_to_zip prefix, whatever its
            # depth; dropping one path component only works for a
            # one-component relative directory.
            zip_path = file.relative_to(dir_to_zip)
            # Use strings, since older zipfile versions don't support pathlib directly.
            zipf.write(str(file), zip_path.as_posix())

我通过将Mark Byers的解决方案与Reimund和Morten Zilmer的注释(相对路径和包括空目录)合并来准备一个函数。作为最佳实践,with用于ZipFile的文件构造。

该函数还准备了一个默认的zip文件名,由被压缩的目录名加上“.zip”扩展名组成。因此,它只需一个参数即可工作:要压缩的源目录。

import os
import zipfile


def zip_dir(path_dir, path_file_zip=''):
    """Zip ``path_dir`` (files and directories, empty ones included).

    Args:
        path_dir: Directory to archive.
        path_file_zip: Path of the zip file to create. When empty, the
            archive is written next to ``path_dir`` as ``<dirname>.zip``.

    Archive names are relative to the parent of ``path_dir``, so the
    directory's own name is the top-level folder of the archive.
    """
    # Drop a trailing separator so basename() is the directory name and the
    # default archive is not placed inside the directory being zipped.
    path_dir = os.path.normpath(path_dir)
    if not path_file_zip:
        path_file_zip = os.path.join(
            os.path.dirname(path_dir), os.path.basename(path_dir) + '.zip')
    parent_dir = os.path.join(path_dir, os.path.pardir)
    # ZipFile only accepts the modes 'r', 'w', 'x' and 'a'; 'wb' raises
    # ValueError.
    with zipfile.ZipFile(path_file_zip, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for root, dirs, files in os.walk(path_dir):
            for file_or_dir in files + dirs:
                full_path = os.path.join(root, file_or_dir)
                zip_file.write(full_path, os.path.relpath(full_path, parent_dir))

现代Python(3.6+)使用pathlib模块进行简明的类似OOP的路径处理,pathlib.Path.rglob()用于递归全局化。据我所知,这相当于George V. Reilly的回答:压缩的zips,最上面的元素是一个目录,保留空目录,使用相对路径。

from os import PathLike
from pathlib import Path
from typing import Union
from zipfile import ZIP_DEFLATED, ZipFile


def zip_dir(zip_name: str, source_dir: Union[str, PathLike]):
    """Zip ``source_dir`` into ``zip_name`` with the directory as top-level entry.

    Args:
        zip_name: Path of the zip file to create.
        source_dir: Directory to archive; ``~`` is expanded and the path must
            exist (it is resolved strictly).

    Every path found by ``rglob('*')`` is written with a name relative to the
    parent of the resolved source directory.
    """
    source_root = Path(source_dir).expanduser().resolve(strict=True)
    archive_base = source_root.parent
    with ZipFile(zip_name, mode='w', compression=ZIP_DEFLATED) as archive:
        for entry in source_root.rglob('*'):
            archive.write(entry, arcname=entry.relative_to(archive_base))

注意:如可选类型提示所示,zip_name不能是Path对象(这一限制在3.6.2+中已修复)。

# Import the required modules. Both are part of Python's standard library,
# so nothing has to be installed with pip.
import os
import zipfile

# Directory tree to archive (resolved after the chdir below).
source_dir = 'Type your directory'

# Change to the directory where the new zip file should be created.
os.chdir('Type your destination')

# Create a new zipfile (called myfile here). The with-block closes it, which
# is what finalizes the archive, even if a write raises.
with zipfile.ZipFile('myfile.zip', 'w') as zf:
    # os.walk visits the whole tree; `dirnames` is the directory currently
    # being visited, `folders` its sub-directories and `files` its files.
    for dirnames, folders, files in os.walk(source_dir):
        # Write the visited directory itself so empty directories are kept.
        zf.write(dirnames)
        for file in files:
            # Join with the visited directory, not the top one, so files in
            # sub-directories are found.
            zf.write(os.path.join(dirnames, file))

好吧,在阅读了这些建议之后,我想出了一个非常相似的方法,可以在2.7. x中使用,而不会创建“有趣”的目录名称(绝对类似的名称),并且只会在zip中创建指定的文件夹。

或者以防您需要zip包含一个包含所选目录内容的文件夹。

def zipDir(path, ziph):
    """Insert directory ``path`` into the zipfile instance ``ziph``.

    The archive contains one top-level folder named after the last component
    of ``path``; files keep their sub-folder structure below it.

    Args:
        path: Directory to insert.
        ziph: Open, writable ``zipfile.ZipFile`` instance.
    """
    topFolder = os.path.basename(os.path.normpath(path))
    for root, dirs, files in os.walk(path):
        for file in files:
            filePath = os.path.join(root, file)
            # Keep the path relative to `path`, so files with the same name
            # in different sub-folders do not collide, and let os.path.join
            # pick the separator instead of hard-coding a backslash.
            ziph.write(filePath, os.path.join(topFolder, os.path.relpath(filePath, path)))
def makeZip(pathToFolder):
    """Create a zip file containing the specified folder.

    The archive is written to ``pathToFolder + 'file.zip'`` and filled by
    ``zipDir``.

    Args:
        pathToFolder: Folder to archive.
    """
    zipPath = pathToFolder + 'file.zip'
    # The with-block closes the archive even if zipDir() raises.
    with zipfile.ZipFile(zipPath, 'w', zipfile.ZIP_DEFLATED) as zipf:
        zipDir(pathToFolder, zipf)
    # Report where the archive actually is, not the folder that was zipped.
    print("Zip file saved to: " + zipPath)
# Example invocation with a Windows-style path.
makeZip( "c:\\path\\to\\folder\\to\\insert\\into\\zipfile" )

为了提供更大的灵活性,例如按名称选择目录/文件:

import os
import zipfile


def zipall(ob, path, rel=""):
    """Add the file or directory ``path`` to the open archive ``ob``.

    Args:
        ob: Open, writable ``zipfile.ZipFile``.
        path: File or directory to add. Anything else is ignored.
        rel: Archive folder for the entry. For a directory, "" means the
            directory's own base name is used.

    A directory is written as an entry of its own, then its sub-directories
    are added recursively and its files are written below it.
    """
    leaf = os.path.basename(path)
    if not os.path.isdir(path):
        if os.path.isfile(path):
            ob.write(path, os.path.join(rel, leaf))
        return

    arc_dir = leaf if rel == "" else rel
    ob.write(path, arc_dir)

    # Only the first os.walk() result is needed: the direct children.
    top_level = next(os.walk(path), None)
    if top_level is None:
        return
    _, subdirs, filenames = top_level
    for sub in subdirs:
        zipall(ob, os.path.join(path, sub), os.path.join(arc_dir, sub))
    for name in filenames:
        ob.write(os.path.join(path, name), os.path.join(arc_dir, name))

对于文件树:

.
├── dir
│   ├── dir2
│   │   └── file2.txt
│   ├── dir3
│   │   └── file3.txt
│   └── file.txt
├── dir4
│   ├── dir5
│   └── file4.txt
├── listdir.zip
├── main.py
├── root.txt
└── selective.zip

例如,您可以仅选择dir4和root.txt:

# Absolute paths of the entries picked from the current directory.
cwd = os.getcwd()
files = [os.path.join(cwd, name) for name in ('dir4', 'root.txt')]

with zipfile.ZipFile("selective.zip", mode="w") as myzip:
    for f in files:
        zipall(myzip, f)

或者只需在脚本调用目录中listdir并从那里添加所有内容:

with zipfile.ZipFile("listdir.zip", mode="w") as myzip:
    for f in os.listdir():
        # listdir.zip is created in this same directory; adding it would put
        # the archive inside itself, so it is left out.
        if f != "listdir.zip":
            zipall(myzip, f)

使用Shutil,它是python标准库集的一部分。使用Shutil非常简单(见下面的代码):

  • 第一个参数:结果zip/tar文件的文件名,
  • 第二个参数:zip/tar,
  • 第三个参数:dir_name

代码:

import shutil

# Arguments: archive base name, archive format, directory to archive.
shutil.make_archive(
    '/home/user/Desktop/Filename',
    format='zip',
    root_dir='/home/username/Desktop/Directory',
)

创建zip文件的函数。

def CREATEZIPFILE(zipname, path):
    """Create the zip file ``zipname`` from a folder or a single file.

    Args:
        zipname: Name of the zip file to create.
        path: Folder or file to put into the zip file. A folder is added
            recursively with names relative to the folder; a single file is
            stored under ``path`` as given. Any other kind of path produces
            an empty archive.

    NOTE: the standard-library ``zipfile`` module cannot create encrypted
    archives; ``ZipFile.setpassword`` only sets the default password used
    when extracting. No password is therefore set here.
    """
    # The with-block closes the archive even if a write raises.
    with zipfile.ZipFile(zipname, 'w', zipfile.ZIP_DEFLATED) as zipf:
        # Check whether the path is a directory or a file.
        if os.path.isdir(path):
            # Walk the whole tree; a plain listdir() would store
            # sub-directories as empty entries without their contents.
            for root, dirs, files in os.walk(path):
                for name in dirs + files:
                    fullPath = os.path.join(root, name)
                    zipf.write(fullPath, os.path.relpath(fullPath, path))
        elif os.path.isfile(path):
            zipf.write(path, path)

假设您想压缩当前目录中的所有文件夹(子目录)。

# Zip every immediate sub-directory of the current directory into
# "<name>.zip" in the current directory.
_top, sub_dirs, _top_files = next(os.walk("."), (".", [], []))
for sub_dir in sub_dirs:
    zip_you_want = sub_dir + ".zip"
    # The with-block closes each archive even if a write raises.
    with zipfile.ZipFile(zip_you_want, "w", zipfile.ZIP_DEFLATED) as zip_process:
        for folder, _dirs, files in os.walk(sub_dir):
            # Write the folder itself so empty folders are kept.
            zip_process.write(folder)
            for name in files:
                zip_process.write(os.path.join(folder, name))
    # Report inside the loop so every archive is announced, not only the last.
    print("Successfully zipped directory: {sub_dir}".format(sub_dir=sub_dir))

要保留要存档的父目录下的文件夹层次结构:

import glob
import os
import zipfile

# fp_zip is the archive to create and parent the directory to archive; both
# must be defined before this snippet runs.
with zipfile.ZipFile(fp_zip, "w", zipfile.ZIP_DEFLATED) as zipf:
    # glob is a module, so the function is glob.glob(); recursive=True is
    # required for "**" to descend into sub-directories.
    for fp in glob.glob(os.path.join(parent, "**/*"), recursive=True):
        # Name each entry by its path below `parent`.
        zipf.write(fp, arcname=os.path.relpath(fp, parent))

如果需要,您可以将其更改为使用pathlib用于文件全局化

这里有这么多答案,我希望我可以贡献我自己的版本,它基于原始答案(顺便说一句),但具有更图形化的视角,还为每个zipfile设置和排序os.walk()使用上下文,以便有一个有序的输出。

有了这些文件夹和它们的文件(以及其他文件夹),我想为每个cap_文件夹创建一个.zip

$ tree -d
.
├── cap_01
|    ├── 0101000001.json
|    ├── 0101000002.json
|    ├── 0101000003.json
|
├── cap_02
|    ├── 0201000001.json
|    ├── 0201000002.json
|    ├── 0201001003.json
|
├── cap_03
|    ├── 0301000001.json
|    ├── 0301000002.json
|    ├── 0301000003.json
|
├── docs
|    ├── map.txt
|    ├── main_data.xml
|
├── core_files
├── core_master
├── core_slave

以下是我应用的内容,并附有评论以更好地理解该过程。

$ cat zip_cap_dirs.py
""" Zip 'cap_*' directories. """
import os
import zipfile as zf

for root, dirs, files in sorted(os.walk('.')):
    # Only directories whose path contains 'cap_' get an archive.
    if 'cap_' not in root:
        continue
    print(f"Compressing: {root}")
    # The archive is named after the chapter directory: "<root>.zip".
    cap_dir_zip = root + '.zip'
    # One zipfile context per matching directory.
    with zf.ZipFile(cap_dir_zip, mode='w', compression=zf.ZIP_DEFLATED) as new_zip:
        # `files` holds the file names directly inside the current root dir.
        for f in files:
            # Store each file under its path starting from the walk root.
            new_zip.write(os.path.join(root, f))

基本上,对于os.walk(path)的每次迭代,我都会为zipfile设置打开一个上下文,然后迭代files,这是root目录中的list文件,基于当前root目录形成每个文件的相对路径,附加到正在运行的zipfile上下文。

输出如下所示:

$ python3 zip_cap_dirs.py
Compressing: ./cap_01
Compressing: ./cap_02
Compressing: ./cap_03

要查看每个.zip目录的内容,您可以使用less命令:

$ less cap_01.zip
Archive:  cap_01.zip
 Length   Method    Size  Cmpr    Date    Time   CRC-32   Name
--------  ------  ------- ---- ---------- ----- --------  ----
   22017  Defl:N     2471  89% 2019-09-05 08:05 7a3b5ec6  cap_01/0101000001.json
   21998  Defl:N     2471  89% 2019-09-05 08:05 155bece7  cap_01/0101000002.json
   23236  Defl:N     2573  89% 2019-09-05 08:05 55fced20  cap_01/0101000003.json
--------          ------- ---                            -------
   67251             7515  89%                            3 files

压缩文件或树(目录及其子目录)。

from pathlib import Path
from zipfile import ZIP_DEFLATED, ZipFile


def make_zip(tree_path, zip_path, mode='w', skip_empty_dir=False):
    """Write a file or a whole directory tree to the archive ``zip_path``.

    Args:
        tree_path: File or directory to add.
        zip_path: Archive to write.
        mode: ``ZipFile`` mode; 'w' creates a new archive, 'a' appends.
        skip_empty_dir: When true, directories are traversed but get no
            entry of their own, so only non-directory paths are written.

    Paths are written without an explicit archive name.
    """
    with ZipFile(zip_path, mode=mode, compression=ZIP_DEFLATED) as archive:
        pending = [Path(tree_path)]
        while pending:
            current = pending.pop()
            is_directory = current.is_dir()
            if is_directory:
                # Queue the children; they are processed stack-wise.
                pending += current.iterdir()
            if not (is_directory and skip_empty_dir):
                archive.write(current)

要附加到现有存档,请传递mode='a',以创建一个新的存档mode='w'(上面的默认值)。所以假设您想在同一个存档下捆绑3个不同的目录树。

# The first call creates the archive (mode='w'); the following calls append
# to it (mode='a').
make_zip(tree_path=path_to_tree1, zip_path=path_to_arch, mode='w')
make_zip(tree_path=path_to_tree2, zip_path=path_to_arch, mode='a')
make_zip(tree_path=path_to_file3, zip_path=path_to_arch, mode='a')

使用pathlib.Path的解决方案,独立于所使用的操作系统:

import zipfile
from pathlib import Path


def zip_dir(path: Path, zip_file_path: Path):
    """Zip all contents of ``path`` to ``zip_file_path``.

    Args:
        path: Directory whose files are archived, sub-directories included.
        zip_file_path: Zip file to create.

    Archive names are relative to ``path``; each name is printed as it is
    added.
    """
    # rglob('*') descends into sub-directories; glob('*') only sees the top
    # level. The list is built before the archive is opened.
    files_to_zip = sorted(file for file in path.rglob('*') if file.is_file())
    with zipfile.ZipFile(zip_file_path, 'w', zipfile.ZIP_DEFLATED) as zip_f:
        for file in files_to_zip:
            arcname = file.relative_to(path).as_posix()
            print(arcname)
            zip_f.write(file, arcname)
# NOTE(review): `tools` is not defined in this snippet — presumably the module
# that contains zip_dir(); confirm. The name `zip_dir` is bound to a Path here.
current_dir = Path.cwd()
zip_dir = current_dir.joinpath("test")
tools.zip_dir(zip_dir, current_dir.joinpath('Zipped_dir.zip'))

显而易见的方法是使用Shutil,就像第二个顶部答案所说的那样,但是如果你出于某种原因仍然希望使用ZipFile,如果你在这样做时遇到了一些麻烦(如Windows中的ERR 13等),你可以使用这个修复:

import os
import zipfile


def retrieve_file_paths(dirName):
    """Return the paths of all files below ``dirName``.

    Each path is the walked directory joined with the file name, in
    ``os.walk`` order. Directories themselves are not listed.
    """
    return [
        os.path.join(folder, name)
        for folder, _subdirs, names in os.walk(dirName)
        for name in names
    ]
def main(dir_name, output_filename):
    """Zip every file below ``dir_name`` into ``output_filename + '.zip'``.

    The file list is collected before the archive is created, and each file
    is written under the path it was found at.
    """
    members = retrieve_file_paths(dir_name)
    with zipfile.ZipFile(output_filename + '.zip', 'w') as archive:
        for member in members:
            archive.write(member)
# Example invocation: archive my_dir into my_dir_archived.zip.
main("my_dir", "my_dir_archived")

这个递归遍历给定文件夹中的每个子文件夹/文件,并将它们写入zip文件,而不是尝试直接压缩文件夹。

使用python 3.9、pathlib和zipfile模块,您可以从系统中的任何位置创建zip文件。

def zip_dir(dir: Union[Path, str], filename: Union[Path, str]):
    """Zip the directory ``dir`` into ``filename`` without changing into it.

    Args:
        dir: Directory to archive.
        filename: Zip file to create.

    Every path found by ``rglob("*")`` is written with a name relative to
    ``dir``, so the archive has no enclosing top-level folder.
    """
    source_root = Path(dir)
    with zipfile.ZipFile(filename, mode="w", compression=zipfile.ZIP_DEFLATED) as archive:
        for member in source_root.rglob("*"):
            archive.write(member, arcname=member.relative_to(source_root))

它很整洁,带有类型注解,代码也更少。