import glob

# All files and directories ending with .txt directly inside /home/adam
# (glob skips names that begin with a dot):
print(glob.glob("/home/adam/*.txt"))

# All files and directories ending with .txt one folder level below
# /home/adam, again ignoring names that begin with a dot:
print(glob.glob("/home/adam/*/*.txt"))
import os
def get_filepaths(directory):
    """Return the paths of all files found in the tree rooted at ``directory``.

    The tree is traversed with ``os.walk``; every file name is joined with
    the directory it was found in.

    Args:
        directory: Path of the root directory to walk.

    Returns:
        A list of file paths, one per file found. The list is empty when no
        file is found (``os.walk`` ignores listing errors by default, so a
        missing directory also produces an empty list).
    """
    return [
        os.path.join(root, filename)
        for root, _directories, files in os.walk(directory)
        for filename in files
    ]
# Run the above function and store its results in a variable.
full_file_paths = get_filepaths("/Users/johnny/Desktop/TEST")
from findtools.find_files import (find_files, Match)
# Recursively find all *.sh files in /usr/bin.
sh_files_pattern = Match(filetype='f', name='*.sh')
found_files = find_files(path='/usr/bin', match=sh_files_pattern)

# print() with a single argument behaves the same on Python 2 and Python 3.
for found_file in found_files:
    print(found_file)
def list_files(path):
    """Return the names of the files located directly inside ``path``.

    Args:
        path: Directory whose entries are listed.

    Returns:
        A list of names (with extension, without the directory part) of the
        entries for which ``os.path.isfile`` is true; sub-directories are
        left out.
    """
    return [
        name
        for name in os.listdir(path)
        if os.path.isfile(os.path.join(path, name))
    ]
from glob import glob
from os.path import join

# Match every entry named "wlp" located one level below the relative
# path Users/admin.
glob(join('Users', 'admin', '*', 'wlp'))
from glob import glob
from os.path import expanduser, join

# Match every entry named "wlp" located one level below the user's
# home directory.
glob(join(expanduser('~'), '*', 'wlp'))
这在所有平台上都非常有效。
另一个很好的例子,它可以完美地跨平台工作,并且做了一些不同的事情:
from glob import glob
from os import getcwd
from os.path import join

# Match every entry named "wlp" located one level below the current
# working directory.
glob(join(getcwd(), '*', 'wlp'))
import os
# Getting the current working directory (cwd).
thisdir = os.getcwd()

# Walk the tree below thisdir and print the path of every .docx file.
for root, _dirs, files in os.walk(thisdir):
    for filename in files:
        if filename.endswith(".docx"):
            print(os.path.join(root, filename))
import os

# next(os.walk(...)) returns ONE (root, dirs, files) tuple describing the top
# directory only, so it is unpacked directly instead of being iterated over.
# The default keeps the script from raising StopIteration when the directory
# cannot be walked.
root, _dirs, files = next(os.walk("F:\\_python"), ("F:\\_python", [], []))
arr = [os.path.join(root, name) for name in files]

for f in arr:
    print(f)
>>> F:\\_python\\dict_class.py
>>> F:\\_python\\programmi.txt
使用 next(os.walk(...)):仅获取顶层目录中的文件
# next(os.walk(...)) is a single (root, dirs, files) tuple; wrapping it in a
# list lets the comprehension unpack it once instead of iterating over its
# three elements.
[os.path.join(r, file) for r, d, f in [next(os.walk("F:\\_python"))] for file in f]
>>> ['F:\\_python\\dict_class.py', 'F:\\_python\\programmi.txt']
os.walk
# Full path of every file in the whole tree below F:\_python.
x = [os.path.join(r, file) for r, d, f in os.walk("F:\\_python") for file in f]
print(x)
>>> ['F:\\_python\\dict.py', 'F:\\_python\\progr.txt', 'F:\\_python\\readl.py']
os.listdir()-只获取txt文件
# Keep only the entries of the current directory whose name ends in ".txt".
arr_txt = list(filter(lambda entry: entry.endswith(".txt"), os.listdir()))
使用glob获取文件的完整路径
from path import path
from glob import glob

# Absolute path of every entry matching *.txt directly under F:\.
x = [path(f).abspath() for f in glob("F:\\*.txt")]
使用os.path.isfile避免列表中的目录
import os.path

# Names of the entries in the current directory that are files;
# directories are filtered out by os.path.isfile.
listOfFiles = [f for f in os.listdir() if os.path.isfile(f)]
使用Python 3.4中的pathlib
import pathlib
# Print and collect every file (not directory) in the current directory.
flist = []
for p in pathlib.Path('.').iterdir():
    if p.is_file():
        print(p)
        flist.append(p)
使用list comprehension:
# Same result as the explicit loop: the files of the current directory.
flist = list(filter(pathlib.Path.is_file, pathlib.Path('.').iterdir()))
使用 pathlib.Path() 的 glob 方法
import pathlib
# Lazily match every "*.py" entry of the current directory; the result is a
# generator, so it can be iterated only once.
current_dir = pathlib.Path()
py = current_dir.glob("*.py")
使用 os.walk 仅获取所有文件:只需检查返回元组的第三个元素,即文件名列表
import os

# The third element of every tuple yielded by os.walk is the list of file
# names found in that directory.
x = [files for _root, _dirs, files in os.walk('.')]

# Flatten the per-directory lists into one list of file names.
y = [name for names in x for name in names]
使用 next 仅获取某个目录中的文件:只返回根文件夹中的文件
import os

# Element [2] of the first os.walk tuple: the file names of the root folder only.
x = next(os.walk('F://python'))[2]
使用 next 仅获取目录名:返回元组的 [1] 元素只包含子文件夹
import os

# Element [1] of the first os.walk tuple: the sub-directory names of the root folder.
next(os.walk('F://python'))[1]  # for the current dir use ('.')
>>> ['python3','others']
使用walk获取所有subdir名称
# Print the name of every sub-directory in the tree below F:\_python.
for r, d, f in os.walk("F:\\_python"):
    for dirs in d:
        print(dirs)
os.scandir()(适用于 Python 3.5 及更高版本)
import os

# Names of the files (not directories) in the current directory.
x = [f.name for f in os.scandir() if f.is_file()]
# Another example with `scandir` (a little variation from docs.python.org).
# This one is more efficient than `os.listdir`.
# In this case, it shows the files only in the current directory
# where the script is executed.
import os

# The with-statement closes the scandir iterator when the block exits.
with os.scandir() as i:
    for entry in i:
        if entry.is_file():
            print(entry.name)
import os
import fnmatch


def list_paths(folder='.', pattern='*', case_sensitive=False, subfolders=False):
    """Return the paths of the files in ``folder`` whose name matches ``pattern``.

    Args:
        folder: Directory to search.
        pattern: ``fnmatch``-style pattern tested against each file name
            (not against the whole path).
        case_sensitive: When false, the comparison ignores case on every
            platform; when true, it is an exact-case match.
        subfolders: When true, files inside nested sub-directories are
            included as well.

    Returns:
        A list of paths, each built by joining the directory a file was
        found in with the file name. The list is empty when nothing matches
        or when ``folder`` cannot be walked.
    """
    if case_sensitive:
        def match(name):
            return fnmatch.fnmatchcase(name, pattern)
    else:
        # fnmatch.fnmatch only folds case where os.path.normcase does
        # (Windows), so fold explicitly to get the same result everywhere.
        folded_pattern = pattern.lower()

        def match(name):
            return fnmatch.fnmatchcase(name.lower(), folded_pattern)

    walked = os.walk(folder)
    if not subfolders:
        # Keep only the entry for ``folder`` itself; the default avoids a
        # StopIteration when os.walk yields nothing.
        top = next(walked, None)
        walked = [] if top is None else [top]

    return [os.path.join(root, name)
            for root, _dirnames, filenames in walked
            for name in filenames if match(name)]
>>> import sys
>>> sys.version
'2.7.10 (default, Mar 8 2016, 15:02:46) [MSC v.1600 64 bit (AMD64)]'
>>> m = map(lambda x: x, [1, 2, 3]) # Just a dummy lambda function
>>> m, type(m)
([1, 2, 3], <type 'list'>)
>>> len(m)
3
>>> import sys
>>> sys.version
'3.5.4 (v3.5.4:3f56838, Aug 8 2017, 02:17:05) [MSC v.1900 64 bit (AMD64)]'
>>> m = map(lambda x: x, [1, 2, 3])
>>> m, type(m)
(<map object at 0x000001B4257342B0>, <class 'map'>)
>>> len(m)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
TypeError: object of type 'map' has no len()
>>> lm0 = list(m) # Build a list from the generator
>>> lm0, type(lm0)
([1, 2, 3], <class 'list'>)
>>>
>>> lm1 = list(m) # Build a list from the same generator
>>> lm1, type(lm1) # Empty list now - generator already consumed
([], <class 'list'>)
>>> import os
>>> root_dir = "root_dir" # Path relative to current dir (os.getcwd())
>>>
>>> os.listdir(root_dir) # List all the items in root_dir
['dir0', 'dir1', 'dir2', 'dir3', 'file0', 'file1']
>>>
>>> [item for item in os.listdir(root_dir) if os.path.isfile(os.path.join(root_dir, item))] # Filter items and only keep files (strip out directories)
['file0', 'file1']
>>> import os
>>> root_dir = os.path.join(".", "root_dir") # Explicitly prepending current directory
>>> root_dir
'.\\root_dir'
>>>
>>> scandir_iterator = os.scandir(root_dir)
>>> scandir_iterator
<nt.ScandirIterator object at 0x00000268CF4BC140>
>>> [item.path for item in scandir_iterator]
['.\\root_dir\\dir0', '.\\root_dir\\dir1', '.\\root_dir\\dir2', '.\\root_dir\\dir3', '.\\root_dir\\file0', '.\\root_dir\\file1']
>>>
>>> [item.path for item in scandir_iterator] # Will yield an empty list as it was consumed by previous iteration (automatically performed by the list comprehension)
[]
>>>
>>> scandir_iterator = os.scandir(root_dir) # Reinitialize the generator
>>> for item in scandir_iterator :
...     if os.path.isfile(item.path):
...         print(item.name)
...
file0
file1
>>> import os
>>> root_dir = os.path.join(os.getcwd(), "root_dir") # Specify the full path
>>> root_dir
'E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir'
>>>
>>> walk_generator = os.walk(root_dir)
>>> root_dir_entry = next(walk_generator) # First entry corresponds to the root dir (passed as an argument)
>>> root_dir_entry
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir', ['dir0', 'dir1', 'dir2', 'dir3'], ['file0', 'file1'])
>>>
>>> root_dir_entry[1] + root_dir_entry[2] # Display dirs and files (direct descendants) in a single list
['dir0', 'dir1', 'dir2', 'dir3', 'file0', 'file1']
>>>
>>> [os.path.join(root_dir_entry[0], item) for item in root_dir_entry[1] + root_dir_entry[2]] # Display all the entries in the previous list by their full path
['E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0', 'E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir1', 'E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir2', 'E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir3', 'E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\file0', 'E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\file1']
>>>
>>> for entry in walk_generator: # Display the rest of the elements (corresponding to every subdir)
...     print(entry)
...
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0', ['dir00', 'dir01', 'dir02'], [])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0\\dir00', ['dir000'], ['file000'])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0\\dir00\\dir000', [], ['file0000'])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0\\dir01', [], ['file010', 'file011'])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0\\dir02', ['dir020'], [])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0\\dir02\\dir020', ['dir0200'], [])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir0\\dir02\\dir020\\dir0200', [], [])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir1', [], ['file10', 'file11', 'file12'])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir2', ['dir20'], ['file20'])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir2\\dir20', [], ['file200'])
('E:\\Work\\Dev\\StackOverflow\\q003207219\\root_dir\\dir3', [], [])
>>> import glob, os
>>> wildcard_pattern = "*"
>>> root_dir = os.path.join("root_dir", wildcard_pattern) # Match every file/dir name
>>> root_dir
'root_dir\\*'
>>>
>>> glob_list = glob.glob(root_dir)
>>> glob_list
['root_dir\\dir0', 'root_dir\\dir1', 'root_dir\\dir2', 'root_dir\\dir3', 'root_dir\\file0', 'root_dir\\file1']
>>>
>>> [item.replace("root_dir" + os.path.sep, "") for item in glob_list] # Strip the dir name and the path separator from beginning
['dir0', 'dir1', 'dir2', 'dir3', 'file0', 'file1']
>>>
>>> for entry in glob.iglob(root_dir + "*", recursive=True):
...     print(entry)
...
root_dir\
root_dir\dir0
root_dir\dir0\dir00
root_dir\dir0\dir00\dir000
root_dir\dir0\dir00\dir000\file0000
root_dir\dir0\dir00\file000
root_dir\dir0\dir01
root_dir\dir0\dir01\file010
root_dir\dir0\dir01\file011
root_dir\dir0\dir02
root_dir\dir0\dir02\dir020
root_dir\dir0\dir02\dir020\dir0200
root_dir\dir1
root_dir\dir1\file10
root_dir\dir1\file11
root_dir\dir1\file12
root_dir\dir2
root_dir\dir2\dir20
root_dir\dir2\dir20\file200
root_dir\dir2\file20
root_dir\dir3
root_dir\file0
root_dir\file1
>>> import pathlib
>>> root_dir = "root_dir"
>>> root_dir_instance = pathlib.Path(root_dir)
>>> root_dir_instance
WindowsPath('root_dir')
>>> root_dir_instance.name
'root_dir'
>>> root_dir_instance.is_dir()
True
>>>
>>> [item.name for item in root_dir_instance.glob("*")] # Wildcard searching for all direct descendants
['dir0', 'dir1', 'dir2', 'dir3', 'file0', 'file1']
>>>
>>> [os.path.join(item.parent.name, item.name) for item in root_dir_instance.glob("*") if not item.is_dir()] # Display paths (including parent) for files only
['root_dir\\file0', 'root_dir\\file1']
ctypes is a foreign function library for Python. It provides C compatible data types, and allows calling functions in DLLs or shared libraries. It can be used to wrap these libraries in pure Python.
>>> import os, win32file, win32con
>>> root_dir = "root_dir"
>>> wildcard = "*"
>>> root_dir_wildcard = os.path.join(root_dir, wildcard)
>>> entry_list = win32file.FindFilesW(root_dir_wildcard)
>>> len(entry_list) # Don't display the whole content as it's too long
8
>>> [entry[-2] for entry in entry_list] # Only display the entry names
['.', '..', 'dir0', 'dir1', 'dir2', 'dir3', 'file0', 'file1']
>>>
>>> [entry[-2] for entry in entry_list if entry[0] & win32con.FILE_ATTRIBUTE_DIRECTORY and entry[-2] not in (".", "..")] # Filter entries and only display dir names (except self and parent)
['dir0', 'dir1', 'dir2', 'dir3']
>>>
>>> [os.path.join(root_dir, entry[-2]) for entry in entry_list if entry[0] & (win32con.FILE_ATTRIBUTE_NORMAL | win32con.FILE_ATTRIBUTE_ARCHIVE)] # Only display file "full" names
['root_dir\\file0', 'root_dir\\file1']
def file_filter(filename, radical='', extension=''):
    """Check if a filename matches a radical (prefix) and an extension (suffix).

    Args:
        filename: Name to test. A falsy value (``None``, ``''``) never matches.
        radical: Prefix the stripped name must start with; the empty default
            matches any name.
        extension: Suffix the stripped name must end with; the empty default
            matches any name.

    Returns:
        True when the name, with surrounding whitespace stripped, starts with
        ``radical`` and ends with ``extension``; False otherwise.
    """
    if not filename:
        return False
    filename = filename.strip()
    return filename.startswith(radical) and filename.endswith(extension)
def dir_filter(dirname='', radical='', extension=''):
    """Filter the entry names of a directory according to radical and extension.

    Args:
        dirname: Directory to list; an empty value means the current
            directory (``'.'``).
        radical: Prefix passed on to ``file_filter``.
        extension: Suffix passed on to ``file_filter``.

    Returns:
        A list of the names returned by ``os.listdir(dirname)`` that
        ``file_filter`` accepts.
    """
    if not dirname:
        dirname = '.'
    return [filename for filename in os.listdir(dirname)
            if file_filter(filename, radical, extension)]