在 Node.js 中按扩展名(*.html)查找文件夹下的文件

我想用 Node.js 找出 src 文件夹及其所有子文件夹中的全部 *.html 文件。最好的方法是什么?

// Root directory to search and the extension (without the dot) to look for.
const folder = '/project1/src';
const extension = 'html';

// Node-style callback: `results` is expected to be an array of the matching
// files, each with a path relative to `folder`.
function cb(err, results) {
  console.log(results);
}

// This is the function being asked for. It has to recursively traverse all sub folders.
findFiles(folder, extension, cb);

我认为很多开发人员都应该拥有经过测试的优秀解决方案,使用它比自己编写一个更好。

147920 次浏览

你可以使用操作系统帮助,这里有一个跨平台的解决方案:

1. 下列函数使用 ls 和 dir,不递归搜索,但返回相对路径

var exec = require('child_process').exec;
/**
 * List the entries with a given extension that sit directly inside `folder`
 * (no recursion) by shelling out to `dir /B` on Windows or `ls -1` elsewhere.
 *
 * The folder and extension are quoted before being placed on the command line,
 * so paths containing spaces or shell metacharacters are handled literally
 * instead of breaking (or injecting into) the command.
 *
 * @param {string} folder - Directory to list.
 * @param {string} extension - Extension without the leading dot, e.g. "html".
 * @param {function(?Error, ?Array<string>): void} cb - Node-style callback.
 *   Receives `(err, null)` when the command fails, otherwise `(null, lines)`
 *   where `lines` are the non-empty lines printed by the command.
 */
function findFiles(folder, extension, cb) {
  // Wrap a value in single quotes for POSIX sh, escaping embedded single quotes.
  const shQuote = (value) => "'" + String(value).replace(/'/g, "'\\''") + "'";

  let command;
  if (/^win/.test(process.platform)) {
    // cmd.exe: double quotes keep spaces in the path literal.
    command = 'dir /B "' + folder + '\\*.' + extension + '"';
  } else {
    // Only the `*` stays unquoted so the shell still expands the wildcard;
    // `--` stops a folder name starting with "-" from being read as an option.
    command = 'ls -1 -- ' + shQuote(folder) + '/*.' + shQuote(extension);
  }

  exec(command, (err, stdout) => {
    if (err) {
      return cb(err, null);
    }
    // Split on either line-ending style and drop empty lines (e.g. the one
    // produced by the trailing newline).
    const files = stdout.split(/\r?\n/).filter((line) => line !== '');
    cb(null, files);
  });
}


// Example: print whatever findFiles reports for the .html files in "folderName".
findFiles("folderName", "html", (err, files) => {
  console.log("files:", files);
});

2. 下列函数使用 find 和 dir,递归搜索,但在 Windows 上返回绝对路径

var exec = require('child_process').exec;
/**
 * Recursively list the entries with a given extension below `folder` by
 * shelling out to `dir /B /s` on Windows or `find -name` elsewhere.
 *
 * The folder and the name pattern are quoted before being placed on the
 * command line, so paths containing spaces or shell metacharacters are
 * handled literally instead of breaking (or injecting into) the command.
 *
 * @param {string} folder - Directory to start searching from.
 * @param {string} extension - Extension without the leading dot, e.g. "html".
 * @param {function(?Error, ?Array<string>): void} cb - Node-style callback.
 *   Receives `(err, null)` when the command fails, otherwise `(null, lines)`
 *   where `lines` are the non-empty lines printed by the command.
 */
function findFiles(folder, extension, cb) {
  // Wrap a value in single quotes for POSIX sh, escaping embedded single quotes.
  const shQuote = (value) => "'" + String(value).replace(/'/g, "'\\''") + "'";

  let command;
  if (/^win/.test(process.platform)) {
    // cmd.exe: double quotes keep spaces in the path literal.
    command = 'dir /B /s "' + folder + '\\*.' + extension + '"';
  } else {
    // The pattern is quoted so that `find`, not the shell, expands the wildcard.
    command = 'find ' + shQuote(folder) + ' -name ' + shQuote('*.' + extension);
  }

  exec(command, (err, stdout) => {
    if (err) {
      return cb(err, null);
    }
    // Split on either line-ending style and drop empty lines (e.g. the one
    // produced by the trailing newline).
    const files = stdout.split(/\r?\n/).filter((line) => line !== '');
    cb(null, files);
  });
}


// Example: print whatever findFiles reports for the .html files below "folder".
findFiles("folder", "html", (err, files) => {
  console.log("files:", files);
});

Node.js,简单的递归函数:

var path = require('path'),
fs = require('fs');


/**
 * Walk `startPath` recursively and log every non-directory entry whose joined
 * path ends with `filter`.
 *
 * @param {string} startPath - Directory to start from; if it does not exist a
 *   "no dir" message is logged and nothing else happens.
 * @param {string} filter - Suffix to look for, e.g. ".html".
 */
function fromDir(startPath, filter) {
  if (!fs.existsSync(startPath)) {
    console.log("no dir ", startPath);
    return;
  }

  for (const entry of fs.readdirSync(startPath)) {
    const fullPath = path.join(startPath, entry);

    // lstat is used, so a symbolic link is judged by the link itself.
    if (fs.lstatSync(fullPath).isDirectory()) {
      fromDir(fullPath, filter); // descend into the sub directory
      continue;
    }

    if (fullPath.endsWith(filter)) {
      console.log('-- found: ', fullPath);
    }
  }
}


// Example: log every file under ../LiteScript whose path ends with ".html".
fromDir('../LiteScript', '.html');

添加 RegExp (如果希望变得花哨)和一个回调(使其成为通用的)。

var path = require('path'),
fs = require('fs');


/**
 * Walk `startPath` recursively and invoke `callback` with the joined path of
 * every non-directory entry that `filter` accepts.
 *
 * @param {string} startPath - Directory to start from; if it does not exist a
 *   "no dir" message is logged and nothing else happens.
 * @param {RegExp} filter - Tested against each joined path via `filter.test`.
 * @param {function(string): void} callback - Called once per accepted path.
 */
function fromDir(startPath, filter, callback) {
  if (!fs.existsSync(startPath)) {
    console.log("no dir ", startPath);
    return;
  }

  fs.readdirSync(startPath).forEach((entry) => {
    const fullPath = path.join(startPath, entry);
    const isDir = fs.lstatSync(fullPath).isDirectory();

    if (isDir) {
      fromDir(fullPath, filter, callback); // descend into the sub directory
    } else if (filter.test(fullPath)) {
      callback(fullPath);
    }
  });
}


// Example: log every path under ../LiteScript that ends with ".html".
fromDir('../LiteScript', /\.html$/, (filename) => {
  console.log('-- found: ', filename);
});

基于 Lucio 的代码,我做了一个模块。它会返回具有指定扩展名的所有文件。贴在这里,以防有人需要。

var path = require('path'),
fs   = require('fs');




/**
 * Find all files recursively in a folder whose name ends with a given
 * extension, e.g:
 * findFilesInDir('./project/src', '.html') ==> ['project/src/a.html', 'project/src/build/index.html']
 * (paths are built with path.join, which normalizes away a leading "./").
 *
 * @param  {String} startPath    Folder to search; resolved like any relative fs path
 * @param  {String} filter       Extension name including the dot, e.g: '.html'
 * @return {Array}               Matching file paths; an empty array when startPath does not exist
 */
function findFilesInDir(startPath, filter) {
  if (!fs.existsSync(startPath)) {
    console.log("no dir ", startPath);
    // Always hand back an array so callers (and the recursion) can rely on it.
    return [];
  }

  const results = [];
  for (const entry of fs.readdirSync(startPath)) {
    const filename = path.join(startPath, entry);

    if (fs.lstatSync(filename).isDirectory()) {
      results.push(...findFilesInDir(filename, filter)); // recurse
    } else if (entry.endsWith(filter)) {
      // Match on the end of the entry name only: a substring test on the full
      // path would also accept "a.html.bak" or anything inside "x.html.d/".
      console.log('-- found: ', filename);
      results.push(filename);
    }
  }
  return results;
}


module.exports = findFilesInDir;

我刚注意到你使用的是同步的 fs 方法,这可能会阻塞你的应用程序。这里有一个基于 Promise 的异步方法,使用了 async 和 q 两个库。假设你把下面的代码放在名为 myfile.js 的文件中,可以这样执行:START=/myfolder FILTER=".html" node myfile.js

Q = require("q")
async = require("async")
path = require("path")
fs = require("fs")


/**
 * Asynchronously walk `startPath` and push onto `files` the joined path of
 * every non-directory entry whose name contains `filter`.
 *
 * @param {string} startPath  Directory to read.
 * @param {string} filter     Substring looked for in each entry name (via indexOf).
 * @param {Array}  files      Accumulator; the same array is handed to every
 *                            recursive call and is what the promise resolves with.
 * @return {Promise}          Q promise resolved with `files`, or rejected with a
 *                            string starting with "Could not read dir: ".
 */
function findFiles(startPath, filter, files){
var deferred;
deferred = Q.defer(); //main deferred


//read directory
Q.nfcall(fs.readdir, startPath).then(function(list) {
var ideferred = Q.defer(); //inner deferred for resolve of async each
//async crawling through dir
async.each(list, function(item, done) {


//stat current item in dirlist
return Q.nfcall(fs.stat, path.join(startPath, item))
.then(function(stat) {
//check if item is a directory
if (stat.isDirectory()) {
//recursive!! find files in subdirectory
return findFiles(path.join(startPath, item), filter, files)
// a failing sub directory is only logged; it does not reject the outer promise
.catch(function(error){
console.log("could not read path: " + error.toString());
})
.finally(function() {
//resolve async job after promise of subprocess of finding files has been resolved
return done();
});
//check if item is a file, that matches the filter and add it to files array
// NOTE(review): indexOf matches the filter anywhere in the name, not only at the end
} else if (item.indexOf(filter) >= 0) {
files.push(path.join(startPath, item));
return done();
//file is no directory and does not match the filefilter -> don't do anything
} else {
return done();
}
})
// NOTE(review): on this path `done` is never called, so presumably the final
// callback of async.each does not fire for this directory; the inner deferred
// is rejected directly instead - confirm this is intended
.catch(function(error){
ideferred.reject("Could not stat: " + error.toString());
});
}, function() {
return ideferred.resolve(); //async each has finished, so resolve inner deferred
});
return ideferred.promise;
}).then(function() {
//here you could do anything with the files of this recursion step (otherwise you would only need ONE deferred)
return deferred.resolve(files); //resolve main deferred
// any rejection from the chain above (readdir failure or the inner deferred) ends up here
}).catch(function(error) {
deferred.reject("Could not read dir: " + error.toString());
return
});
return deferred.promise;
}




// Entry point: the start directory and the filter come from the START and
// FILTER environment variables; matches are collected in a fresh array.
findFiles(process.env.START, process.env.FILTER, [])
.then(function(files){
console.log(files);
})
// report a rejected search instead of leaving the rejection unhandled
.catch(function(error){
console.log("Problem finding files: " + error);
})

您可以使用 Filehound 来完成此操作。

例如: 在/tmp 中查找所有的. html 文件:

const Filehound = require('filehound');


// Build a query restricted to the "html" extension, rooted at /tmp, and pass
// the outcome to a Node-style callback.
Filehound.create()
.ext('html')
.paths("/tmp")
.find((err, htmlFiles) => {
// On failure, log the error and stop.
if (err) return console.error("handle err", err);


// htmlFiles holds whatever the search found.
console.log(htmlFiles);
});

要了解更多信息(和示例),请查看文档:https://github.com/nspragg/filehound

免责声明 : 我是作者。

What, hang on?! ... Okay, maybe this makes more sense to someone else too.

[注意:需要 Node.js 7]

// `import('fs')` would yield a Promise, not the module, so `fs.readdirSync`
// would be undefined; load the module synchronously instead.
const fs = require('fs');
// `dir` is the directory to scan and must be defined by the surrounding code.
const dirCont = fs.readdirSync(dir);
// Keep only the names ending in ".htm" or ".html" (case-insensitive). The
// pattern is anchored so that names such as "page.html.bak" are not accepted.
const files = dirCont.filter((elm) => /\.html?$/i.test(elm));

使用 regex 做任何事情,使它成为你在函数中设置的一个参数,默认值等等。

我的一点浅见:用 map 代替 for 循环

var path = require('path'), fs = require('fs');


/**
 * Recursively collect the joined paths of all non-directory entries below
 * `folder` that `pattern` accepts.
 *
 * @param {string} folder - Directory to walk.
 * @param {RegExp} [pattern=/.*​/] - Tested against each joined path.
 * @param {function(string): void} [callback] - If given, called once for every
 *   accepted path, in the order the paths are discovered.
 * @returns {Array<string>} The accepted paths, in discovery order.
 */
var findFiles = function (folder, pattern = /.*/, callback) {
  // Map every directory entry to the list of matches it contributes...
  const perEntry = fs.readdirSync(folder).map((entry) => {
    const fullName = path.join(folder, entry);

    if (fs.lstatSync(fullName).isDirectory()) {
      return findFiles(fullName, pattern, callback);
    }
    if (!pattern.test(fullName)) {
      return [];
    }
    if (callback) {
      callback(fullName);
    }
    return [fullName];
  });

  // ...then flatten the per-entry lists into one result.
  return perEntry.flat();
};


// HTML files: collect them and also log each one as it is found.
// (The console.log call is now properly closed before the function body ends.)
var html_files = findFiles(myPath, /\.html$/, function(o) { console.log('look what we have found : ' + o); });


// All files: with no pattern given, the default pattern accepts everything.
var all_files = findFiles(myPath);

The following code does a recursive search inside ./ (change it appropriately) and returns an array of absolute file names ending with .html

var fs = require('fs');
var path = require('path');


/**
 * Recursively scan `dir` and return the resolved (absolute) paths of all
 * regular files whose path ends with `pattern`.
 *
 * @param {string} dir - Directory to scan.
 * @param {string} pattern - Suffix to look for, e.g. ".html".
 * @returns {Array<string>} Matching absolute paths, in traversal order.
 */
var searchRecursive = function (dir, pattern) {
  return fs.readdirSync(dir).flatMap((name) => {
    // Resolve against the directory being scanned to get an absolute path.
    const absolute = path.resolve(dir, name);
    const info = fs.statSync(absolute);

    // A directory contributes whatever its own scan turns up.
    if (info.isDirectory()) {
      return searchRecursive(absolute, pattern);
    }

    // A regular file contributes itself when it ends with the pattern.
    const isMatch = info.isFile() && absolute.endsWith(pattern);
    return isMatch ? [absolute] : [];
  });
};


// Example: collect every file below the current directory that ends with ".html".
var files = searchRecursive('./', '.html'); // replace dir and pattern
// as you see fit


console.log(files);

看看 file-regex 这个包

let findFiles = require('file-regex')
// NOTE(review): inside a plain string literal '\.js' is the same as '.js' (the
// backslash is dropped). If the library compiles this string into a RegExp,
// the dot would match any character - '\\.js$' may be what is intended; confirm
// against the file-regex documentation.
let pattern = '\.js'


// Search __dirname with the pattern and print what the callback receives.
// NOTE(review): `err` is not checked here.
findFiles(__dirname, pattern, (err, files) => {
console.log(files);
})

上面的代码片段将打印工作目录中的所有 js文件。

我喜欢使用 glob 包:

const glob = require('glob');


// Match every .html file at any depth below this script's directory
// ("**" spans sub directories) and print the list passed to the callback.
// NOTE(review): `err` is not checked here.
glob(__dirname + '/**/*.html', {}, (err, files)=>{
console.log(files)
})

我已经看了上面的答案,并且把这个对我有用的版本混合在一起:

/**
 * Return the names of the entries directly inside `path` (no recursion) whose
 * name ends with the given extension, compared case-insensitively.
 *
 * The extension is matched literally at the end of the name, so "notes_txt"
 * or "a.txt.bak" are not reported for ".txt".
 *
 * @param {string} path - Directory to read.
 * @param {string} extension - Extension with or without the leading dot,
 *   e.g. ".txt" or "txt".
 * @returns {Array<string>} Matching entry names (not full paths).
 */
function getFilesFromPath(path, extension) {
  // Normalize to exactly one leading dot and lower-case for the comparison.
  const suffix = ('.' + String(extension).replace(/^\./, '')).toLowerCase();
  return fs
    .readdirSync(path)
    .filter((file) => file.toLowerCase().endsWith(suffix));
}


// Example: print the file names that getFilesFromPath returns for ./testdata and ".txt".
console.log(getFilesFromPath("./testdata", ".txt"));

此测试会返回在路径 ./testdata 的文件夹中找到的文件名数组。已在 Node 8.11.3 上验证可用。

由于声誉原因不能添加评论,但请注意以下内容:

使用 fs.readdir 或 node-glob 在一个包含 500,000 个文件的文件夹中按通配符查找文件,需要花费约 2 秒。使用 exec 调用 DIR 只需约 0.05 秒(非递归)或约 0.45 秒(递归)。(我当时是在单个目录中查找约 14 个符合模式的文件。)

So far, I have failed to find any nodejs implementation which uses low level OS wildcard searching for efficiency. But the above DIR/ls based code works wonderfully in windows in terms of efficiency. linux find, however, can be very slow for large directories.

虽然是旧帖,但现在 ES6 的 includes 方法可以直接处理这个问题。

const files = ['file.json', 'other.js'];


// includes() looks for the substring anywhere in the name, so a name such as
// "file.json.bak" would pass as well.
const jsonFiles = files.filter((file) => file.includes('.json'));


// The expected output is kept as a comment; a bare `==>` after the call is a syntax error.
console.log("Files: ", jsonFiles); // ==> Files:  [ 'file.json' ]

安装

您可以通过以下方式安装 walk-sync 这个包:

yarn add walk-sync

用法

const walkSync = require("walk-sync");
// Ask walk-sync for the entries under ./project1/src that match the "**/*.html" glob.
const paths = walkSync("./project1/src", {globs: ["**/*.html"]});
console.log(paths);   //all html file path array

您可以编辑此代码以适应您要做的事情。我使用了 nodejs IO 操作的同步版本,以便在 node 继续执行下一行代码之前返回结果:

const fs = require('fs');
const path = require('path');
    

// Path to the directory(folder) to look into
const dirPath = path.resolve(`${__dirname}../../../../../tests_output`);
        

// Read all files with .html extension in the specified folder above
const filesList = fs.readdirSync(dirPath, (err, files) => files.filter((e) => path.extname(e).toLowerCase() === '.html'));
        

// Read the content of the first file with .txt extension in the folder
const data = fs.readFileSync(path.resolve(`${__dirname}../../../../../tests_output/${filesList[0]}`), 'utf8');


res.writeHead(200, { 'Content-Type': 'text/html' });
res.write(data);
return res.end();

在众多可行的方案之外,还可以再加上 fs-jetpack 库,它非常适合用来编写构建脚本。

const jetpack = require("fs-jetpack");


// Both calls below search "my_project" for entries matching "*.html".

// the sync way: the result is returned directly
const files = jetpack.find("my_project", { matching: "*.html" });
console.log(files);


// or the async way: the result is delivered through the returned promise
jetpack.findAsync("my_project", { matching: "*.html" }).then(files => {
console.log(files);
});