如何使用Node.js下载文件(不使用第三方库)?

如何在不使用第三方库的情况下,用Node.js下载文件?

我不需要什么特别的东西。我只想从给定的URL下载文件,然后将其保存到给定的目录。

773470 次浏览

你可以创建一个HTTP GET请求,并将它的response管道到一个可写的文件流:

const http = require('http'); // or 'https' for https:// URLs
const fs = require('fs');


// Stream the remote image straight into a local file.
const file = fs.createWriteStream("file.jpg");
const request = http.get("http://i3.ytimg.com/vi/J---aiyznGQ/mqdefault.jpg", function(response) {
  response.pipe(file);

  // 'finish' fires once every byte has been handed to the file system.
  file.on("finish", () => {
    // close() is asynchronous: report success only after the descriptor is released.
    file.close(() => {
      console.log("Download Completed");
    });
  });
});

// Without an 'error' listener a DNS or connection failure is thrown as an
// uncaught exception and leaves an empty file.jpg behind.
request.on("error", (err) => {
  console.error("Download failed: " + err.message);
  // Remove the file only after the stream has released it.
  file.once("close", () => fs.unlink("file.jpg", () => {}));
  file.destroy();
});

如果您希望支持在命令行上收集信息——比如指定目标文件、目录或URL——可以使用 Commander 之类的工具。

更详细的解释在https://sebhastian.com/nodejs-download-file/

就像Michelle Tilley说的,但是要有适当的控制流:

var http = require('http');
var fs = require('fs');


/**
 * Download `url` over HTTP and save the body to `dest`.
 *
 * @param {string} url - Address to fetch.
 * @param {string} dest - Path of the file to write.
 * @param {Function} [cb] - Called with no argument after the file is closed,
 *   or with the error message (string) when the request or the write fails.
 */
var download = function(url, dest, cb) {
  var file = fs.createWriteStream(dest);
  var settled = false;

  // Tear everything down, delete the partial file, then report the failure once.
  function fail(err) {
    if (settled) return;
    settled = true;
    request.destroy();
    var remove = function() {
      fs.unlink(dest, function() {
        if (cb) cb(err.message);
      });
    };
    if (file.destroyed) {
      // The stream already tore itself down (it was the one that failed).
      remove();
    } else {
      // Wait for the descriptor to be released before unlinking.
      file.once('close', remove);
      file.destroy();
    }
  }

  var request = http.get(url, function(response) {
    response.on('error', fail);
    response.pipe(file);
  });

  // Connection-level failures (DNS, refused, reset) are emitted on the request.
  request.on('error', fail);
  // Disk-level failures (missing directory, permissions) are emitted on the file.
  file.on('error', fail);

  file.on('finish', function() {
    if (settled) return;
    settled = true;
    file.close(cb); // close() is async; cb runs once it completes.
  });
}

如果不等待finish事件,过于简单的脚本最终可能得到一个不完整的文件。

感谢@Augusto Roman指出cb应该传递给file.close,而不是显式调用。

gfxmonk的答案在调用回调和file.close()完成之间存在一个非常细微的竞态条件。file.close()实际上接受一个回调,会在关闭完成时调用。否则,立即使用该文件可能会失败(非常罕见!)。

一个完整的解决方案是:

var http = require('http');
var fs = require('fs');


/**
 * Fetch `url` over HTTP, write the body to `dest`, and invoke `cb` once the
 * file has been closed.
 */
var download = function(url, dest, cb) {
  var out = fs.createWriteStream(dest);

  function closeAndNotify() {
    // close() is asynchronous, so hand it cb instead of calling cb directly.
    out.close(cb);
  }

  function onResponse(res) {
    res.pipe(out);
    out.on('finish', closeAndNotify);
  }

  http.get(url, onResponse);
}

如果不等待finish事件,过于简单的脚本最终可能得到一个不完整的文件。如果不把cb回调交给close来调度,那么在访问文件与文件真正就绪之间可能出现竞态。

不要忘记处理错误!下面的代码是基于Augusto Roman的回答。

var http = require('http');
var fs = require('fs');


/**
 * Download `url` over HTTP into `dest`.
 *
 * @param {string} url - Address to fetch.
 * @param {string} dest - Path of the file to write.
 * @param {Function} [cb] - Called with no argument on success, or with the
 *   error message (string) when the request or the write fails.
 */
var download = function(url, dest, cb) {
  var file = fs.createWriteStream(dest);
  var finished = false;

  // Abort the transfer, delete the partial file and report the error once.
  function fail(err) {
    if (finished) return;
    finished = true;
    request.destroy();
    var remove = function() {
      // fs.unlink() requires a callback; calling it without one throws on
      // current Node.js versions instead of deleting the file.
      fs.unlink(dest, function() {
        if (cb) cb(err.message);
      });
    };
    if (file.destroyed) {
      remove();
    } else {
      file.once('close', remove);
      file.destroy();
    }
  }

  var request = http.get(url, function(response) {
    response.on('error', fail);
    response.pipe(file);
    file.on('finish', function() {
      if (finished) return;
      finished = true;
      file.close(cb);  // close() is async, call cb after close completes.
    });
  }).on('error', fail); // Connection-level errors.

  // Disk-level errors (e.g. missing directory) would otherwise be uncaught.
  file.on('error', fail);
};

超时解决方案,防止内存泄漏:

下面的代码是基于Brandon Tilley的回答:

var http = require('http'),
fs = require('fs');


// Fetch the page and save it to copy.html, but only for a 200 response.
var request = http.get("http://example12345.com/yourfile.html", function(response) {
  if (response.statusCode === 200) {
    var file = fs.createWriteStream("copy.html");
    response.pipe(file);
  } else {
    // Discard the unwanted body so the socket can be released.
    response.resume();
  }
});

// Add timeout. It is registered on the request itself, not inside the
// response callback, so it also covers a server that never answers.
request.setTimeout(12000, function () {
  request.destroy(new Error("Request timed out"));
});

// Destroying the request (and any network failure) emits 'error'; without a
// listener that event would be thrown as an uncaught exception.
request.on("error", function (err) {
  console.error("Download failed: " + err.message);
});

当您得到一个错误时,不要创建文件,并倾向于使用超时在X秒后关闭您的请求。

Vince Yuan的代码很棒,但似乎有问题。

/**
 * Download `url` over HTTP into `dest`.
 *
 * @param {string} url - Address to fetch.
 * @param {string} dest - Path of the file to write.
 * @param {Function} [callback] - Called with no argument on success, or with
 *   the error message (string) when either the request or the file fails.
 */
function download(url, dest, callback) {
  var file = fs.createWriteStream(dest);
  var done = false;

  // Single failure path shared by the request, the response and the file.
  function fail(err) {
    if (done) return;
    done = true;
    request.destroy();
    var remove = function () {
      // unlink needs a callback; the result is deliberately ignored.
      fs.unlink(dest, function () {
        if (callback) callback(err.message);
      });
    };
    if (file.destroyed) {
      remove();
    } else {
      file.once('close', remove);
      file.destroy();
    }
  }

  var request = http.get(url, function (response) {
    response.on('error', fail);
    response.pipe(file);
  });

  // Listening on the file alone misses DNS/connection failures.
  request.on('error', fail);
  file.on('error', fail);

  file.on('finish', function () {
    if (done) return;
    done = true;
    file.close(callback); // close() is async, call callback after close completes.
  });
}

您可以使用https://github.com/douzi8/ajax-request#download

// Download the image with the third-party ajax-request module; the callback
// receives (err, res, body) and is intentionally left empty here.
request.download(
  'http://res.m.ctrip.com/html5/Content/images/57.png',
  (err, res, body) => {}
);

说到错误处理,最好同时监听请求本身的错误。我还会通过检查响应状态码来做验证。这里只把200视为成功,不过其他状态码也可能是可以接受的。

const fs = require('fs');
const http = require('http');


/**
 * Download `url` over HTTP into `dest`, accepting only a 200 response.
 *
 * @param {string} url - Address to fetch.
 * @param {string} dest - Path of the file to write.
 * @param {Function} cb - Called exactly once: with no argument on success,
 *   or with an error message (string) on failure.
 */
const download = (url, dest, cb) => {
  const file = fs.createWriteStream(dest);
  let settled = false;

  // Abort the transfer, delete the (partial) file and then return the error.
  const fail = (message) => {
    if (settled) return;
    settled = true;
    request.destroy();
    const remove = () => fs.unlink(dest, () => cb(message));
    if (file.destroyed) {
      remove();
    } else {
      // Unlink only after the descriptor has been released.
      file.once('close', remove);
      file.destroy();
    }
  };

  const request = http.get(url, (response) => {
    // check if response is success
    if (response.statusCode !== 200) {
      // Going through fail() also removes the empty file created above.
      fail('Response status was ' + response.statusCode);
      return;
    }

    response.on('error', (err) => fail(err.message));
    response.pipe(file);
  });

  // close() is async, call cb after close completes
  file.on('finish', () => {
    if (settled) return;
    settled = true;
    file.close(cb);
  });

  // check for request error too
  request.on('error', (err) => fail(err.message));

  file.on('error', (err) => fail(err.message)); // Handle errors
};

尽管这段代码相对简单,但我还是建议使用 request 模块,因为它能处理更多 http 模块本身不支持的协议(比如 HTTPS!)。

可以这样做:

const fs = require('fs');
const request = require('request');


/**
 * Download `url` into `dest` with the `request` module.
 * `cb` receives nothing on success, or an error message string on failure.
 */
const download = (url, dest, cb) => {
  const out = fs.createWriteStream(dest);
  const req = request.get(url);

  // Delete the (partial) file and then return the error.
  const reportFailure = (err) => {
    fs.unlink(dest, () => cb(err.message));
  };

  // Verify the response code before any data is written.
  req.on('response', (res) => {
    if (res.statusCode === 200) {
      req.pipe(out);
      return undefined;
    }
    return cb('Response status was ' + res.statusCode);
  });

  // close() is async, so cb is handed to it rather than called directly.
  out.on('finish', () => out.close(cb));

  // Request-level and file-level errors share the same cleanup.
  req.on('error', reportFailure);
  out.on('error', reportFailure);
};

编辑:

要让它支持https,请把下面的第一行改成第二行:

const http = require('http');

const http = require('https');

路径:img 类型:JPG 随机唯一id

    /**
     * Start downloading `url` into img/<random id>.jpg and return the file name.
     *
     * The name is returned immediately; the download continues in the
     * background, so the file is not complete when this function returns.
     *
     * @param {string} url - HTTP address of the image.
     * @returns {string} The generated file name (without the "img/" prefix).
     */
    function resim(url) {
      var http = require("http");
      var fs = require("fs");
      var sayi = Math.floor(Math.random()*10000000000); // random numeric id
      var uzanti = ".jpg"; // file extension
      var hedef = "img/"+sayi+uzanti; // destination path
      var file = fs.createWriteStream(hedef);
      var failed = false;

      // Without 'error' listeners a failed request or a missing img/ directory
      // is thrown as an uncaught exception and leaves a partial file behind.
      function cleanup(err) {
        if (failed) return;
        failed = true;
        console.error("resim: download failed: " + err.message);
        request.destroy();
        var remove = function() {
          fs.unlink(hedef, function() {});
        };
        if (file.destroyed) {
          remove();
        } else {
          file.once("close", remove);
          file.destroy();
        }
      }

      var request = http.get(url, function(response) {
        response.on("error", cleanup);
        response.pipe(file);
      });
      request.on("error", cleanup);
      file.on("error", cleanup);

      return sayi+uzanti;
    }

对于想要ES6风格、基于Promise的写法的人,我想大概是这样的:

var http = require('http');
var fs = require('fs');


/**
 * Promise-based download of `url` (HTTP) into `dest`.
 *
 * @param {string} url - Address to fetch.
 * @param {string} dest - Path of the file to write.
 * @returns {Promise<void>} Resolves after the file is closed; rejects with
 *   the underlying Error when the request or the write fails.
 */
function pDownload(url, dest){
  return new Promise((resolve, reject) => {
    var file = fs.createWriteStream(dest);
    var responseSent = false; // flag to make sure that response is sent only once.

    // Abort the transfer, drop the partial file, then reject.
    var fail = err => {
      if(responseSent)  return;
      responseSent = true;
      request.destroy();
      var remove = () => fs.unlink(dest, () => reject(err));
      if (file.destroyed) {
        remove();
      } else {
        file.once('close', remove);
        file.destroy();
      }
    };

    var request = http.get(url, response => {
      response.on('error', fail);
      response.pipe(file);
    });

    file.on('finish', () => {
      if(responseSent)  return;
      responseSent = true;
      file.close(() => resolve());
    });

    request.on('error', fail);
    // A write-stream error (bad path, disk full) has to settle the promise
    // too; with no listener it would crash the process instead.
    file.on('error', fail);
  });
}


// Example: log the outcome of a single download.
const onDownloaded = () => console.log('downloaded file no issues...');
const onDownloadFailed = (e) => console.error('error while downloading', e);

pDownload(url, fileLocation)
  .then(onDownloaded)
  .catch(onDownloadFailed);

使用promise下载,promise会以一个可读流作为结果(resolve)。另外添加了处理重定向的逻辑。

var http = require('http');
var promise = require('bluebird');
var url = require('url');
var fs = require('fs');
var assert = require('assert');


/**
 * Issue an HTTP request and resolve with the response stream, following
 * redirects.
 *
 * @param {string|Object} option - URL string, or an options object accepted
 *   by http.request().
 * @param {number} [redirectsLeft=10] - Remaining redirects to follow before
 *   giving up; guards against redirect loops.
 * @returns {Promise} Resolves with the 200 response (a readable stream).
 *   Rejects with the status code for other final responses, or with an Error
 *   for network failures and redirect loops.
 */
function download(option, redirectsLeft) {
  assert(option);
  if (typeof option === 'string') {
    option = url.parse(option);
  }
  if (redirectsLeft === undefined) {
    redirectsLeft = 10;
  }

  return new promise(function(resolve, reject) {
    http.request(option, function(res) {
      var isRedirect = [301, 302, 303, 307, 308].indexOf(res.statusCode) !== -1;

      if (res.statusCode === 200) {
        resolve(res);
      } else if (isRedirect && res.headers.location) {
        res.resume(); // discard the redirect body so the socket is released
        if (redirectsLeft <= 0) {
          reject(new Error('Too many redirects'));
          return;
        }
        // Location may be relative, so resolve it against the URL just requested.
        var base = option.href ||
          ((option.protocol || 'http:') + '//' +
           (option.hostname || option.host || 'localhost') +
           (option.port ? ':' + option.port : '') +
           (option.path || '/'));
        resolve(download(url.resolve(base, res.headers.location), redirectsLeft - 1));
      } else {
        res.resume();
        reject(res.statusCode);
      }
    })
    .on('error', function(e) {
      reject(e);
    })
    .end();
  });
}


// Save the (possibly redirected) response body to holyhigh.jpg.
download('http://localhost:8080/redirect')
  .then(function(stream) {
    var writeStream = fs.createWriteStream('holyhigh.jpg');

    // Stream failures are asynchronous events, so a try/catch around pipe()
    // never sees them; listen for 'error' on both ends instead.
    stream.on('error', function(e) {
      console.error(e);
    });
    writeStream.on('error', function(e) {
      console.error(e);
    });

    stream.pipe(writeStream);
  })
  .catch(function(e) {
    // Rejections from download() (bad status, network failure).
    console.error(e);
  });
/**
 * Download `url` over HTTP into `path`.
 *
 * The returned promise never rejects: failures are logged with
 * console.error and the promise then resolves with undefined.
 *
 * @param {string} url - Address to fetch.
 * @param {string} path - Path of the file to write.
 * @returns {Promise<void>}
 */
const download = (url, path) => new Promise((resolve, reject) => {
  const request = http.get(url, response => {
    const statusCode = response.statusCode;

    if (statusCode !== 200) {
      response.resume(); // discard the body so the socket is released
      return reject('Download error!');
    }

    const writeStream = fs.createWriteStream(path);
    let failed = false;

    // Stop the transfer and remove the partial file before rejecting.
    const abandon = message => {
      if (failed) return;
      failed = true;
      request.destroy();
      const remove = () => fs.unlink(path, () => reject(message));
      if (writeStream.destroyed) {
        remove();
      } else {
        writeStream.once('close', remove);
        writeStream.destroy();
      }
    };

    writeStream.on('error', () => abandon('Error writing to file!'));
    response.on('error', () => abandon('Download error!'));
    writeStream.on('finish', () => {
      if (!failed) writeStream.close(resolve);
    });
    response.pipe(writeStream);
  });

  // DNS/connection failures are emitted on the request; with no listener
  // they would be thrown as an uncaught exception.
  request.on('error', () => reject('Download error!'));
}).catch(err => console.error(err));

如果您正在使用express,请使用res.download()方法。否则使用fs模块。

// Send the APK as an attachment whenever /read-android is requested.
app.get('/read-android', (req, res) => {
  const apkPath = "/home/sony/Documents/docs/Android.apk";
  res.download(apkPath);
});

(或)

   /**
    * Stream a file from the docs directory to the client as an attachment.
    *
    * @param {Object} req - Request; `req.fileName` names the file to send.
    * @param {Object} res - HTTP response to write to.
    */
   function readApp(req,res) {
     // Keep only the last path segment so "../" cannot escape the directory.
     var file = String(req.fileName).split(/[\\/]/).pop();
     var filePath = "/home/sony/Documents/docs/";

     function notFound() {
       res.writeHead(400, {"Content-Type": "text/plain"});
       res.end("ERROR File does NOT Exists.ipa");
     }

     if (!file || file === "." || file === "..") {
       notFound();
       return;
     }

     // Open the requested file itself instead of probing the directory with
     // the deprecated fs.exists(): a missing file now yields the error
     // response rather than an unhandled stream error.
     var stream = fs.createReadStream(filePath + file);

     stream.on("open", function() {
       res.writeHead(200, {
         "Content-Type": "application/octet-stream",
         // Quote the name and strip characters that would break the header.
         "Content-Disposition" : "attachment; filename=\"" + file.replace(/["\r\n]/g, "_") + "\""});
       stream.pipe(res);
     });

     stream.on("error", function() {
       if (res.headersSent) {
         // Too late for an error response; just drop the connection.
         res.destroy();
         return;
       }
       notFound();
     });
   }
var requestModule=require("request");


// Fetch the resource named by filePath and stream it into abc.zip.
const zipSource = requestModule(filePath);
zipSource.pipe(fs.createWriteStream('abc.zip'));

也许node.js已经改变了,但似乎其他解决方案(使用node v8.1.2)有一些问题:

  1. 您不需要在finish事件中调用file.close()。默认情况下,fs.createWriteStream设置为autoClose: https://nodejs.org/api/fs.html#fs_fs_createwritestream_path_options
  2. 错误时应该调用file.close()。当文件被删除(unlink())时可能不需要这个,但通常它是:https://nodejs.org/api/stream.html#stream_readable_pipe_destination_options
  3. 没有删除statusCode !== 200上的临时文件
  4. 不带回调的fs.unlink()已弃用(输出警告)
  5. 如果dest文件存在;它被覆盖了

下面是一个修改后的解决方案(使用ES6和promises),它可以处理这些问题。

const http = require("http");
const fs = require("fs");


/**
 * Download `url` over HTTP into `dest`, refusing to overwrite an existing file.
 *
 * @param {string} url - Address to fetch.
 * @param {string} dest - Path of the file to create (must not exist yet).
 * @returns {Promise<void>} Resolves once the data has been written. Rejects
 *   with a message string: "File already exists", the server status line, or
 *   the underlying error message.
 */
function download(url, dest) {
  return new Promise((resolve, reject) => {
    const file = fs.createWriteStream(dest, { flags: "wx" });
    let created = false; // true once this call has actually created dest
    let settled = false;

    file.on("open", () => {
      created = true;
    });

    // Abort, clean up and reject once. The file is deleted only when this
    // call created it; otherwise a failed request would remove a
    // pre-existing file that "wx" had refused to touch.
    const fail = message => {
      if (settled) return;
      settled = true;
      request.destroy();
      const remove = () => {
        if (created) {
          fs.unlink(dest, () => reject(message)); // Delete temp file
        } else {
          reject(message);
        }
      };
      if (file.destroyed) {
        remove();
      } else {
        file.once("close", remove);
        file.destroy();
      }
    };

    const request = http.get(url, response => {
      if (response.statusCode === 200) {
        response.on("error", err => fail(err.message));
        response.pipe(file);
      } else {
        fail(`Server responded with ${response.statusCode}: ${response.statusMessage}`);
      }
    });

    request.on("error", err => fail(err.message));

    file.on("finish", () => {
      if (settled) return;
      settled = true;
      resolve();
    });

    file.on("error", err => {
      fail(err.code === "EEXIST" ? "File already exists" : err.message);
    });
  });
}

不借助库而自己实现很容易出错,这里只是指出这一点。以下是一些例子:

以下是我的建议:

  • 调用系统工具,如 wget 或 curl
  • 使用 node-wget-promise 之类的工具,用法同样非常简单:const wget = require('node-wget-promise'); wget('http://nodejs.org/images/logo.svg');
/**
 * Download `url` over HTTP into `dest`.
 *
 * @param {string} url - Address to fetch.
 * @param {string} dest - Path of the file to write.
 * @param {Function} [cb] - Called once with a `{ text }` message object,
 *   for success as well as for failure.
 */
function download(url, dest, cb) {
  var file = null; // set once a response arrives and the file is created
  var reported = false;

  // Deliver the outcome at most once; a missing callback is tolerated.
  function report(msg) {
    if (reported) return;
    reported = true;
    if (cb) cb(msg);
  }

  function fail(err) {
    if (reported) return;
    let errorMsg = {
      text: `Error in file downloadin: ${err.message}`
    }
    request.destroy();
    if (!file) {
      // Nothing was created yet, so there is nothing to delete. Unlinking
      // here would remove a pre-existing file that is not ours.
      report(errorMsg);
      return;
    }
    var remove = function () {
      // fs.unlink() needs a callback; without one it throws on current Node.js.
      fs.unlink(dest, function () {
        report(errorMsg);
      });
    };
    if (file.destroyed) {
      remove();
    } else {
      file.once('close', remove);
      file.destroy();
    }
  }

  var request = http.get(url, function (response) {
    const settings = {
      flags: 'w',
      encoding: 'utf8',
      fd: null,
      mode: 0o666,
      autoClose: true
    };

    file = fs.createWriteStream(dest, settings);
    file.on('error', fail);
    response.on('error', fail);
    response.pipe(file);

    file.on('finish', function () {
      let okMsg = {
        text: `File downloaded successfully`
      }
      // Report only after the descriptor has been released.
      file.close(function () {
        report(okMsg);
      });
    });
  }).on('error', fail); // Handle errors
};

您可以尝试使用res.redirect到https文件下载url,然后它将下载文件。

如:# EYZ0

var fs = require('fs'),
request = require('request');


/**
 * Log the content headers of `uri`, then download it into `filename` with
 * the `request` module.
 *
 * @param {string} uri - Address to fetch.
 * @param {string} filename - Path of the file to write.
 * @param {Function} callback - Called with no argument once the file stream
 *   closes, or with the error when the HEAD request or the download fails.
 */
var download = function(uri, filename, callback){
  var reported = false;
  function done(err) {
    if (reported) return;
    reported = true;
    if (err) {
      callback(err);
    } else {
      callback();
    }
  }

  request.head(uri, function(err, res, body){
    // On failure `res` is undefined; reading res.headers would throw.
    if (err) {
      done(err);
      return;
    }

    console.log('content-type:', res.headers['content-type']);
    console.log('content-length:', res.headers['content-length']);

    request(uri)
      .on('error', done) // network failure during the actual download
      .pipe(fs.createWriteStream(filename))
      .on('close', function() { done(); });
  });
};


// Example: fetch the icon and log when the file stream has closed.
const logDone = () => {
  console.log('done');
};
download('https://www.cryptocompare.com/media/19684/doge.png', 'icons/taskks12.png', logDone);

你好,我认为你可以使用child_process模块和curl命令。

const cp = require('child_process');


/**
 * Download `uri` to `filename` by running the system `curl` binary.
 *
 * The call blocks until curl exits (execFileSync); the function is async
 * only so that callers can `await` it.
 *
 * @param {string} uri - Address to fetch.
 * @param {string} filename - Path of the file to write.
 * @returns {Promise<void>}
 * @throws {Error} When curl cannot be started or exits with a non-zero status.
 */
let download = async function(uri, filename){
  // Arguments are passed as an array and no shell is involved, so quotes,
  // spaces or `;` inside uri/filename cannot inject commands.
  // `--url` marks the address explicitly even if it starts with "-".
  cp.execFileSync('curl', ['-o', filename, '--url', uri]);
};




// Example driver: download one image into the current directory.
async function test() {
  const source = 'http://zhangwenning.top/20181221001417.png';
  const target = './20181221001417.png';
  await download(source, target)
}


test()

另外,当你需要下载体积大或数量多的文件时,可以使用 cluster 模块来利用更多的CPU核心。

我更喜欢request(),因为你可以同时使用http和https。

// request() handles both http:// and https:// URLs; stream the body to disk.
const catImage = request('http://i3.ytimg.com/vi/J---aiyznGQ/mqdefault.jpg');
catImage.pipe(fs.createWriteStream('cat.jpg'))

这里还有另一种方法来处理它没有第三方依赖,也搜索重定向:

        /**
         * Download `url` over HTTPS into `dest`, following 301/302 redirects.
         *
         * @param {string} url - Address to fetch.
         * @param {string} dest - Path of the file to write.
         * @param {Function} [cb] - Called once the final file has been closed.
         */
        var download = function(url, dest, cb) {
          https.get(url, function(response) {
            var location = response.headers.location;
            if ([301,302].indexOf(response.statusCode) !== -1 && location) {
              response.resume(); // discard the redirect body
              // Location may be relative, so resolve it against the current URL.
              var next = (typeof url === 'string' || url instanceof URL)
                ? new URL(location, url).href
                : location;
              download(next, dest, cb);
              // Stop here: falling through would also write the redirect body
              // to dest and call cb a second time.
              return;
            }

            // Create the file only for the response that is actually saved.
            var file = fs.createWriteStream(dest);
            response.pipe(file);
            file.on('finish', function() {
              file.close(cb);  // close() is async, call cb after close completes.
            });
          });
        }


✅因此,如果您使用 pipeline,它会在出错时关闭所有相关的流,并确保不会发生内存泄漏。

工作的例子:

const http = require('http');
const { pipeline } = require('stream');
const fs = require('fs');


// Destination for the downloaded image.
const file = fs.createWriteStream('./file.jpg');

const request = http.get('http://via.placeholder.com/150/92c952', response => {
  // pipeline() forwards errors from either stream and destroys both on failure.
  pipeline(
    response,
    file,
    err => {
      if (err)
        console.error('Pipeline failed.', err);
      else
        console.log('Pipeline succeeded.');
    }
  );
});

// pipeline() only exists once a response has arrived; DNS and connection
// failures are emitted on the request and would otherwise be uncaught.
request.on('error', err => {
  file.destroy();
  console.error('Request failed.', err);
});

我的答案" stream上的。pipe和。pipeline有什么区别"

download.js(即/project/utils/download.js)

const fs = require('fs');
const request = require('request');


/**
 * Log the content headers of `uri`, then download it into `filename` with
 * the `request` module.
 *
 * @param {string} uri - Address to fetch.
 * @param {string} filename - Path of the file to write.
 * @param {Function} callback - Called with no argument once the file stream
 *   closes, or with the error when the HEAD request or the download fails.
 */
const download = (uri, filename, callback) => {
  let reported = false;
  const done = (err) => {
    if (reported) return;
    reported = true;
    if (err) {
      callback(err);
    } else {
      callback();
    }
  };

  request.head(uri, (err, res, body) => {
    // A failed HEAD leaves `res` undefined; bail out before touching it.
    if (err) {
      done(err);
      return;
    }

    console.log('content-type:', res.headers['content-type']);
    console.log('content-length:', res.headers['content-length']);

    request(uri)
      .on('error', done) // network failure during the actual download
      .pipe(fs.createWriteStream(filename))
      .on('close', () => done());
  });
};


module.exports = { download };

app.js

...
// part of imports
const { download } = require('./utils/download');


...
// add this function wherever
// Kick off the download; the callback runs once the file stream has closed.
const onImageSaved = () => {
  console.log('done')
};
download('https://imageurl.com', 'imagename.jpg', onImageSaved);

基于上面的其他答案和一些微妙的问题,以下是我的尝试。

  1. 在连接网络之前,使用fs.access检查文件是否存在。
  2. 如果您获得200 OK状态代码,则只创建fs.createWriteStream。这减少了整理临时文件句柄所需的fs.unlink命令的数量。
  3. 即使在200 OK上,我们仍然可能在reject上,因为EEXIST文件已经存在(想象一下,当我们进行网络调用时,另一个进程创建了该文件)。
  4. 如果收到301 Moved Permanently或302 Found (Moved Temporarily)重定向,则按照响应头中Location给出的地址递归调用download
  5. 递归调用download的一些其他答案的问题是,他们调用resolve(download)而不是download(...).then(() => resolve()),因此Promise将在下载实际完成之前返回。这样,嵌套的承诺链将按照正确的顺序解析。
  6. 时下流行的做法似乎是异步清理临时文件,但我选择等清理完成之后才reject,这样当这个Promise resolve或reject时,我就知道从头到尾的所有事情都已经完成。
const https = require('https');
const fs = require('fs');


/**
* Download a resource from `url` to `dest`.
* @param {string} url - Valid URL to attempt download of resource
* @param {string} dest - Valid path to save the file.
* @returns {Promise<void>} - Returns asynchronously when successfully completed download
*/
/**
 * Download a resource from `url` to `dest`, following 301/302 redirects and
 * refusing to overwrite an existing file.
 *
 * @param {string} url - Valid URL to attempt download of resource
 * @param {string} dest - Valid path to save the file.
 * @returns {Promise<void>} Resolves when the download completed. Rejects
 *   with a message string ("File already exists", the server status line or
 *   the underlying error message).
 */
function download(url, dest) {
  return new Promise((resolve, reject) => {
    // Check file does not exist yet before hitting network
    fs.access(dest, fs.constants.F_OK, (err) => {
      if (err === null) {
        reject('File already exists');
        return; // without this the request below would still be sent
      }

      const request = https.get(url, response => {
        if (response.statusCode === 200) {
          const file = fs.createWriteStream(dest, { flags: 'wx' });
          file.on('finish', () => resolve());
          file.on('error', err => {
            file.close();
            if (err.code === 'EEXIST') reject('File already exists');
            else fs.unlink(dest, () => reject(err.message)); // Delete temp file
          });
          response.pipe(file);
        } else if ((response.statusCode === 302 || response.statusCode === 301) && response.headers.location) {
          response.resume(); // discard the redirect body
          // Recursively follow redirects, only a 200 will resolve. Location
          // may be relative, and a failure of the nested download has to
          // reject this promise too or it would stay pending forever.
          download(new URL(response.headers.location, url).href, dest).then(resolve, reject);
        } else {
          response.resume();
          reject(`Server responded with ${response.statusCode}: ${response.statusMessage}`);
        }
      });

      request.on('error', err => {
        reject(err.message);
      });
    });
  });
}

编写自己的解决方案,因为现有的不符合我的要求。

包括:

  • HTTPS下载(HTTP下载将包切换到http)
  • 基于承诺的函数
  • 处理转发路径(状态302)
  • 浏览器头-需要在一些cdn
  • 来自URL的文件名(以及硬编码)
  • 错误处理

代码带有类型标注,更安全。如果你使用的是纯JS(没有Flow,没有TS),可以随意删除类型,或者把它们转成.d.ts文件

index.js

// The module specifier must be a quoted string; a bare identifier is a syntax error.
import httpsDownload from './httpsDownload.js';
httpsDownload('https://example.com/file.zip', './');

httpsDownload.[js|ts]

import https from "https";
import fs from "fs";
import path from "path";


/**
 * Download `url` over HTTPS, following redirects.
 *
 * @param url - Address to fetch.
 * @param folder - Target directory (defaults to the current directory).
 * @param filename - Target file name (defaults to the last URL path segment).
 * @returns Resolves once the file has been written and closed. Rejects on
 *   network errors, write errors and non-2xx responses.
 */
function download(
  url: string,
  folder?: string,
  filename?: string
): Promise<void> {
  return new Promise((resolve, reject) => {
    const req = https
      .request(url, { headers: { "User-Agent": "javascript" } }, (response) => {
        const status = response.statusCode ?? 0;
        const isRedirect = [301, 302, 303, 307, 308].includes(status);

        if (isRedirect && response.headers.location != null) {
          response.resume(); // discard the redirect body
          download(
            buildNextUrl(url, response.headers.location),
            folder,
            filename
          )
            .then(resolve)
            .catch(reject);
          return;
        }

        // Do not save error pages to disk as if they were the file.
        if (status < 200 || status >= 300) {
          response.resume();
          reject(
            new Error(`Download failed: ${status} ${response.statusMessage ?? ""}`)
          );
          return;
        }

        const file = fs.createWriteStream(
          buildDestinationPath(url, folder, filename)
        );
        // Write errors (bad folder, disk full) must reject and stop the transfer.
        file.on("error", (err) => {
          req.destroy();
          reject(err);
        });
        response.on("error", reject);
        response.pipe(file);
        file.on("finish", () => {
          // close() is asynchronous; resolve only after it completes.
          file.close(() => resolve());
        });
      })
      .on("error", reject);
    req.end();
  });
}


/**
 * Resolve a redirect target against the URL that produced it.
 *
 * Handles absolute URLs, protocol-relative ("//host/x"), root-relative
 * ("/x") and path-relative ("x") Location values with the standard URL
 * resolution rules.
 *
 * @param current - URL of the request that was redirected.
 * @param next - Value of the Location header.
 * @returns The absolute URL to request next.
 */
function buildNextUrl(current: string, next: string) {
  return new URL(next, current).href;
}


/**
 * Build the local path for a download: `folder` (default "./") joined with
 * `filename`, or with a name derived from `url` when none is given.
 */
function buildDestinationPath(url: string, folder?: string, filename?: string) {
  const targetFolder = folder ?? "./";
  const targetName = filename ?? generateFilenameFromPath(url);
  return path.join(targetFolder, targetName);
}


/**
 * Derive a file name from the last path segment of `url`.
 *
 * The query string and fragment are ignored. When the path has no usable
 * segment (e.g. it ends in "/"), "download" is returned so the result is
 * never an empty name.
 *
 * @param url - Address the file is downloaded from.
 * @returns A non-empty file name.
 */
function generateFilenameFromPath(url: string): string {
  let pathname: string;
  try {
    pathname = new URL(url).pathname;
  } catch {
    // Not an absolute URL: fall back to stripping query and fragment by hand.
    pathname = url.split(/[?#]/)[0] ?? "";
  }
  const segments = pathname.split("/").filter((segment) => segment !== "");
  return segments[segments.length - 1] ?? "download";
}


export default download;

我建议你使用res.download,如下所示:

// Serve folder/abc.csv (relative to this module) as a file download.
app.get('/download', (req, res) => {
  const csvPath = `${__dirname}/folder/abc.csv`;
  // res.download sets the Content-Disposition header and sends the file.
  res.download(csvPath);
});

使用http2模块

我看到已有的答案使用了http、https和request模块。我想补充一种做法,使用另一个同时支持http和https协议的NodeJS原生模块:

解决方案

我已经参考了官方的NodeJS API,以及关于这个问题的一些其他答案。下面是我编写的测试,它可以按照预期工作:

import * as fs from 'fs';
import * as _path from 'path';
import * as http2 from 'http2';


/* ... */


/**
 * Download a resource over HTTP/2 and save it to `destination`.
 *
 * @param {string} host - Origin to connect to, e.g. 'https://gitlab.com'.
 * @param {string} query - Request path including the query string.
 * @param {string} destination - Target file; relative paths are resolved
 *   against the current working directory. The file must not exist yet.
 * @returns {Promise<{result: boolean}>} Resolves with `{ result: true }`
 *   after the file has been written and closed. Rejects with an Error on a
 *   connection error, a write error or a non-200 status.
 */
async function download( host, query, destination )
{
  return new Promise
  (
    ( resolve, reject ) =>
    {
      // The function is fs.realpathSync (lower-case "p"); the misspelled
      // fs.realPathSync does not exist and throws a TypeError.
      const fullPath = _path.resolve( fs.realpathSync( '.' ), destination );

      // Connect to client:
      const client = http2.connect( host );

      // Prepare a write stream ("wx" refuses to overwrite an existing file):
      const file = fs.createWriteStream( fullPath, { flags: "wx" } );
      let created = false; // true once this call has actually created the file
      let settled = false;
      file.on( 'open', () => { created = true; } );

      // Create a request:
      const request = client.request( { [':path']: query } );

      // Single failure path: stop the transfer, remove our partial file, reject.
      const fail = error =>
      {
        if( settled ) return;
        settled = true;
        request.close();
        client.close();
        const remove = () =>
        {
          if( created ) fs.unlink( fullPath, () => reject( error ) );
          else reject( error );
        };
        if( file.destroyed ) remove();
        else
        {
          file.once( 'close', remove );
          file.destroy();
        }
      };

      client.on( 'error', fail );
      request.on( 'error', fail );
      file.on( 'error', fail );

      // On initial response handle non-success (!== 200) status error:
      request.on
      (
        'response',
        ( headers/*, flags*/ ) =>
        {
          if( headers[':status'] !== 200 )
          {
            fail( new Error( `Server responded with ${headers[':status']}` ) );
            return;
          }

          // Pipe raw bytes. Decoding as utf8 first would corrupt binary
          // payloads, and pipe() also respects back-pressure.
          request.pipe( file );
        }
      );

      // Resolve only after everything has been flushed and the file closed:
      file.on
      (
        'finish',
        () =>
        {
          if( settled ) return;
          settled = true;
          client.close();
          file.close( () => resolve( { result: true } ) );
        }
      );

      /*
      You can use request.setTimeout( 12000, () => {} ) for aborting
      after period of inactivity
      */

      // Fire off [flush] the request:
      request.end();
    }
  );
}

然后,例如:

/* ... */


// Example call: fetch an API resource from gitlab.com into tmp/tmpFile.
const exampleHost = 'https://gitlab.com';
const exampleQuery = '/api/v4/...';
const exampleTarget = 'tmp/tmpFile';

let downloaded = await download( exampleHost, exampleQuery, exampleTarget );

if( downloaded.result )
{
  // Success!
}


// ...

外部引用

编辑信息

  • 解决方案是为typescript编写的,函数是类方法 -但没有注意到这一点,如果没有正确使用function声明,这个解决方案将无法为假定的javascript用户工作,我们的贡献者及时添加了function声明。谢谢!

我发现这种方法是最有帮助的,特别是当涉及到pdf和其他随机文件时。

import fs from "fs";


// Append the downloaded bytes to the output file (created when missing).
const onSaved = (err) => {
  if (err) throw err;
  console.log("File saved!");
};

fs.appendFile("output_file_name.ext", fileDataInBytes, onSaved);

现代版本(ES6、Promise、Node 12.x+),同时适用于https和http。它还支持301和302重定向。我决定不使用第三方库,因为用Node.js标准库就能很容易地实现。

// download.js
import fs from 'fs'
import https from 'https'
import http from 'http'
import { basename } from 'path'
import { URL } from 'url'


const TIMEOUT = 10000


/**
 * Download `url` (http or https) into `dest`, following redirects.
 *
 * @param {string} url - Absolute URL to fetch.
 * @param {string} [dest] - Target path; defaults to the last segment of the
 *   URL path. The file must not exist yet (it is opened with "wx").
 * @param {number} [redirectsLeft=10] - Redirects still allowed; guards
 *   against redirect loops.
 * @returns {Promise<void>} Resolves once the data has been written. Rejects
 *   with an Error on a bad status, network or write failure, or timeout.
 */
function download (url, dest, redirectsLeft = 10) {
  const uri = new URL(url)
  if (!dest) {
    dest = basename(uri.pathname)
  }
  const pkg = uri.protocol === 'https:' ? https : http

  return new Promise((resolve, reject) => {
    let settled = false
    let abortTransfer = null // set while a file is being written

    const settle = (fn, value) => {
      if (settled) return
      settled = true
      fn(value)
    }
    // Every failure funnels through here so a file in progress gets cleaned up.
    const fail = (err) => {
      if (abortTransfer) abortTransfer(err)
      else settle(reject, err)
    }

    const request = pkg.get(uri.href)

    request.on('response', (res) => {
      if (res.statusCode === 200) {
        const file = fs.createWriteStream(dest, { flags: 'wx' })
        let created = false // true once this call has created dest
        let aborted = false
        file.on('open', () => { created = true })

        abortTransfer = (err) => {
          if (aborted || settled) return
          aborted = true
          request.destroy()
          const remove = () => {
            // Never delete a pre-existing file that "wx" refused to open.
            if (created) fs.unlink(dest, () => settle(reject, err))
            else settle(reject, err)
          }
          if (file.destroyed) {
            remove()
          } else {
            file.once('close', remove)
            file.destroy()
          }
        }

        res.on('error', fail)
        // Without this listener an EEXIST from "wx" crashes the process.
        file.on('error', fail)
        // Resolve on the file's 'finish', not the response's 'end', so the
        // data has been flushed before callers continue.
        file.on('finish', () => { if (!aborted) settle(resolve) })
        res.pipe(file)
      } else if ([301, 302, 303, 307, 308].includes(res.statusCode) && res.headers.location) {
        res.resume()
        if (redirectsLeft <= 0) {
          fail(new Error('Too many redirects'))
          return
        }
        // Recursively follow redirects, only a 200 will resolve. Location may
        // be relative, and a nested failure has to reject this promise too.
        download(new URL(res.headers.location, uri).href, dest, redirectsLeft - 1)
          .then(() => settle(resolve), fail)
      } else {
        res.resume()
        fail(new Error(`Download request failed, response status: ${res.statusCode} ${res.statusMessage}`))
      }
    })

    // Destroying the request emits 'error'; it must always have a listener.
    request.on('error', fail)
    request.setTimeout(TIMEOUT, function () {
      const err = new Error(`Request timeout after ${TIMEOUT / 1000.0}s`)
      request.destroy(err)
      fail(err)
    })
  })
}


export default download

这是我基于 Andrey Tkachenko 的 gist 修改而来的。

将其包含在另一个文件中并使用

// download.js uses `export default`, so it has to be loaded with `import`;
// require() would not hand back the function itself.
import download from './download.js'

const url = 'https://raw.githubusercontent.com/replace-this-with-your-remote-file'
console.log('Downloading ' + url)


/**
 * Download `url` into the file "server" and log the outcome.
 * Failures are logged, not rethrown.
 */
async function run() {
  console.log('Downloading file')
  try {
    await download(url, 'server')
    console.log('Download done')
  } catch (e) {
    console.log('Download failed')
    console.log(e.message)
  }
}


run()

更新:

Node v18及以上版本自带原生的Fetch API支持。请直接使用它。

最初的回答:

对于支持Promise的节点,与其他答案相比,一个简单的获取API(部分)的Node shim只需要少量额外的代码:

const fs = require(`fs`);
const http = require(`http`);
const https = require(`https`);


module.exports = function fetch(url) {
return new Promise((resolve, reject) => {
const data = [];
const client = url.startsWith("https") ? https : http;
client
.request(url, (res) => {
res.on(`data`, (chunk) => data.push(chunk));
res.on(`end`, () => {
const asBytes = Buffer.concat(data);
const asString = asBytes.toString(`utf8`);
resolve({
arrayBuffer: async () => asBytes,
json: async () => JSON.parse(asString),
text: async () => asString,
});
});
res.on(`error`, (e) => reject(e));
})
.end();
});
};

你可以用它来做任何你需要的事情,使用普通的fetch语法:

// fs is needed for writeFileSync below; the snippet used it without requiring it.
const fs = require(`fs`);
const fetch = require(`./tiny-fetch.js`);


// Binary download: write the raw bytes straight to disk.
fetch(`https://placekitten.com/200/300`)
.then(res => res.arrayBuffer())
.then(bytes => fs.writeFileSync(`kitten.jpg`, bytes))
.catch(e => console.error(e));


// JSON download: parse and print the object.
fetch(`https://jsonplaceholder.typicode.com/todos/1`)
.then(res => res.json())
.then(obj => console.log(obj))
.catch(e => console.error(e));


// etc.