Phantom.js is an headless webkit server and it will load any web page and render it in memory, although you might not be able to see it, there is a Screen Capture feature, in which you can export the current view as PNG, PDF, JPEG and GIF. Have a look at this example from phantom.js documentation
If you want to export HTML to PDF. You have many options. without node even
Option 1: Have a button on your html page that calls window.print() function. use the browsers native html to pdf. use media queries to make your html page look good on a pdf. and you also have the print before and after events that you can use to make changes to your page before print.
Option 2. htmltocanvas or rasterizeHTML. convert your html to canvas , then call toDataURL() on the canvas object to get the image . and use a JavaScript library like jsPDF to add that image to a PDF file. Disadvantage of this approach is that the pdf doesnt become editable. If you want data extracted from PDF, there is different ways for that.
Here's an adaptation of the previous answers which utilizes html-pdf, but also combines it with requestify so it works with an external URL:
Install your dependencies
npm i -S html-pdf requestify
Then, create the script:
//MakePDF.js
var pdf = require('html-pdf');
var requestify = require('requestify');
var externalURL= 'http://www.google.com';
requestify.get(externalURL).then(function (response) {
// Get the raw HTML response body
var html = response.body;
var config = {format: 'A4'}; // or format: 'letter' - see https://github.com/marcbachmann/node-html-pdf#options
// Create the PDF
pdf.create(html, config).toFile('pathtooutput/generated.pdf', function (err, res) {
if (err) return console.log(err);
console.log(res); // { filename: '/pathtooutput/generated.pdf' }
});
});
Then you just run from the command line:
node MakePDF.js
Watch your beautify pixel perfect PDF be created for you (for free!)
Easy to use and allows not only to save pdf as file, but also pipe pdf content to a WriteStream (so I could stream it directly to Google Storage to save there my reports).
Using css + images
It takes css into account. The only problem I faced - it ignored my images. The solution I found was to replace url in src attrribute value by base64, e.g.
For those who don't want to install PhantomJS along with an instance of Chrome/Firefox on their server - or because the PhantomJS project is currently suspended, here's an alternative.
You can externalize the conversions to APIs to do the job. Many exists and varies but what you'll get is a reliable service with up-to-date features (I'm thinking CSS3, Web fonts, SVG, Canvas compatible).
For instance, with PDFShift (disclaimer, I'm the founder), you can do this simply by using the request package:
const request = require('request')
request.post(
'https://api.pdfshift.io/v2/convert/',
{
'auth': {'user': 'your_api_key'},
'json': {'source': 'https://www.google.com'},
'encoding': null
},
(error, response, body) => {
if (response === undefined) {
return reject({'message': 'Invalid response from the server.', 'code': 0, 'response': response})
}
if (response.statusCode == 200) {
// Do what you want with `body`, that contains the binary PDF
// Like returning it to the client - or saving it as a file locally or on AWS S3
return True
}
// Handle any errors that might have occured
}
);
In my view, the best way to do this is via an API so that you do not add a large and complex dependency into your app that runs unmanaged code, that needs to be frequently updated.
Here is a simple way to do this, which is free for 800 requests/month:
var CloudmersiveConvertApiClient = require('cloudmersive-convert-api-client');
var defaultClient = CloudmersiveConvertApiClient.ApiClient.instance;
// Configure API key authorization: Apikey
var Apikey = defaultClient.authentications['Apikey'];
Apikey.apiKey = 'YOUR API KEY';
var apiInstance = new CloudmersiveConvertApiClient.ConvertWebApi();
var input = new CloudmersiveConvertApiClient.HtmlToPdfRequest(); // HtmlToPdfRequest | HTML to PDF request parameters
input.Html = "<b>Hello, world!</b>";
var callback = function(error, data, response) {
if (error) {
console.error(error);
} else {
console.log('API called successfully. Returned data: ' + data);
}
};
apiInstance.convertWebHtmlToPdf(input, callback);
With the above approach you can also install the API on-premises or on your own infrastructure if you prefer.
const fs = require('fs')
const path = require('path')
const utils = require('util')
const puppeteer = require('puppeteer')
const hb = require('handlebars')
const readFile = utils.promisify(fs.readFile)
async function getTemplateHtml() {
console.log("Loading template file in memory")
try {
const invoicePath = path.resolve("./invoice.html");
return await readFile(invoicePath, 'utf8');
} catch (err) {
return Promise.reject("Could not load html template");
}
}
async function generatePdf() {
let data = {};
getTemplateHtml()
.then(async (res) => {
// Now we have the html code of our template in res object
// you can check by logging it on console
// console.log(res)
console.log("Compiing the template with handlebars")
const template = hb.compile(res, { strict: true });
// we have compile our code with handlebars
const result = template(data);
// We can use this to add dyamic data to our handlebas template at run time from database or API as per need. you can read the official doc to learn more https://handlebarsjs.com/
const html = result;
// we are using headless mode
const browser = await puppeteer.launch();
const page = await browser.newPage()
// We set the page content as the generated html by handlebars
await page.setContent(html)
// we Use pdf function to generate the pdf in the same folder as this file.
await page.pdf({ path: 'invoice.pdf', format: 'A4' })
await browser.close();
console.log("PDF Generated")
})
.catch(err => {
console.error(err)
});
}
generatePdf();