如何在 Node.js 中逐行读取 stdin

我希望用 Node 处理一个文本文件,通过如下命令行调用:

node app.js < input.txt

文件的每一行都需要单独处理,但是一旦处理了输入行就可以忘记。

使用 stdin 的 'data' 事件监听器时,输入流是按字节大小分块到达的(而不是按行),所以我写了下面这段代码。

// Read stdin as UTF-8 text and hand every complete line to processLine.
process.stdin.resume();
process.stdin.setEncoding('utf8');

// Tail of the previous chunk that has not been terminated by "\n" yet.
var lingeringLine = "";

process.stdin.on('data', function(chunk) {
  // Declared locally: the original assigned to an undeclared `lines`,
  // which leaks a global and throws in strict mode / ES modules.
  var lines = chunk.split("\n");

  // Chunks are cut at arbitrary positions, so the first piece continues the
  // line left over from the previous chunk and the last piece may be cut short.
  lines[0] = lingeringLine + lines[0];
  lingeringLine = lines.pop();

  lines.forEach(processLine);
});

process.stdin.on('end', function() {
  // Only flush a real, unterminated last line. When the input ends with "\n"
  // (or is empty) the leftover is "" and must not be reported as a line.
  if (lingeringLine.length > 0) {
    processLine(lingeringLine);
  }
});

但这看起来太草率了。必须对 lines 数组的第一项和最后一项做额外的拼接处理。就没有更优雅的方式了吗?

242146 次浏览

你可以使用readline模块逐行从stdin中读取:

const readline = require('readline');


// Line reader over stdin; `terminal: false` is passed so the stream is not
// treated as an interactive TTY.
const rl = readline.createInterface({ input: process.stdin, output: process.stdout, terminal: false });

// Invoked once per input line.
rl.on('line', function (line) {
  console.log(line);
});

// Invoked a single time when the interface closes.
rl.once('close', function () {
  // end of input
});

readline 是专门为终端(即 process.stdin.isTTY === true)设计的。有很多模块可以为通用流提供按行拆分的功能,例如 split。它让事情变得超级简单:

// Pipe stdin into the stream returned by the `split` module and pass every
// 'data' event it emits to processLine (presumably one line per event —
// confirm against the split module's documentation).
process.stdin.pipe(require('split')()).on('data', processLine)


/**
 * Print one input line to stdout with a trailing exclamation mark.
 *
 * @param {string} line - the line to print
 */
function processLine (line) {
  const decorated = line + '!'
  console.log(decorated)
}

在我的情况下,程序(elinks)返回的行看起来是空的,但实际上有特殊的终端字符、颜色控制代码和退格,所以在其他答案中显示的grep选项对我不起作用。所以我用Node.js写了这个小脚本。我将该文件命名为tight,但这只是一个随机名称。

#!/usr/bin/env node


/**
 * Return the part of a terminal line that would actually be visible.
 *
 * - A backspace ('\b') erases the previously kept character.
 * - An escape sequence is skipped from ESC ('\u001b') up to and including the
 *   next 'm'; an unterminated sequence swallows the rest of the line.
 *
 * @param {string} a - raw line, possibly containing control characters
 * @returns {string} the line with those control sequences removed
 */
function visible(a) {
  var R = ''
  for (var i = 0; i < a.length; i++) {
    if (a[i] === '\b') {
      // The original did `R -= 1`, which coerces the string to a number
      // (NaN / -1) instead of removing the last character.
      R = R.slice(0, -1)
      continue
    }
    if (a[i] === '\u001b') {
      // Advance to the terminating 'm'; the loop's own i++ then steps past it.
      while (i < a.length && a[i] !== 'm') i++
      continue
    }
    R += a[i]
  }
  return R
}


/**
 * Tell whether a line shows nothing but spaces once it has been passed
 * through visible().
 *
 * @param {string} a - raw input line
 * @returns {boolean} true when every remaining character is a space
 */
function empty(a) {
  const shown = visible(a)
  let idx = 0
  while (idx < shown.length) {
    if (shown[idx] != ' ') return false
    idx += 1
  }
  return true
}


var readline = require('readline')
var rl = readline.createInterface({
  input: process.stdin,
  output: process.stdout,
  terminal: false
})

// Echo a line only when empty() reports it as not empty.
rl.on('line', function(line) {
  if (empty(line)) return
  console.log(line)
})

逐行读取流,应该适合大文件管道到stdin,我的版本:

// Running count of lines handled so far.
var n=0;

/**
 * Per-line handler: logs the current counter value and the line, then
 * signals completion through the callback.
 *
 * @param {string} line - the line to handle
 * @param {Function} cb - called with no arguments once the line is handled
 * @returns {*} whatever cb returns
 */
function on_line(line,cb)
{
  const lineNumber = n;
  n = lineNumber + 1;
  console.log(lineNumber,"line ",line);
  return cb();
}


var fs = require('fs');
var readStream = fs.createReadStream('all_titles.txt');
//var readStream = process.stdin;
// Keep the stream paused while it is being configured; it is resumed at the
// very end of this script, after both handlers are attached.
readStream.pause();
readStream.setEncoding('utf8');

// Pieces of the current, not yet terminated line, carried across chunks.
var buffer=[];
readStream.on('data', (chunk) => {
  const newlines=/[\r\n]+/;
  const lines=chunk.split(newlines);

  // The first piece always continues whatever was carried over.
  buffer.push(lines[0]);
  // No line terminator in this chunk: keep accumulating.
  if(lines.length===1) return;

  const str=buffer.join('');
  buffer.length=0;
  // Hold back further input until every complete line of this chunk has been
  // consumed by on_line.
  readStream.pause();

  const last=lines.length-1;
  let i=0;
  // Feeds lines[1..last-1] to on_line one at a time, each call being
  // triggered by the previous line's completion callback.
  function while_next()
  {
    i++;
    if(i<last) return on_line(lines[i],while_next);
    // The final piece may be cut short; carry it over to the next chunk.
    buffer.push(lines[last]);
    return readStream.resume();
  }
  on_line(str,while_next);
}).on('end', ()=>{
  // The original wrote `if(buffer.length) var str=...` without braces, so the
  // guard covered only the assignment: on_line still ran with `undefined` for
  // empty input, and with '' when the input ended in a newline.
  const str=buffer.join('');
  buffer.length=0;
  const finish=()=>{
    ////after end
    console.error('done')
    ////end after end
  };
  if(str.length===0) return finish();
  on_line(str,finish);
});
readStream.resume();

解释:

  • 设置 utf8 编码是为了在字符边界上正确切分数据,而不是切在某个多字节 UTF-8 序列的中间;这样可以确保每次发出的都是完整的多字节字符。
  • 当接收到数据时,输入流会被暂停,用来阻塞输入,直到当前所有行都处理完为止。如果行处理函数比输入慢,这可以防止缓冲区溢出。
  • 如果某个数据块中只有一段不含换行符的内容,就只需把它累积到缓冲区里,不做其他处理,直接返回。一旦数据块中包含多行,就把第一段追加到缓冲区,并把累积的内容作为一整行来处理。
  • 在所有被分割出来的完整行都处理完之后,把最后一段(可能不完整的行)推入缓冲区,并恢复被暂停的流。

es6代码

// Running count of lines handled so far.
var n=0;

/**
 * Async per-line handler: logs the current counter value together with the
 * line. The returned promise resolves once the line has been handled.
 *
 * @param {string} line - the line to handle
 * @returns {Promise<void>}
 */
async function on_line(line)
{
  const lineNumber = n;
  n = lineNumber + 1;
  console.log(lineNumber,"line ",line);
}


var fs = require('fs');
var readStream = fs.createReadStream('all_titles.txt');
//var readStream = process.stdin;
// Keep the stream paused while it is being configured; it is resumed at the
// very end of this script, after both handlers are attached.
readStream.pause();
readStream.setEncoding('utf8');

// Pieces of the current, not yet terminated line, carried across chunks.
var buffer=[];
readStream.on('data', async (chunk) => {
  const newlines=/[\r\n]+/;
  const lines=chunk.split(newlines);

  // The first piece always continues whatever was carried over.
  buffer.push(lines[0]);
  // No line terminator in this chunk: keep accumulating.
  if(lines.length===1) return;

  // Pause before the first await so no further chunk is delivered while the
  // lines of this one are still being processed.
  readStream.pause();

  const str=buffer.join('');
  buffer.length=0; // consumed
  await on_line(str);

  for(let i=1;i<lines.length-1;i++)
    await on_line(lines[i]);

  // The final piece may be cut short; carry it over to the next chunk.
  buffer.push(lines[lines.length-1]);
  readStream.resume();
}).on('end', async ()=>{
  // The original wrote `if(buffer.length) var str=...` without braces, so the
  // guard covered only the assignment: on_line still ran with `undefined` for
  // empty input, and with '' when the input ended in a newline.
  const str=buffer.join('');
  buffer.length=0;
  if(str.length>0)
    await on_line(str);
});
readStream.resume();

我没有测试es6代码

// Work on POSIX and Windows
var fs = require("fs");
// Read everything from file descriptor 0 in one synchronous call, then print
// the contents decoded as UTF-8 text.
var stdinBuffer = fs.readFileSync(0); // STDIN_FILENO = 0
console.log(stdinBuffer.toString('utf8'));
#!/usr/bin/env node


const EventEmitter = require('events');


/**
 * Turn a readable stream into an emitter of 'line' events.
 *
 * Incoming data is split on "\r\n" or "\n". The unterminated tail of each
 * chunk is kept and prefixed to the next one; whatever is left when the
 * stream ends is emitted as a final line if it is non-empty.
 *
 * @param {import('stream').Readable} [input=process.stdin] - stream to read;
 *   its encoding is set to UTF-8
 * @returns {EventEmitter} emitter firing 'line' with each line (terminator
 *   removed)
 */
function stdinLineByLine(input = process.stdin) {
  const stdin = new EventEmitter();
  let buff = '';

  // Decode at the stream level so a multibyte character split across two
  // chunks is not corrupted by converting each Buffer to a string on its own.
  input.setEncoding('utf8');

  input
    .on('data', (data) => {
      buff += data;
      // Declared locally: the original assigned to an undeclared `lines`,
      // which leaks a global and throws in strict mode / ES modules.
      const lines = buff.split(/\r\n|\n/);
      buff = lines.pop();
      lines.forEach((line) => stdin.emit('line', line));
    })
    .on('end', () => {
      if (buff.length > 0) stdin.emit('line', buff);
    });

  return stdin;
}


// Print every line reported by the line emitter.
const stdin = stdinLineByLine();
stdin.on('line', (line) => {
  console.log(line);
});

如果你想先问用户行数:

    //array to save line by line
let xInputs = [];

/**
 * Open a readline interface on stdin/stdout. Each 'line' event closes the
 * interface, appends the line to xInputs and passes it to `resolve`.
 *
 * @param {Function} resolve - receives the line that was read
 */
const getInput = async (resolve)=>{
  const readlineModule = require('readline');
  const rl = readlineModule.createInterface({
    input: process.stdin,
    output: process.stdout,
  });
  rl.on('line', function (line) {
    rl.close();
    xInputs.push(line);
    resolve(line);
  });
}


/**
 * Run getInput `numberOfInputLines` times, strictly one after another, then
 * invoke `callback` with no arguments.
 *
 * @param {number|string} numberOfInputLines - how many reads to chain; it is
 *   only ever used in a `<` comparison against a numeric counter
 * @param {Function} callback - called once after the last read has resolved
 */
const getMultiInput = (numberOfInputLines,callback)=>{
  let chain = Promise.resolve();
  let scheduled = 0;
  while (scheduled < numberOfInputLines) {
    // Each link starts its read only after the previous one has resolved.
    chain = chain.then(() => new Promise((resolve) => getInput(resolve)));
    scheduled += 1;
  }
  chain.then(() => {
    callback();
  });
}


// Read the number of lines first; that value is handed to getMultiInput.
const readline = require('readline').createInterface({ input: process.stdin, output: process.stdout, terminal: false });

readline.on('line', function (line) {
  getMultiInput(line, function () {
    //get here the inputs from xinputs array
  });
  // Close this interface right after scheduling the reads.
  readline.close();
});

// Forward everything arriving on stdin straight to stdout; no per-line
// handling is involved.
process.stdin.pipe(process.stdout);