如何使用 Node.js 从 AWS DynamoDB 获取/扫描所有项目

如何使用 Node.js 从 AWS DynamoDB 获取/扫描所有项目?我把我的代码贴在这里。

// Question code: tries to fetch every user whose user_status is "Y" with a
// paginated Query. It fails with the ValidationException quoted after this
// snippet, which reports that the key schema element `user_id` is missing.
var docClient = new aws.DynamoDB.DocumentClient();
var params = {
TableName:"users",
// Only user_status appears in the key condition.
KeyConditionExpression:"user_status=:status",
ExpressionAttributeValues: {
":status": "Y"
}
};


// Runs the query and re-issues it for as long as the response carries a
// LastEvaluatedKey; `callback` receives the error or the final result.
var queryExecute = function(callback) {
docClient.query(params,function(err,result) {
if(err) {
console.log(err)
callback(err);
} else {
console.log(result);


if(result.LastEvaluatedKey) {
// Continue from where the previous page stopped.
params.ExclusiveStartKey = result.LastEvaluatedKey;
queryExecute(callback);
} else {
// NOTE(review): `items` is not defined anywhere in this snippet, and
// result.Items is never collected across pages.
callback(err,items);
}
}
});
}
// NOTE(review): `callback` is not defined in this snippet either.
queryExecute(callback);

这段代码给出了下面的错误。

ValidationException: Query condition missed key schema element: `user_id`.

这里的主键是 user_id。我不想在查询条件中使用它,因为如果在 KeyConditionExpression 中提到主键,就需要给它设置一个值。也许我错了。但是,请建议一个好方法,从 DynamoDB 获取所有 user_status = "Y" 的项目。

108881 次浏览

If you would like to get the data from DynamoDB without using Hash key value, you need to use Scan API.

Note: The Scan API reads all the items in the table to get the results. So, it is a costly operation in DynamoDB.

Alternate Approach : Use GSI

Scan code for the above scenario:

// Scan the "users" table page by page, keeping only the items whose
// user_status equals the placeholder value, and log each item with a running
// counter.
const docClient = new AWS.DynamoDB.DocumentClient();

const params = {
  TableName: "users",
  // user_status is referenced through the #user_status name placeholder.
  FilterExpression: "#user_status = :user_status_val",
  ExpressionAttributeNames: { "#user_status": "user_status" },
  ExpressionAttributeValues: { ":user_status_val": 'somestatus' },
};

// Number of items logged so far, across all pages.
let count = 0;

/**
 * Callback for docClient.scan: logs the page's items and requests the next
 * page while the response reports a LastEvaluatedKey.
 *
 * @param {Error|null} err - Scan error, if any.
 * @param {object} data - Scan response (Items, optional LastEvaluatedKey).
 */
function onScan(err, data) {
  if (err) {
    console.error("Unable to scan the table. Error JSON:", JSON.stringify(err, null, 2));
    return;
  }

  console.log("Scan succeeded.");
  for (const itemdata of data.Items) {
    count += 1;
    console.log("Item :", count, JSON.stringify(itemdata));
  }

  // No LastEvaluatedKey means this was the final page.
  if (data.LastEvaluatedKey === undefined) {
    return;
  }
  console.log("Scanning for more...");
  params.ExclusiveStartKey = data.LastEvaluatedKey;
  docClient.scan(params, onScan);
}

docClient.scan(params, onScan);

This is working for me:

/**
 * Reads every item of a table by following Scan pagination to the end.
 *
 * Uses the `documentClient` that is in scope where this function is defined.
 *
 * @param {string} tableName - Name of the table to scan.
 * @returns {Promise<object[]>} All items from all pages, in scan order.
 */
export const scanTable = async (tableName) => {
  const params = {
    TableName: tableName,
  };

  const scanResults = [];
  // Must be `let`: it is reassigned for each page (a bare `const items;`
  // does not even parse).
  let page;
  do {
    page = await documentClient.scan(params).promise();
    for (const item of page.Items) {
      scanResults.push(item);
    }
    // Feed the cursor back in so the next scan resumes after this page.
    params.ExclusiveStartKey = page.LastEvaluatedKey;
  } while (page.LastEvaluatedKey !== undefined);

  return scanResults;
};

A node express solution that returns the data in JSON format:

// Express handler body: scan `ddbTable` for items whose `aws` attribute equals
// the `aws` value in scope and whose Id begins with "contact", then send all
// of them back as JSON once the last page has been read.
// Assumes `ddbTable`, `aws`, `docClient` and the Express response `res` are in scope.
let datapack = [];
const item = {
  TableName: ddbTable,
  FilterExpression: "aws = :e AND begins_with ( Id, :t )",
  ExpressionAttributeValues: {
    ":t": "contact",
    ":e": aws,
  },
  ProjectionExpression: "Id,FirstName,LastName,cEmail",
};

/**
 * Callback for docClient.scan: accumulates each page into `datapack` and
 * either requests the next page or sends the response.
 *
 * @param {Error|null} err - Scan error, if any.
 * @param {object} data - Scan response (Items, optional LastEvaluatedKey).
 */
function onScan(err, data) {
  if (err) {
    console.error("Unable to scan the table. Error JSON:", JSON.stringify(err, null, 2));
    // Answer the request; otherwise the client waits until it times out.
    res.status(500).json({ error: "Unable to scan the table" });
    return;
  }
  datapack = datapack.concat(data.Items);
  if (typeof data.LastEvaluatedKey !== "undefined") {
    // More pages remain: resume after the last evaluated key.
    item.ExclusiveStartKey = data.LastEvaluatedKey;
    docClient.scan(item, onScan);
  } else {
    res.json(datapack);
  }
}

docClient.scan(item, onScan);

Here is an answer that will deliver indexed results rather than using an expensive scan, also in JSON format node/express. Note use of docClient.query:

 // Express handler body: query the "cEmailIndex" index for items whose `aws`
// key equals the `aws` value in scope and whose cEmail begins with `search`,
// then send all pages back as JSON.
// Assumes `ddbTable`, `aws`, `search`, `docClient` and the Express response
// `res` are in scope.
let datapack = [];
const item = {
  TableName: ddbTable,
  IndexName: "cEmailIndex",
  KeyConditionExpression: "aws = :e AND begins_with ( cEmail, :t )",
  ExpressionAttributeValues: {
    ":t": search,
    ":e": aws,
  },
  ProjectionExpression: "Id,FirstName,LastName,cEmail",
};

/**
 * Callback for docClient.query (the name is kept from the scan variant):
 * accumulates each page into `datapack` and either requests the next page or
 * sends the response.
 *
 * @param {Error|null} err - Query error, if any.
 * @param {object} data - Query response (Items, optional LastEvaluatedKey).
 */
function onScan(err, data) {
  if (err) {
    // This snippet issues a Query, so the log message says so.
    console.error("Unable to query the table. Error JSON:", JSON.stringify(err, null, 2));
    // Answer the request; otherwise the client waits until it times out.
    res.status(500).json({ error: "Unable to query the table" });
    return;
  }
  datapack = datapack.concat(data.Items);
  if (typeof data.LastEvaluatedKey !== "undefined") {
    // More pages remain: resume after the last evaluated key.
    item.ExclusiveStartKey = data.LastEvaluatedKey;
    docClient.query(item, onScan);
  } else {
    res.json(datapack);
  }
}

docClient.query(item, onScan);

The AWS documentation example didn't work for me. @Hank's approach did the trick.

Using handler inside a lambda:

const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({
// optional tuning - 50% faster(cold) / 20% faster(hot)
apiVersion: '2012-08-10',
sslEnabled: false,
paramValidation: false,
convertResponseTypes: false
});


const tableName = 'series';


exports.handler = async (event, context, callback) => {
let params = { TableName: tableName };


let scanResults = [];
let items;


do {
items = await docClient.scan(params).promise();
items.Items.forEach((item) => scanResults.push(item));
params.ExclusiveStartKey = items.LastEvaluatedKey;
} while (typeof items.LastEvaluatedKey != "undefined");


callback(null, scanResults);
};

Using Promises and async

// Load the AWS SDK and set its global configuration to region us-east-1.
const aws = require('aws-sdk');
aws.config.update({ region: 'us-east-1' });
// Document client that scanAll (below) issues its scan requests through.
const documentClient = new aws.DynamoDB.DocumentClient();


/**
 * Runs a DocumentClient scan to completion and returns the items of all pages.
 *
 * The caller's `params` object is left untouched, so it can be reused for
 * another full scan afterwards.
 *
 * @param {object} params - Scan parameters (TableName, filters, ...).
 * @returns {Promise<object[]>} Items from every page, in scan order.
 */
const scanAll = async (params) => {
  // Shallow copy: the pagination cursor must not leak into the caller's object,
  // otherwise a later scan with the same params would start mid-table.
  const pageParams = { ...params };
  const itemsAll = [];
  do {
    const data = await documentClient.scan(pageParams).promise();
    // Plain loop instead of push(...data.Items): spreading a very large page
    // into call arguments can exceed the engine's argument limit.
    for (const item of data.Items) {
      itemsAll.push(item);
    }
    pageParams.ExclusiveStartKey = data.LastEvaluatedKey;
  } while (pageParams.ExclusiveStartKey);
  return itemsAll;
};

Use like this

// scanAll is async and returns a Promise; await it (inside an async function
// or an ES module) to get the array of items rather than the pending Promise.
const itemsAll = await scanAll(params);

The code is the same for query (just replace scan with query)

You can use the ScanPaginator from @aws/dynamodb-query-iterator:

import { ScanPaginator } from '@aws/dynamodb-query-iterator';
import DynamoDB = require('aws-sdk/clients/dynamodb');


// Scan input: "users" items whose user_status equals the placeholder value.
const usersScanInput = {
  TableName: "users",
  FilterExpression: "#user_status = :user_status_val",
  ExpressionAttributeNames: { "#user_status": "user_status" },
  ExpressionAttributeValues: { ":user_status_val": 'somestatus' },
};

// The paginator is consumed with `for await`, one scan page per iteration.
const paginator = new ScanPaginator(new DynamoDB.DocumentClient(), usersScanInput);

for await (const page of paginator) {
  // do something with `page`, e.g. myFunction(page.Items)
}

I use promises like this:

// AWS SDK module and the DocumentClient that dbRead (below) scans with.
let AWS = require('aws-sdk');
let docClient = new AWS.DynamoDB.DocumentClient();


/**
 * Scans with `docClient` until the last page and returns all items.
 *
 * Iterates instead of recursing, and never writes the pagination cursor into
 * the caller's `params`, so the same params object can be reused safely.
 *
 * @param {object} params - Scan parameters (TableName, filters, ...).
 * @returns {Promise<object[]>} Items from every page, in scan order.
 */
async function dbRead(params) {
  // Private copy that carries the cursor between pages.
  const pageParams = { ...params };
  const data = [];
  do {
    const result = await docClient.scan(pageParams).promise();
    for (const item of result.Items) {
      data.push(item);
    }
    pageParams.ExclusiveStartKey = result.LastEvaluatedKey;
  } while (pageParams.ExclusiveStartKey);
  return data;
}

and to use it:

// Scan parameters: only the table name is given.
let params = {
TableName: 'Table'
};
// `await` here requires an async function or the top level of an ES module.
let data = await dbRead(params);
const AWS = require('aws-sdk');
const docClient = new AWS.DynamoDB.DocumentClient({
// optional tuning - 50% faster(cold) / 20% faster(hot)
apiVersion: '2012-08-10',
sslEnabled: false,
paramValidation: false,
convertResponseTypes: false,
region: 'us-east-2' // put your region
});
const tableName = 'tableName'; // put your tablename


exports.handler = async (event, context, callback) => {
let params = { TableName: tableName };


let scanResults = [];
let items;


do {
items = await docClient.scan(params).promise();
items.Items.forEach((item) => scanResults.push(item));
params.ExclusiveStartKey = items.LastEvaluatedKey;
} while (typeof items.LastEvaluatedKey != "undefined");


callback(null, scanResults);
};

For those who are NOT USING AWS.DynamoDB.DocumentClient, this solution will work. I have split the functionality into multiple modules for easy readability and using async/await.

// Load the AWS SDK and set its global configuration.
const AWS = require("aws-sdk");
AWS.config.update({
// update table region here
region: "us-west-2"
});
// Low-level DynamoDB client (not the DocumentClient) used by
// performAsynScanOperation below.
var dynamodb = new AWS.DynamoDB();
/**
 * Promise adapter around the callback form of `dynamodb.scan`.
 *
 * @param {object} scanParams - Parameters passed straight to dynamodb.scan.
 * @returns {Promise<object>} Resolves with the scan response, rejects with the scan error.
 */
const performAsynScanOperation = (scanParams) =>
  new Promise((resolve, reject) => {
    dynamodb.scan(scanParams, (err, responseData) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(responseData);
    });
  });


/**
 * Collects the items of every scan page of `tableName`.
 *
 * Each LastEvaluatedKey is logged before the next page is requested.
 *
 * @param {string} tableName - Table to scan.
 * @returns {Promise<object[]>} Items from all pages, in scan order.
 */
const getAllRecords = async (tableName) => {
  const scanParams = { TableName: tableName };
  let allItems = [];
  for (;;) {
    const { Items, LastEvaluatedKey } = await performAsynScanOperation(scanParams);
    allItems = allItems.concat(Items);
    // No cursor in the response: the scan is complete.
    if (!LastEvaluatedKey) {
      return allItems;
    }
    console.log('LastEvaluatedKey', LastEvaluatedKey);
    scanParams.ExclusiveStartKey = LastEvaluatedKey;
  }
};
// Example call: print every item, and report a failed scan instead of leaving
// the rejection unhandled.
getAllRecords('<Name of table>')
  .then((allItems) => {
    console.log(allItems);
  })
  .catch((err) => {
    console.error('Unable to scan the table:', err);
  });

This is a drop-in replacement to scan all records:

/**
 * Scans with the callback-style `db.scan` until the last page and returns all
 * items.
 *
 * The caller's `params` object is not modified.
 *
 * @param {object} params - Scan parameters passed to db.scan.
 * @returns {Promise<object[]>} Items from every page; rejects with the first scan error.
 */
const scanAll = async (params) => {
  // Private copy that carries the pagination cursor between requests.
  const pageParams = { ...params };
  const all = [];
  do {
    const data = await new Promise((resolve, reject) => {
      db.scan(pageParams, (err, page) => (err ? reject(err) : resolve(page)));
    });
    // A response may carry no Items; treat that as an empty page.
    for (const item of data.Items ?? []) {
      all.push(item);
    }
    pageParams.ExclusiveStartKey = data.LastEvaluatedKey;
  } while (pageParams.ExclusiveStartKey);
  return all;
};

Usage:

// `.then` comes first so the success handler only runs when the scan
// succeeded; `.catch` last handles a failed scan (and errors thrown by the
// success handler).
scanAll(query)
  .then((records) => {
    // use the records here
  })
  .catch((err) => {
    // handle the error here
  });

The scan method reads every item in the table and returns all the data in the table. You can provide an optional filter_expression, so that only the items matching your criteria are returned. However, the filter is applied only after the entire table has been scanned. ref

I'm sharing a refactored onScan function; I hope it helps.

var AWS = require("aws-sdk");


// NOTE(review): the endpoint points at http://localhost:8000, presumably a
// locally running DynamoDB; confirm or remove it before using a real table.
AWS.config.update({
region: "us-west-2",
endpoint: "http://localhost:8000"
});


// Document client that read() below passes its scan requests to.
var docClient = new AWS.DynamoDB.DocumentClient();


/**
 * Scans the whole "tableName" table with `docClient` and resolves with all items.
 *
 * @returns {Promise<object[]>} Items from every page; rejects with the scan
 *   error (after logging it) if any page fails.
 */
async function read() {
  const params = {
    TableName: "tableName"
    // options can be passed here e.g.
    // FilterExpression: "#yr between :start_yr and :end_yr",
  };

  let items = [];
  return new Promise((resolve, reject) => {
    function onScan(err, data) {
      if (err) {
        console.error("Unable to scan the table. Error JSON:", JSON.stringify(err, null, 2));
        // Pass the error on so callers can see why the scan failed.
        reject(err);
        return;
      }
      items = items.concat(data.Items);

      // continue scanning if we have more items, because
      // scan can retrieve a maximum of 1MB of data
      if (typeof data.LastEvaluatedKey !== "undefined") {
        params.ExclusiveStartKey = data.LastEvaluatedKey;
        docClient.scan(params, onScan);
      } else {
        resolve(items);
      }
    }
    docClient.scan(params, onScan);
  });
}

Update the code from @Ioannis Tsiokos

This function will return data structure like the normal scan but will scan all the data:

/**
 * Scans to completion and returns one object shaped like a single scan
 * response: the fields of the last page, with Items concatenated and Count /
 * ScannedCount summed over all pages.
 *
 * The caller's `params` object is not modified. A scan failure is logged and
 * then rethrown, so callers do not receive `undefined` in place of a result.
 *
 * @param {object} params - DocumentClient scan parameters.
 * @returns {Promise<object>} Aggregated scan response.
 * @throws The error raised by docClient.scan.
 */
const scanAll = async (params) => {
  // Private copy that carries the pagination cursor between requests.
  const pageParams = { ...params };
  const items = [];
  let count = 0;
  let scannedCount = 0;
  let lastPage = {};
  try {
    do {
      lastPage = await docClient.scan(pageParams).promise();
      // Append in place; re-spreading the accumulated array on every page
      // would copy all earlier items again each time.
      for (const item of lastPage.Items ?? []) {
        items.push(item);
      }
      count += lastPage.Count ?? 0;
      scannedCount += lastPage.ScannedCount ?? 0;
      pageParams.ExclusiveStartKey = lastPage.LastEvaluatedKey;
    } while (pageParams.ExclusiveStartKey);

    return {
      ...lastPage,
      Items: items,
      Count: count,
      ScannedCount: scannedCount,
    };
  } catch (err) {
    console.error(
      'Unable to scan the table. Error JSON:',
      JSON.stringify(err, null, 2)
    );
    throw err;
  }
};

How to use:

 // `await` here requires an async function or the top level of an ES module.
 let resp = await scanAll(params);

An async example using typescript:

import { AttributeValue, DynamoDB, ScanCommandInput, ScanCommandOutput } from "@aws-sdk/client-dynamodb";


// Region comes from the REGION environment variable, with a placeholder fallback.
const AWS_REGION = process.env.REGION ?? "<your-default-region>";
// Amazon DynamoDB service object used by scanTable.
const ddb = new DynamoDB({ region: AWS_REGION });

/**
 * Scans `tableName` page by page and returns the items of all pages.
 *
 * @param tableName - Name of the table to scan.
 * @returns Every item returned by the scan, as attribute-value records.
 */
export async function scanTable(tableName: string): Promise<Record<string, AttributeValue>[]> {
  const params: ScanCommandInput = { TableName: tableName };
  const scanResults: Record<string, AttributeValue>[] = [];

  let page: ScanCommandOutput;
  do {
    page = await ddb.scan(params);
    // Items is optional on the response type; a page without it adds nothing.
    for (const item of page.Items ?? []) {
      scanResults.push(item);
    }
    // Resume the next request after the key this page stopped at.
    params.ExclusiveStartKey = page.LastEvaluatedKey;
  } while (page.LastEvaluatedKey !== undefined);

  return scanResults;
}