搜索某个inside存储在bucket中的每个对象;这假设该bucket中的所有对象(比如文本文件)都有一个通用的格式,等等。对于这样的事情,你不得不做科迪·考兰刚刚回答的事情。AWS S3文档有示例代码,展示了如何使用AWS SDK for Java做到这一点:列出键使用AWS SDK for Java(在那里你还会找到PHP和c#的例子)。
def getListOfPrefixesFromS3(dataPath: String, prefix: String, delimiter: String, batchSize: Integer): List[String] = {
var s3Client = new AmazonS3Client()
var listObjectsRequest = new ListObjectsRequest().withBucketName(dataPath).withMaxKeys(batchSize).withPrefix(prefix).withDelimiter(delimiter)
var objectListing: ObjectListing = null
var res: List[String] = List()
do {
objectListing = s3Client.listObjects(listObjectsRequest)
res = res ++ objectListing.getCommonPrefixes
listObjectsRequest.setMarker(objectListing.getNextMarker)
} while (objectListing.isTruncated)
res
}
# upload all S3 files in bucket "demo"
s3.upload_all(bucket_name="demo")
# upload one single file called "prescription.pdf" in bucket "demo"
s3.upload_one(s3_file_name="prescription.pdf", bucket_name="demo")
现在只需使用Mixpeek模块搜索:
# mixpeek api direct
mix = Mixpeek(
api_key=mixpeek_api_key
)
# search
result = mix.search(query="Heartgard")
print(result)
结果可以是:
[
{
"_id": "REDACTED",
"api_key": "REDACTED",
"highlights": [
{
"path": "document_str",
"score": 0.8759502172470093,
"texts": [
{
"type": "text",
"value": "Vetco Prescription\nVetcoClinics.com\n\nCustomer:\n\nAddress: Canine\n\nPhone: Australian Shepherd\n\nDate of Service: 2 Years 8 Months\n\nPrescription\nExpiration Date:\n\nWeight: 41.75\n\nSex: Female\n\n℞ "
},
{
"type": "hit",
"value": "Heartgard"
},
{
"type": "text",
"value": " Plus Green 26-50 lbs (Ivermectin 135 mcg/Pyrantel 114 mg)\n\nInstructions: Give one chewable tablet by mouth once monthly for protection against heartworms, and the treatment and\ncontrol of roundworms, and hookworms. "
}
]
}
],
"metadata": {
"date_inserted": "2021-10-07 03:19:23.632000",
"filename": "prescription.pdf"
},
"score": 0.13313256204128265
}
]