// This is the traditional Levenshtein Distance algorithem, though I've tweaked it to make
// it more like Google's autocomplete/suggest. It returns the number of operations
// (insert/delete/substitute) required to change one string into another, with the
// expectation that userTyped is only a partial version of fullEntry.
// Gives us a measurement of how similar the two strings are.
public static int EditDistance(string userTyped, string fullEntry)
{
if (userTyped.Length == 0) // all entries are assumed to be fully legit possibilities
return 0; // at this point, because the user hasn't typed anything.
var inx = fullEntry.IndexOf(userTyped[0]);
if (inx < 0) // If the 1st character doesn't exist anywhere in the entry, it's not
return Int32.MaxValue; // a possible match.
var lastInx = inx;
var lastMatchCount = 0;
TryAgain:
// Is there a better starting point?
var len = fullEntry.Length - inx;
var matchCount = 1;
var k = 1;
for (; k < len; k++)
{
if (k == userTyped.Length || userTyped[k] != fullEntry[k + inx])
{
if (matchCount > lastMatchCount)
{
lastMatchCount = matchCount;
lastInx = inx;
}
inx = fullEntry.IndexOf(userTyped[0], inx + 1);
matchCount = 0;
if (inx > 0)
goto TryAgain;
else
break;
}
else
matchCount++;
}
if (k == len && matchCount > lastMatchCount)
lastInx = inx;
if (lastInx > 0)
fullEntry = fullEntry.Substring(lastInx); // Jump to 1st character match, ignoring previous values
// The start of the Levenshtein Distance algorithem.
var m = userTyped.Length;
var n = Math.Min(m, fullEntry.Length);
int[,] d = new int[m + 1, n + 1]; // "distance" - meaning number of operations.
for (var i = 0; i <= m; i++)
d[i, 0] = i; // the distance of any first string to an empty second string
for (var j = 0; j <= n; j++)
d[0, j] = j; // the distance of any second string to an empty first string
for (var j = 1; j <= n; j++)
for (var i = 1; i <= m; i++)
if (userTyped[i - 1] == fullEntry[j - 1])
d[i, j] = d[i - 1, j - 1]; // no operation required
else
d[i, j] = Math.Min
(
d[i - 1, j] + 1, // a deletion
Math.Min(
d[i, j - 1] + 1, // an insertion
d[i - 1, j - 1] + 1 // a substitution
)
);
return d[m, n];
}
我不知道这是否会回答你的问题,但我做了一个非常简单的输入-自动完成代码使用 C 语言当时。我还没有实现机器学习和神经网络在这方面,所以它不会作出概率计算和诸如此类的东西。它所做的是使用子字符串检查算法检查与输入匹配的第一个索引。
您可以将匹配数据提供到“ dict.txt”文件中。
/* Auto-complete input function in c
@authors: James Vausch
@date: 2018-5-23
- This is a bona-fide self-created program which aims to
stimulate an input auto-suggest or auto-complete function
in C language. This is open source so you can use the code
freely. However if you will use this, just acknowledge the
creator as a sign of respect.
- I'd also like to acknowledge Code with C team whom where I
I got an answer how to have a colored output instead of
using system("color #"). Link down below
https://www.codewithc.com/change-text-color-in-codeblocks-console-window/
- THE GENERAL IDEA IS; WE READ A FILE WITH DICTIONARY WORDS
OR SHALL WE SAY ALL WORDS. WE RUN A WHILE LOOP THAT WILL
GET CHARACTER FROM THE USER USING "getch()" FUNCTION THEN
STORE IT IN A CHARACTER ARRAY THEN IS PASSED ON A FUNCTION
THAT CHECKS IF THE ANY DICTIONARY WORDS HAS A SUBSTRING
THAT IS THE USER INPUT. IF YES(0), THE FUNCTION WILL COPY
THE FOLLOWING STRING FROM THE DICTIONARY ARRAY THEN STORED
IN A TEMP CHAR ARRAY AND PROCESSED. THE PROCESSING SHOULD
BE SIMPLE. WE RUN A LOOP IN WHICH WILL CHECK THE AMOUNT OF
CHARACTERS IN THE MATCHED STRING, THEN WE'LL RUN A LOOP
THAT WILL SORT THE WORDS DECREMENTALLY BASED ON THE AMOUNT
OF CHARACTERS OF THE INPUT SUBSTRING. THEN PRINT THE
PROCESSED STRING ON THE FRONT OF THE INPUT STRING THEN RUN
A LOOP BASED ON THE AMOUNT OF CHARACTERS PRESENT OR STRING
LENGTH OF THE PROCESSED STRING PLUS 10 EXTRA CHARACTERS
ALONG WITH PRINTING SOME BACKWARD TRAVERSE CARET FUNCTION
TO MAKE THE CARET STAY WHERE IT SHOULD BE ALONG WITH
INPUTTING. SIMPLE.
- <EXAMPLE>
INPUT: COM
AFTER LOOP RUN: MATCHED WITH WORD "COMMAND"
AFTER LOOP RUN: INPUT HAS 3 CHARACTERS
LOOP SEQUENCE:
LOOP 0: OMMAND
LOOP 1: MMAND
LOOP 2: MAND
AFTER LOOP: MAND
PRINT: "MAND" AFTER INPUT BUT KEEP CARET ON THE INPUT "COM"
NOTE:
- You'll need the "dict.txt" file or you can create one and
put some stuff there
- Since C Programs run on.. say a terminal, I have not much of a way to
efficiently make a way to use arrow keys for the job.
- you should type your INPUT in LOWERCASE since pressing "Shift_Key + M"
is equivalent to pressing the VK_Right(right arrow key) as well as
the other arrow keys
- the right arrow key has an ascii equivalent of <-32><77>, 77 = M
- to complete the input, you'll need to press right arrow key
- the left arrow key has an ascii equivalent of <-32><75>, 75 = K
- to remove auto-complete suggestion, press left arrow key
TO ADD:
- UP arrow key and DOWN arrow key to cycle through suggestions
*/
//#include <headers.h> //My personal header file
#include <stdio.h>
#include <stdlib.h>
#include <conio.h>
#include <windows.h>
void main(){
SetColor(6);
start();
}
void start(){
int rep = 0;
char dictFile[] = "dict.txt";
loadDictionaryEntries(dictFile);
char inp[50];
printf("\nAuto Complete Program : C");
while(rep == 0){
printf("\nInput: ");
autoCompleteInput(inp);
if(strcasecmp(inp, "exit") == 0){
break;
}
printf("\nOutput: %s", inp);
}
printf("\n");
system("pause");
}
int dictEntryCount = 0;
struct allWords{
char entry[100];
}dictionary[60000];
//============================================================================//
void loadDictionaryEntries(char directory[]){
FILE *file;
int dex = 0;
char str[100];
if(file = fopen(directory, "r")){
printf("File accessed.\n");
while(!feof(file)){
fscanf(file, "%s", str);
//UN-COMMENT line 109 to check if the program is reading from file
//printf("Adding entry %d: \"%s\" to dictionary\n",dex + 1, str);
strcpy(dictionary[dex].entry, str);
dex++;
dictEntryCount++;
}
fclose(file);
printf("[ADDED %d WORDS TO DICTIONARY]\n", dictEntryCount);
}else{
printf(" File cannot be accessed.");
fclose(file);
}
}
void printArray(){
for(int i = 0; i < dictEntryCount; i++){
printf("Index %d: %s\n", i + 1, dictionary[i].entry);
}
}
//============================================================================//
void autoCompleteInput(char input[]){
char matchedWord[100]; //STORAGE FOR THE WORD THAT MATCHES INPUT
char ch; //STORAGE FOR EACH CHARACTER THAT THE USER INPUTS
int i = 0; //COUNTER
int words;
while(i != 200){ //LOOP TO GET EACH CHARACTER FROM KEYBOARD PRESS
SetColor(6);
ch = getch();
clsx(strlen(matchedWord));
if(ch == 13){ //CONDITION TO CHECK IF INPUT IS "ENTER" KEY
break; //BREAKS LOOP IF "ENTER IS PRESSED"
}else if(ch == 8){ //CONDITION TO CHECK IF INPUT IS "BACKSPACE"
if(i == 0){ //IF INPUT IS NULL, DO NOTHING, DONT ERASE ANYTHING
//DO NOTHING
}else{ //IF INPUT IS NOT NULL, ENABLE ERASING
clsx(strlen(matchedWord));
bksp();
i--;
input[i] = '\0';
if(i > 2){
if(matchToDictionary(input, matchedWord) == 0){
words = 0;
processMatchedWord(i, matchedWord);
SetColor(8);
printf("%s", matchedWord);
words = getArrSizeChar(matchedWord);
for(int x = 0; x < words; x++){
printf("\b");
}
}
}
}
}else if(ch == 77){ //CONDITION TO CHECK IF INPUT IS RIGHT ARROW KEY
printf("%s", matchedWord); //PRINT SUGESTED WORD WITH CARET AT FRONT
strcat(input, matchedWord); //CONCATENATE SUGGESTION TO INPUT
i = i + words - 1; //SETS INDEX AT THE END OF INPUT
words = 0; //
}else if(ch == 75){ //CONDITION TO CHECK IS INPUT IS LEFT ARROW KEY
clsx(strlen(matchedWord)); //ERASE SUGGESTION
i--; //DECREMENT INDEX
}else{ //IF CONDITIONS ABOVE ARE NOT MET, DO THIS
input[i] = ch; //INSERT CH AT THE INDEX OF INPUT
printf("%c", ch); //PRINT CHARACTER
input[i + 1] = '\0'; //SET END OF CURRENT INPUT TO NULL
i++;
if(i >= 2){
if(matchToDictionary(input, matchedWord) == 0){
words = 0;
processMatchedWord(i, matchedWord);
SetColor(8);
printf("%s", matchedWord);
words = getArrSizeChar(matchedWord);
for(int x = 0; x < words; x++){
printf("\b");
}
}else{
clsx(strlen(matchedWord));
}
}
}
}
input[i] = '\0'; //NULL ENDING VALUE TO PREVENT UNNECESSARY CHARACTERS
}
int getArrSizeChar(char array[]){
int size = 0;
while(array[size] != '\0'){size++;}
return size;
}
void clsx(int maxVal){
for(int i = 0; i < maxVal + 10; i++){
printf(" ");
}
for(int i = 0; i < maxVal + 10; i++){
printf("\b");
}
}
int matchToDictionary(char input[], char matchedWord[]){
int found = 0;
int dex = dictEntryCount; //LIMIT OF ARRAY / ARRAY BOUND/S
//while(dictionary[dex] != '\0'){ //LOOP TO DETERMINE ARRAY BOUND
//printf("%d", dex);
//dex++; //INCREMENT IF INDEX OF ARRAY IS NOT NULL
//}
//printf("%d", dex);
for(int i = 0; i < dex; i++){ //LOOP TROUGH ALL INDEXES OF DICTIONARY
//CHECKS IF THE INDEX OF DICTIONARY HAS A SUBSTRING INPUT
//printf(" Matching %s and %s\n", dictionary[i], input);
if(containsIgnoreCase(dictionary[i].entry, input) == 0){
//CHECKS IF THE INDEX OF DICTIONARY TOTALLY MATCHES THE INPUT
//IT IS TO PREVENT ERRORS IN AUTO-COMPLETING PROCESS
if(strcasecmp(dictionary[i].entry, input) == 1){
//IF NOT, STORE INDEX OF DICTIONARY TO MATCHED WORD
strcpy(matchedWord, dictionary[i].entry);
found++;
break; //BREAK LOOP
}
}
}
if(found == 1){
return 0;
}else{
return 1;
}
}
void processMatchedWord(int rep, char str[]){
int lim = 0;
int i;
char temp[50];
while(str[lim] != '\0'){
lim++;
}
while(rep != 0){
for(i = 0; i < lim; i++){
str[i] = str[i + 1];
}
rep--;
}
}
//===================================================================//
void bksp(){
printf("\b "); //first backsapce to print an emtpy character
printf("\b"); //second backspace to erase printed character
}
int containsIgnoreCase(char str1[], char str2[]){
char tmp1[100];
char tmp2[100];
toLowerCase(tmp1, str1);
toLowerCase(tmp2, str2);
int i, j = 0, k;
for(i = 0; tmp1[i]; i++){
if(tmp1[i] == tmp2[j]){
for(k = i, j = 0; tmp1[k] && tmp2[j]; j++, k++){
if(tmp1[k] != tmp2[j]){
break;
}
}
if(!tmp2[j]){
return 0;
}
}
}
return 1;
}
void toLowerCase(char destination[], char source[]){
int lim = 0;
int i;
while(source[lim] != '\0'){
lim++;
}
for(i = 0; i < lim; i++){
destination[i] = tolower(source[i]);
}
destination[i] = '\0';
}
/*Console Colors: Windows
0 = Black 8 = Gray
1 = Blue 9 = LBlue
2 = Green 10 = LGreen
3 = Aqua 11 = LAqua
4 = Red 12 = LRed
5 = Purple 13 = LPurple
6 = Yellow 14 = LYellow
7 = White 15 = Bright White
}*/
void SetColor(int ForgC){ //CODE SNIPPET FROM WWW.CODEWITHC.COM
WORD wColor;
//This handle is needed to get the current background attribute
HANDLE hStdOut = GetStdHandle(STD_OUTPUT_HANDLE);
CONSOLE_SCREEN_BUFFER_INFO csbi;
//csbi is used for wAttributes word
if(GetConsoleScreenBufferInfo(hStdOut, &csbi)){
//To mask out all but the background attribute, and to add the color
wColor = (csbi.wAttributes & 0xF0) + (ForgC & 0x0F);
SetConsoleTextAttribute(hStdOut, wColor);
}
return;
}