如何使用 JavaScript 将字符串的波斯语和阿拉伯语数字转换为英语?

如何用一个简单的函数将波斯/阿拉伯数字转换为英文数字?

arabicNumbers = ["١", "٢", "٣", "٤", "٥", "٦", "٧", "٨", "٩", "٠"]
persianNumbers = ["۱", "۲", "۳", "۴", "۵", "۶", "۷", "۸", "۹", "۰"]

它是相同的模式,但是代码页不同。

43238 次浏览

Use this simple function to convert your string

// Lookup tables of global regexes: the pattern at index i matches the
// Persian (first table) or Arabic-Indic (second table) glyph for digit i.
var persianNumbers = [/۰/g, /۱/g, /۲/g, /۳/g, /۴/g, /۵/g, /۶/g, /۷/g, /۸/g, /۹/g];
var arabicNumbers = [/٠/g, /١/g, /٢/g, /٣/g, /٤/g, /٥/g, /٦/g, /٧/g, /٨/g, /٩/g];

/**
 * Rewrites every Persian and Arabic-Indic digit in a string as an ASCII digit.
 *
 * @param {*} str - Value to convert. Anything that is not a string is
 *   handed back untouched.
 * @returns {*} The converted string, or the original value for non-strings.
 */
var fixNumbers = function (str) {
  if (typeof str !== 'string') {
    return str;
  }
  // Fold over the ten digit values, replacing both glyph variants of each.
  return persianNumbers.reduce(function (text, persianPattern, digit) {
    return text.replace(persianPattern, digit).replace(arabicNumbers[digit], digit);
  }, str);
};

Be careful: the Persian digits and the Arabic digits in this code are different Unicode characters (U+06F0–U+06F9 versus U+0660–U+0669), even though several of them look identical.

Example

// Demo: convert a sentence that mixes Persian and Arabic-Indic digits.
var mystr = fixNumbers('Sample text ۱۱۱۵۱ and ٢٨٢٢');

Reference

You could do something like this that uses the index of the number within the string to do the conversion:

/**
 * Converts one Persian (U+06F0–U+06F9) or Arabic-Indic (U+0660–U+0669)
 * digit character to its numeric value.
 *
 * @param {string} fromNum - A single digit character.
 * @returns {number} The value 0–9, or -1 if `fromNum` is not exactly one
 *   Persian or Arabic-Indic digit character.
 */
function convertNumber(fromNum) {
  // Spelled as escapes on purpose: several Persian and Arabic-Indic digits
  // look identical, and a look-alike from the wrong block in this table
  // makes the real digit unresolvable.
  const persianNums = '\u06F0\u06F1\u06F2\u06F3\u06F4\u06F5\u06F6\u06F7\u06F8\u06F9';
  const arabicNums = '\u0660\u0661\u0662\u0663\u0664\u0665\u0666\u0667\u0668\u0669';

  // indexOf('') is 0 and indexOf of a longer string matches substrings, so
  // only a single character may be looked up.
  if (typeof fromNum !== 'string' || fromNum.length !== 1) {
    return -1;
  }

  const persianValue = persianNums.indexOf(fromNum);
  return persianValue !== -1 ? persianValue : arabicNums.indexOf(fromNum);
}


// Demo: show the numeric value of a single Persian digit in a dialog.
var testNum = '۴';
alert(`number is: ${convertNumber(testNum)}`);

Or map using a object like this:

/**
 * Looks up the numeric value of a single Arabic-Indic digit character.
 *
 * @param {string} fromNum - The character to look up.
 * @returns {number} The value 0–9, or -1 if `fromNum` is not one of the
 *   ten Arabic-Indic digit characters.
 */
function convertNumber(fromNum) {
  const arabicMap = {
    '٩': 9,
    '٨': 8,
    '٧': 7,
    '٦': 6,
    '٥': 5,
    '٤': 4,
    '٣': 3,
    '٢': 2,
    '١': 1,
    '٠': 0
  };

  // Own-property check: a bare `arabicMap[fromNum]` also resolves inherited
  // members such as `toString` or `constructor`, which are not undefined and
  // would be returned instead of -1.
  if (Object.prototype.hasOwnProperty.call(arabicMap, fromNum)) {
    return arabicMap[fromNum];
  }
  return -1;
}


// Demo: show the numeric value of a single Arabic-Indic digit in a dialog.
var testNum = '٤';
alert(`number is: ${convertNumber(testNum)}`);

Transforms any Persian or Arabic (or mixed) number to "English" numbers (Hindu–Arabic numerals)

/**
 * Digit converters between ASCII ("English"), Persian and Arabic-Indic
 * numerals. Characters that are not digits of the source script are always
 * copied through unchanged.
 *
 * - toNormal(str):  Persian and/or Arabic-Indic digits -> ASCII digits
 * - toPersian(str): ASCII digits -> Persian digits
 * - toArabic(str):  ASCII digits -> Arabic-Indic digits
 */
var transformNumbers = (function () {
  // Glyph tables: index i holds the glyph for digit i.
  const numerals = {
    persian: ["۰", "۱", "۲", "۳", "۴", "۵", "۶", "۷", "۸", "۹"],
    arabic: ["٠", "١", "٢", "٣", "٤", "٥", "٦", "٧", "٨", "٩"]
  };

  // Replaces each ASCII digit in `str` with the matching glyph of `lang`.
  function fromEnglish(str, lang) {
    const glyphs = numerals[lang];
    const len = str.length;
    let result = "";

    for (let i = 0; i < len; i++) {
      const ch = str[i];
      // Only "0".."9" index into the glyph table. Looking up any other
      // character yields undefined, which would be appended as the literal
      // text "undefined".
      result += ch >= "0" && ch <= "9" ? glyphs[ch] : ch;
    }

    return result;
  }

  return {
    /**
     * @param {string} str - Text possibly containing Persian/Arabic-Indic digits.
     * @returns {string} `str` with those digits rewritten as ASCII digits.
     */
    toNormal: function (str) {
      const len = str.length;
      let result = "";

      for (let i = 0; i < len; i++) {
        const ch = str[i];
        let value = numerals.persian.indexOf(ch);
        if (value === -1) {
          value = numerals.arabic.indexOf(ch);
        }
        result += value === -1 ? ch : value;
      }

      return result;
    },

    /**
     * @param {string} str - Text possibly containing ASCII digits.
     * @param {*} [lang] - Unused; kept so the signature stays unchanged.
     * @returns {string} `str` with ASCII digits rewritten as Persian digits.
     */
    toPersian: function (str, lang) {
      return fromEnglish(str, "persian");
    },

    /**
     * @param {string} str - Text possibly containing ASCII digits.
     * @returns {string} `str` with ASCII digits rewritten as Arabic-Indic digits.
     */
    toArabic: function (str) {
      return fromEnglish(str, "arabic");
    }
  };
})();


//////// ON INPUT EVENT //////////////


// Attach the handlers to the page's first two <input> elements, in document
// order: the first gets onInput_Normal, the second gets onInput_Arabic.
[onInput_Normal, onInput_Arabic].forEach(function (handler, position) {
  document.querySelectorAll('input')[position].addEventListener('input', handler);
});

/** 'input' handler: logs the field's value passed through transformNumbers.toArabic. */
function onInput_Arabic() {
  const converted = transformNumbers.toArabic(this.value);
  console.clear();
  console.log(converted);
}

/** 'input' handler: logs the field's value passed through transformNumbers.toNormal. */
function onInput_Normal() {
  const converted = transformNumbers.toNormal(this.value);
  console.clear();
  console.log(converted);
}
input{ width:90%; margin-bottom:1em; font-size:1.5em; padding:5px; }
<input placeholder="write in Arabic numerals">


<input placeholder="write in normal numerals">

this is a simple way to do that:

/**
 * Rewrites Persian and Arabic-Indic digits in `str` as ASCII digits by
 * subtracting the char code of each script's zero from the matched digit.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The text with both digit sets converted.
 */
function toEnglishDigits(str) {
  // Builds a replacer that maps a matched digit to its distance from `zeroChar`.
  const distanceFrom = (zeroChar) => {
    const base = zeroChar.charCodeAt(0);
    return (match) => match.charCodeAt(0) - base;
  };

  return str
    // convert persian digits [۰۱۲۳۴۵۶۷۸۹]
    .replace(/[۰-۹]/g, distanceFrom('۰'))
    // convert arabic indic digits [٠١٢٣٤٥٦٧٨٩]
    .replace(/[٠-٩]/g, distanceFrom('٠'));
}

an example:

console.log(toEnglishDigits("abc[0123456789][٠١٢٣٤٥٦٧٨٩][۰۱۲۳۴۵۶۷۸۹]"));
// expected result => abc[0123456789][0123456789][0123456789]

The best way to do that is to return the index of the digit in an array:

/**
 * Returns this string with every Persian (U+06F0–U+06F9) and Arabic-Indic
 * (U+0660–U+0669) digit rewritten as the matching ASCII digit. All other
 * characters are left unchanged.
 *
 * @returns {string} The converted string.
 */
String.prototype.toEnglishDigits = function () {
  return this.replace(/[\u06F0-\u06F9\u0660-\u0669]/g, function (chr) {
    // Both digit blocks start at a code point whose low four bits are zero,
    // so the low four bits of any matched digit are its numeric value.
    return String(chr.charCodeAt(0) & 0xf);
  });
};

Short and easy!

// Shifts each Persian digit down by 1728: the gap between the Persian zero
// (U+06F0, char code 1776) and ASCII "0" (char code 48). Only the Persian
// range is matched; the expression evaluates to "0123456789".
"۰۱۲۳۴۵۶۷۸۹".replace(/([۰-۹])/g, function(token) { return String.fromCharCode(token.charCodeAt(0) - 1728); });

Or in a more modern manner

// Arrow-function form of the same shift: 1728 is the gap between the Persian
// zero (U+06F0, char code 1776) and ASCII "0" (char code 48). Only the
// Persian range is matched.
"۰۱۲۳۴۵۶۷۸۹".replace(/([۰-۹])/g, token => String.fromCharCode(token.charCodeAt(0) - 1728));

One-liners for all 6 possible conversions between English, Arabic, and Persian digits.

// ASCII ("English") digits -> Persian / Arabic-Indic digits.
// charAt() coerces the matched digit text to the position to read.
const e2p = (s) => s.replace(/\d/g, (digit) => '۰۱۲۳۴۵۶۷۸۹'.charAt(digit));
const e2a = (s) => s.replace(/\d/g, (digit) => '٠١٢٣٤٥٦٧٨٩'.charAt(digit));

// Persian / Arabic-Indic digits -> ASCII digits.
// A glyph's position in the ordered digit string is its numeric value.
const p2e = (s) => s.replace(/[۰-۹]/g, (glyph) => String('۰۱۲۳۴۵۶۷۸۹'.indexOf(glyph)));
const a2e = (s) => s.replace(/[٠-٩]/g, (glyph) => String('٠١٢٣٤٥٦٧٨٩'.indexOf(glyph)));

// Persian <-> Arabic-Indic digits: find the value in the source script, then
// read the glyph at that position in the target script.
const p2a = (s) =>
  s.replace(/[۰-۹]/g, (glyph) => {
    const value = '۰۱۲۳۴۵۶۷۸۹'.indexOf(glyph);
    return '٠١٢٣٤٥٦٧٨٩'.charAt(value);
  });
const a2p = (s) =>
  s.replace(/[٠-٩]/g, (glyph) => {
    const value = '٠١٢٣٤٥٦٧٨٩'.indexOf(glyph);
    return '۰۱۲۳۴۵۶۷۸۹'.charAt(value);
  });

e2p("asdf1234"); // asdf۱۲۳۴
e2a("asdf1234"); // asdf١٢٣٤
p2e("asdf۱۲۳۴"); // asdf1234
a2e("asdf١٢٣٤"); // asdf1234
p2a("asdf۱۲۳۴"); // asdf١٢٣٤
a2p("asdf١٢٣٤"); // asdf۱۲۳۴

Explanation:

  • (s => f(s))(x) is a lambda function that is immediately executed, and will be equal to f(x)
  • s.replace(pattern, function) looks for matches of pattern in s, for every match m it will replace m with function(m) in the string.
  • /\d/g is a regex pattern, \d means a digit in the English language, g means global. If you don't specify the g it will only match the first occurrence, otherwise it will match all the occurrences.
  • In this case for every English digit d in the string, that digit will be replaced by '۰۱۲۳۴۵۶۷۸۹'[d] so, 3 will be replaced by the third index in that list('۰۱۲۳۴۵۶۷۸۹') which is '۳'
  • /[۰-۹]/g is the equivalent regex for Persian digits this time we can't use the same method, before we took advantage of the fact that javascript is dynamically typed and that d is automatically converted from a string(regex match) to a number(array index) (you can do '1234'['1'] in javascript which is the same as '1234'[1])
  • but this time we can't do that, because '1234'['۱'] does not index into the string (it evaluates to undefined). So we use a trick here: indexOf, a function that returns the index of an element in an array (here, of a character in a string). So '۰۱۲۳۴۵۶۷۸۹'.indexOf('۳') gives us 3, because '۳' is at index 3 of the string '۰۱۲۳۴۵۶۷۸۹'.
/**
 * Rewrites Persian and Arabic-Indic digits in `str` as ASCII digits, one
 * character at a time. Every other character is kept as-is.
 *
 * @param {string} str - Text to convert.
 * @returns {string} The converted text.
 */
function toEnglishDigits(str) {
  // Three parallel tables: the same index in each holds the same digit.
  const persianNumbers = ["۱", "۲", "۳", "۴", "۵", "۶", "۷", "۸", "۹", "۰"];
  const arabicNumbers = ["١", "٢", "٣", "٤", "٥", "٦", "٧", "٨", "٩", "٠"];
  const englishNumbers = ["1", "2", "3", "4", "5", "6", "7", "8", "9", "0"];

  // Try the Persian table first, then the Arabic-Indic one.
  const toAscii = (ch) => {
    let slot = persianNumbers.indexOf(ch);
    if (slot === -1) {
      slot = arabicNumbers.indexOf(ch);
    }
    return slot === -1 ? ch : englishNumbers[slot];
  };

  let converted = "";
  for (const ch of str.split("")) {
    converted += toAscii(ch);
  }
  return converted;
}


toEnglishDigits("۶٦۵any٥32"); // "665any532"

If the string may contain both "Arabic" and "Persian" numbers then a one-line "replace" can do the job as follows.

The Arabic and Persian numbers are converted to English equivalents. Other text remains unchanged.

// Mixed Persian and Arabic-Indic digits around plain text.
// Declared with `let`: assigning to an undeclared name throws a
// ReferenceError in strict mode and in ES modules.
let Num = "۳٣۶٦۵any٥۵٤۶32٠۰";     // Output should be "33665any55463200"


// First pass converts the Arabic-Indic digits, second pass the Persian ones.
// In both, a digit's position in the ordered digit string is its value.
Num = Num.replace(/[٠-٩]/g, d => "٠١٢٣٤٥٦٧٨٩".indexOf(d)).replace(/[۰-۹]/g, d => "۰۱۲۳۴۵۶۷۸۹".indexOf(d));


console.log(Num);

You can use the new Persian-tools library which is an awesome javascript library to deal with Persian words and numbers. Here is a sample for the task you asked for it:

import { digitsArToFa, digitsArToEn, digitsEnToFa, digitsFaToEn } from "persian-tools2";


digitsArToFa("٠١٢٣٤٥٦٧٨٩"); // "۰۱۲۳۴۵۶۷۸۹"
digitsArToEn("٠١٢٣٤٥٦٧٨٩"); // "0123456789"
digitsEnToFa("123۴۵۶"); // "۱۲۳۴۵۶"
digitsFaToEn("۰۱۲۳۴۵۶۷۸۹"); // "0123456789"

You can also find many other useful functionalities on the repository page of the library.

For React solution using typescript this might be useful:

// https://gist.github.com/alieslamifard/364862613408a98139da3cab40abbeb9


import React, { InputHTMLAttributes, useEffect, useRef } from 'react';


/**
 * Persian/Arabic To English Digit.
 *
 * Event handler that rewrites `event.target.value` in place, replacing every
 * Arabic-Indic and Persian digit with its ASCII digit.
 *
 * @param event - An event whose `target` exposes a string `value`.
 * @returns The same event object that was passed in.
 */
const f2e = (event) => {
  // The replacer must return a string: TypeScript's `replace` signature
  // rejects a callback that returns the number produced by indexOf.
  event.target.value = event.target.value
    .replace(/[٠-٩]/g, (d) => String('٠١٢٣٤٥٦٧٨٩'.indexOf(d)))
    .replace(/[۰-۹]/g, (d) => String('۰۱۲۳۴۵۶۷۸۹'.indexOf(d)));

  return event;
};


/**
 * Creates a local ref and mirrors its current value into the forwarded
 * `ref` (callback ref or ref object) whenever `ref` changes.
 *
 * @param ref - The forwarded ref; may be a function, a ref object, or falsy.
 * @returns The local ref object to attach to the element.
 */
const useForwardedRef = (ref) => {
  const innerRef = useRef(null);

  useEffect(() => {
    if (typeof ref === 'function') {
      // Callback ref: hand over the current node.
      ref(innerRef.current);
      return;
    }
    if (ref) {
      // Ref object: copy the current node across.
      ref.current = innerRef.current;
    }
  }, [ref]);

  return innerRef;
};


/**
 * Drop-in replacement for a native <input>: forwards all props and the ref,
 * and registers `f2e` as a `keyup` listener on the rendered element.
 */
const Input = React.forwardRef<HTMLInputElement, InputHTMLAttributes<HTMLInputElement>>(
  (props, ref) => {
    const innerRef = useForwardedRef(ref);

    useEffect(() => {
      // Capture the node now. By the time the cleanup runs, innerRef.current
      // may already have been reset, so reading it there could skip the
      // removeEventListener call.
      const node = innerRef.current;
      if (!node) {
        return undefined;
      }

      node.addEventListener('keyup', f2e);

      return () => {
        node.removeEventListener('keyup', f2e);
      };
    }, [innerRef]);

    return <input {...props} ref={innerRef} />;
  },
);


export default Input;

Simply use Input instead of native input in your form :)

/**
 * Formats a value through its own `toLocaleString` using the Persian (Iran)
 * locale tag.
 *
 * @param {number} number - The value to format.
 * @returns {string} The locale-formatted text.
 */
const convertToPersianDigits = (number) => {
  const locale = 'fa-IR';
  return number.toLocaleString(locale);
};


convertToPersianDigits(100000);    //۱۰۰٬۰۰۰

If you have your number string (a string representing a number) at hand, here is a function called parseNumber that converts it into an actual JS number:

/**
 * Parses text that may mix Persian, Arabic-Indic and Latin digits and
 * separators into a JS number.
 *
 * @param numberText - The text to parse.
 * @returns The parsed number. A minus sign (ASCII "-" or U+2212) before the
 *   first digit makes the result negative. Returns NaN when the text
 *   contains no digit at all.
 */
function parseNumber(numberText: string) {
  // Convert Persian (and Arabic) digits to Latin digits
  const normalized = normalizeDigits(numberText)
    // Convert Persian/Arabic decimal separator (U+066B) to English decimal separator (dot)
    .replace(/\u066B/g, ".");

  // The character filter below would also delete a minus sign and turn a
  // negative number positive, so record the sign first.
  const isNegative = /^[^\d.]*[-\u2212]/.test(normalized);

  // Remove other characters such as thousands separators
  const unsigned = normalized.replace(/[^\d.]/g, "");

  // Number("") is 0, which would report digit-less text as zero.
  if (!/\d/.test(unsigned)) {
    return NaN;
  }

  return Number(isNegative ? `-${unsigned}` : unsigned);
}


// Global regexes indexed by digit value: entry i matches the glyph for digit i.
const persianDigitsRegex = [/۰/g, /۱/g, /۲/g, /۳/g, /۴/g, /۵/g, /۶/g, /۷/g, /۸/g, /۹/g];
const arabicDigitsRegex = [/٠/g, /١/g, /٢/g, /٣/g, /٤/g, /٥/g, /٦/g, /٧/g, /٨/g, /٩/g];


/**
 * Replaces every Persian and Arabic-Indic digit in `text` with the matching
 * Latin digit. All other characters are left unchanged.
 *
 * @param text - The text to normalize.
 * @returns The text with Latin digits only.
 */
function normalizeDigits(text: string) {
  for (let i = 0; i < 10; i++) {
    text = text
      .replace(persianDigitsRegex[i], i.toString())
      .replace(arabicDigitsRegex[i], i.toString());
  }
  return text;
}

Note that the parse function is quite forgiving and the number string can be a combination of Persian/Arabic/Latin numerals and separators.

After getting a Number you can format it however you want with Number.toLocaleString function:

// Parse a mixed Persian/Latin number string, then format it for three locales.
// `val` is not a JavaScript keyword, so the results are declared with `const`.
const numberString = "۱۲۳۴.5678";
const number = parseNumber(numberString);
const formatted1 = number.toLocaleString("fa"); // OR "fa-IR" for IRAN
const formatted2 = number.toLocaleString("en"); // OR "en-US" for USA
const formatted3 = number.toLocaleString("ar-EG"); // OR "ar" which uses western numerals

For more information about formatting numbers, refer to this answer.

The most High Performance (Fast & Accurate) function that can support both Persian/Arabic digits (Unicode numeral characters) is this:

/**
 * Replaces every Arabic-Indic (U+0660–U+0669) and Persian (U+06F0–U+06F9)
 * digit in `s` with its numeric value. Other characters are untouched.
 *
 * @param {string} s - Text to convert.
 * @returns {string} The converted text.
 */
function toEnDigit(s) {
  // Masking with 0xf keeps only the low four bits of the char code.
  const lowNibble = (digitChar) => digitChar.charCodeAt(0) & 0xf;

  // One global character set covers both digit ranges.
  return s.replace(/[\u0660-\u0669\u06f0-\u06f9]/g, lowNibble);
}


// Declared with `const`: assigning to an undeclared name throws a
// ReferenceError in strict mode and in ES modules.
const sample = 'English: 0123456789 - Persian: ۰۱۲۳۴۵۶۷۸۹ - Arabic: ٠١٢٣٤٥٦٧٨٩';
// English: 0123456789 - Persian: 0123456789 - Arabic: 0123456789


console.log( toEnDigit(sample) );

How it works

First, replace() with a RegEx character set covering the Arabic-Indic digits (U+0660 – U+0669) and the Persian digits (U+06F0 – U+06F9) matches every such digit character in the string.

Then, because the Basic Latin (ASCII) digits U+0030 – U+0039 = 0 – 9 end in the same low four bits as the Persian and Arabic-Indic digits, removing the part of the code that differs leaves the digit value itself.
To do that, we apply a bitwise AND (&) to the char code returned by charCodeAt(), so that only the shared low bits remain.

Explain:

// x86 (Base 10) --> Binary (Base 2)


'٤'.charCodeAt(0);   // 1636 (Base 10)
'۴'.charCodeAt(0);   // 1780 (Base 10)


(1636).toString(2);  // 0000000000000000000001100110 0100 (Base 2)
(1780).toString(2);  // 0000000000000000000001101111 0100 (Base 2)
(4).toString(2);     // 0000000000000000000000000000 0100 (Base 2)


// We need a         // 0000000000000000000000000000 1111 (Base 2)
// To And it, for keeping just the 1's
// 0xf = 15
(15).toString(2);    // 0000000000000000000000000000 1111 (Base 2)


// So
(
1780                 // 0000000000000000000001101111 0100 (Base 2)
&                    // AND (Operation)
15                   // 0000000000000000000000000000 1111 (Base 2)
)
==
4                    // 0000000000000000000000000000 0100 (Base 2)
// ---> true


// Also              (1636 & 15) == 4    <--- true
Minified version (All Browsers):
// Minified: replaces each Arabic-Indic (U+0660–U+0669) or Persian (U+06F0–U+06F9) digit in s with the low four bits of its char code.
function toEnDigit(s){return s.replace(/[\u0660-\u0669\u06f0-\u06f9]/g,function(a){return a.charCodeAt(0)&15})}
OneLiner (Modern Browsers)
// Arrow-function one-liner: the character set lists both digit ranges as literals; each match becomes the low four bits of its char code.
const toEnDigit=s=>s.replace(/[٠-٩۰-۹]/g,a=>a.charCodeAt(0)&15);

Based on MMMahdy-PAPION method, a short one-line to convert both Persian and Arabic numbers to English numbers and keep all other characters unchanged is the following:

// Each matched digit is replaced by the low four bits of its char code. The inner `n` (the matched character) shadows the outer `n` (the input string).
const toEnDigit=n=>n.replace(/[٠-٩۰-۹]/g,n=>15&n.charCodeAt(0));

// Each matched digit is replaced by the low four bits of its char code. The
// inner `n` (the matched character) shadows the outer `n` (the input string).
const toEnDigit=n=>n.replace(/[٠-٩۰-۹]/g,n=>15&n.charCodeAt(0));




// Declared with `const`: assigning to an undeclared name throws a
// ReferenceError in strict mode and in ES modules.
const sample = 'English: 0123456789 - Persian (فارسی): ۰۱۲۳۴۵۶۷۸۹ - Arabic (عربي): ٠١٢٣٤٥٦٧٨٩';
// Logs the sample with both non-ASCII digit runs shown as 0123456789; all other text is unchanged.


console.log(toEnDigit(sample) );