PHP 短散列像网址缩短网站

我正在寻找一个 PHP 函数,创建一个字符串或文件的短散列,类似于那些网址缩短的网站,如 Tinyurl.com

散列不应长于8个字符。

103316 次浏览

URL shortening services rather use a auto incremented integer value (like a supplementary database ID) and encode that with Base64 or other encodings to have more information per character (64 instead of just 10 like digits).

Shortest hash is 32 character length, how ever you can use first 8 characters of md5 hash

echo substr(md5('http://www.google.com'), 0, 8);

Update: here is another class found here written by Travell Perkins which takes record number and create short hash for it. 14 digits number produce 8 digit string. By the date you reach this number you become more popular than tinyurl ;)

class BaseIntEncoder {


//const $codeset = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
//readable character set excluded (0,O,1,l)
const codeset = "23456789abcdefghijkmnopqrstuvwxyzABCDEFGHIJKLMNPQRSTUVWXYZ";


static function encode($n){
$base = strlen(self::codeset);
$converted = '';


while ($n > 0) {
$converted = substr(self::codeset, bcmod($n,$base), 1) . $converted;
$n = self::bcFloor(bcdiv($n, $base));
}


return $converted ;
}


static function decode($code){
$base = strlen(self::codeset);
$c = '0';
for ($i = strlen($code); $i; $i--) {
$c = bcadd($c,bcmul(strpos(self::codeset, substr($code, (-1 * ( $i - strlen($code) )),1))
,bcpow($base,$i-1)));
}


return bcmul($c, 1, 0);
}


static private function bcFloor($x)
{
return bcmul($x, '1', 0);
}


static private function bcCeil($x)
{
$floor = bcFloor($x);
return bcadd($floor, ceil(bcsub($x, $floor)));
}


static private function bcRound($x)
{
$floor = bcFloor($x);
return bcadd($floor, round(bcsub($x, $floor)));
}
}

here is example how to use it:

BaseIntEncoder::encode('1122344523');//result:3IcjVE
BaseIntEncoder::decode('3IcjVE');//result:1122344523

TinyURL doesn't hash anything, it uses Base 36 integers (or even base 62, using lower and uppercase letters) to indicate which record to visit.

Base 36 to Integer:

intval($str, 36);

Integer to Base 36:

base_convert($val, 10, 36);

So then, instead of redirecting to a route like /url/1234 it becomes /url/ax instead. This gives you a whole lot more use than a hash will, as there will be no collisions. With this you can easily check if a url exists and return the proper, existing, ID in base 36 without the user knowing that it was already in the database.

Don't hash, use other bases for this kind of thing. (It's faster and can be made collision-proof.)

I wrote a tiny lib to generate obfuscated hashes from integers.

http://web.archive.org/web/20130727034425/http://blog.kevburnsjr.com/php-unique-hash

$ids = range(1,10);
foreach($ids as $id) {
echo PseudoCrypt::unhash($id) . "\n";
}
m8z2p
8hy5e
uqx83
gzwas
38vdh
phug6
bqtiv
xzslk
k8ro9
6hqqy

7/14/2015: Adding the actual code below, since it has become difficult to find:

<?php
/**
* PseudoCrypt by KevBurns (http://blog.kevburnsjr.com/php-unique-hash)
* Reference/source: http://stackoverflow.com/a/1464155/933782
*
* I want a short alphanumeric hash that’s unique and who’s sequence is difficult to deduce.
* I could run it out to md5 and trim the first n chars but that’s not going to be very unique.
* Storing a truncated checksum in a unique field means that the frequency of collisions will increase
* geometrically as the number of unique keys for a base 62 encoded integer approaches 62^n.
* I’d rather do it right than code myself a timebomb. So I came up with this.
*
* Sample Code:
*
* echo "<pre>";
* foreach(range(1, 10) as $n) {
*     echo $n." - ";
*     $hash = PseudoCrypt::hash($n, 6);
*     echo $hash." - ";
*     echo PseudoCrypt::unhash($hash)."<br/>";
* }
*
* Sample Results:
* 1 - cJinsP - 1
* 2 - EdRbko - 2
* 3 - qxAPdD - 3
* 4 - TGtDVc - 4
* 5 - 5ac1O1 - 5
* 6 - huKpGQ - 6
* 7 - KE3d8p - 7
* 8 - wXmR1E - 8
* 9 - YrVEtd - 9
* 10 - BBE2m2 - 10
*/


class PseudoCrypt {


/* Key: Next prime greater than 62 ^ n / 1.618033988749894848 */
/* Value: modular multiplicative inverse */
private static $golden_primes = array(
'1'                  => '1',
'41'                 => '59',
'2377'               => '1677',
'147299'             => '187507',
'9132313'            => '5952585',
'566201239'          => '643566407',
'35104476161'        => '22071637057',
'2176477521929'      => '294289236153',
'134941606358731'    => '88879354792675',
'8366379594239857'   => '7275288500431249',
'518715534842869223' => '280042546585394647'
);


/* Ascii :                    0  9,         A  Z,         a  z     */
/* $chars = array_merge(range(48,57), range(65,90), range(97,122)) */
private static $chars62 = array(
0=>48,1=>49,2=>50,3=>51,4=>52,5=>53,6=>54,7=>55,8=>56,9=>57,10=>65,
11=>66,12=>67,13=>68,14=>69,15=>70,16=>71,17=>72,18=>73,19=>74,20=>75,
21=>76,22=>77,23=>78,24=>79,25=>80,26=>81,27=>82,28=>83,29=>84,30=>85,
31=>86,32=>87,33=>88,34=>89,35=>90,36=>97,37=>98,38=>99,39=>100,40=>101,
41=>102,42=>103,43=>104,44=>105,45=>106,46=>107,47=>108,48=>109,49=>110,
50=>111,51=>112,52=>113,53=>114,54=>115,55=>116,56=>117,57=>118,58=>119,
59=>120,60=>121,61=>122
);


public static function base62($int) {
$key = "";
while(bccomp($int, 0) > 0) {
$mod = bcmod($int, 62);
$key .= chr(self::$chars62[$mod]);
$int = bcdiv($int, 62);
}
return strrev($key);
}


public static function hash($num, $len = 5) {
$ceil = bcpow(62, $len);
$primes = array_keys(self::$golden_primes);
$prime = $primes[$len];
$dec = bcmod(bcmul($num, $prime), $ceil);
$hash = self::base62($dec);
return str_pad($hash, $len, "0", STR_PAD_LEFT);
}


public static function unbase62($key) {
$int = 0;
foreach(str_split(strrev($key)) as $i => $char) {
$dec = array_search(ord($char), self::$chars62);
$int = bcadd(bcmul($dec, bcpow(62, $i)), $int);
}
return $int;
}


public static function unhash($hash) {
$len = strlen($hash);
$ceil = bcpow(62, $len);
$mmiprimes = array_values(self::$golden_primes);
$mmi = $mmiprimes[$len];
$num = self::unbase62($hash);
$dec = bcmod(bcmul($num, $mmi), $ceil);
return $dec;
}


}

Actually the best solution to have "random" hash is to generate a list of random hash, put it on Mysql with an unique INDEX (you can write a simple UDF to insert 100 000 rows in 1 seconde).

I think a structure like this ID|HASH|STATUS|URL|VIEWS|......

Where status indicates if this Hash is free or not.

Best Answer Yet: Smallest Unique "Hash Like" String Given Unique Database ID - PHP Solution, No Third Party Libraries Required.

Here's the code:

<?php
/*
THE FOLLOWING CODE WILL PRINT:
A database_id value of 200 maps to 5K
A database_id value of 1 maps to 1
A database_id value of 1987645 maps to 16LOD
*/
$database_id = 200;
$base36value = dec2string($database_id, 36);
echo "A database_id value of $database_id maps to $base36value\n";
$database_id = 1;
$base36value = dec2string($database_id, 36);
echo "A database_id value of $database_id maps to $base36value\n";
$database_id = 1987645;
$base36value = dec2string($database_id, 36);
echo "A database_id value of $database_id maps to $base36value\n";


// HERE'S THE FUNCTION THAT DOES THE HEAVY LIFTING...
function dec2string ($decimal, $base)
// convert a decimal number into a string using $base
{
//DebugBreak();
global $error;
$string = null;


$base = (int)$base;
if ($base < 2 | $base > 36 | $base == 10) {
echo 'BASE must be in the range 2-9 or 11-36';
exit;
} // if


// maximum character string is 36 characters
$charset = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ';


// strip off excess characters (anything beyond $base)
$charset = substr($charset, 0, $base);


if (!ereg('(^[0-9]{1,50}$)', trim($decimal))) {
$error['dec_input'] = 'Value must be a positive integer with < 50 digits';
return false;
} // if


do {
// get remainder after dividing by BASE
$remainder = bcmod($decimal, $base);


$char      = substr($charset, $remainder, 1);   // get CHAR from array
$string    = "$char$string";                    // prepend to output


//$decimal   = ($decimal - $remainder) / $base;
$decimal   = bcdiv(bcsub($decimal, $remainder), $base);


} while ($decimal > 0);


return $string;


}


?>

I was making a url shortner. In my case I used the "id" of database to create every time a unique short url.

What I did is, first -

Insert Data like "Original url" and "creation date" in db leaving the "short url" empty in db. Then get the "id" from there and pass in the function below.

<?php
function genUniqueCode($id){
$id = $id + 100000000000;
return base_convert($id, 10, 36);
}


//Get Unique Code using ID
/*
id Below is retrived from Database after Inserting Original URL.
*/






$data['id'] =10;
$uniqueCode = genUniqueCode($data['id']);


// Generating the URL
$protocol = strtolower(substr($_SERVER["SERVER_PROTOCOL"],0,5))=='https'?'https':'http';
echo "<a href='{$protocol}://{$_SERVER['HTTP_HOST']}/{$uniqueCode}'>{$protocol}://{$_SERVER['HTTP_HOST']}/{$uniqueCode}</a>";


?>

And then UPDATE value of Short Url Code in Database.

Here I'm using "id" to create short code. Since ID can't be same for multi entries. It's unique hence the Unique Code or Url will be unique.

For a short hash, url friendly, in the view of disallowing possible duplicate contents, we can use hash() and especially the CRC or Adler-32 type, since they are made exactly for that:

Cyclic Redundancy Check

A cyclic redundancy check (CRC) is an error-detecting code commonly used in digital networks and storage devices to detect accidental changes to raw data. Blocks of data entering these systems get a short check value attached, based on the remainder of a polynomial division of their contents. On retrieval, the calculation is repeated and, in the event the check values do not match, corrective action can be taken https://en.wikipedia.org/wiki/Cyclic_redundancy_check

Adler-32 is a checksum algorithm (...), compared to a Cyclic Redundancy Check of the same length, it trades reliability for speed (preferring the latter) https://en.wikipedia.org/wiki/Adler-32

echo hash("crc32", "Content of article...");
// Output fd3e7c6e
echo hash("adler32", "Content of article...");
// Output 55df075f

[Youtube] How do CRCs work?

I wrote some kind of an algorithm.

It's

  1. simpler to understand, tune and change
  2. uses only symbols you allow (so it can be case sensitive easily)
  3. also unique
  4. case insensitive from a box
  5. only for positive int (for IDs)
<?php


class PseudoCrypt1
{
private static $keychars = 'CZPXD5H2FIWB81KE76JY93V4ORLAMT0QSUNG'; // Dictionary of allowed unique symbols, shuffle it for yourself or remove unwanted chars (don't forget to call testParameters after changing)
private static $divider = 19; // Tune divider for yourself (don't forget to call testParameters after changing)
private static $biasDivider = 14; // Tune bias divider for yourself (don't forget to call testParameters after changing)
private static $noise = 53; // Any positive number


public static function testParameters()
{
if (strlen(static::$keychars) < static::$divider + static::$biasDivider - 1) {
throw new Exception('Check your divider and biasDivider. It must be less than keychars length');
}
}


public static function encode(int $i): string
{
if ($i < 0) {
throw new Exception('Expected positive integer');
}


$keychars = static::$keychars;
$i = $i + static::$noise; // add noise to a number
$bias = $i % static::$biasDivider;


$res = '';


while ($i > 0) {
$div = $i % static::$divider;
$i = intdiv($i, static::$divider);
$res .= $keychars[$div + $bias];
}


// Current version of an algorithm is one of these chars (if in the future you will need to identify a version)
// Remember this chars on migrating to a new algorithm/parameters
$res .= str_shuffle('LPTKEZG')[0];
$res .= $keychars[$bias]; // Encoded bias


return $res;
}


public static function decode($code)
{
$keychars = static::$keychars;
$biasC = substr($code, -1);
$bias = strpos($keychars, $biasC);
$code = substr($code, 0, -2);
$code = str_split(strrev($code));


$val = 0;


foreach ($code as $c) {
$val *= static::$divider;
$val += strpos($keychars, $c) - $bias;
}


return $val - static::$noise;
}
}

Output

36926 -> 7IWFZX
927331 -> F4WIKP2
9021324 -> AT66R7P1

You can test it with this little test (it doesn't include test for uniqueness, but algorithm is unique):

PseudoCrypt1::testParameters();


for ($i = 4000000; $i < 9500000; $i++) {
$hash = PseudoCrypt1::encode($i);
echo $i.':'.strlen($hash).':'.$hash.PHP_EOL;
if ($i != PseudoCrypt1::decode($hash)) {
echo 'FAIL:'.$i.PHP_EOL;
die();
}
}

Mmm, perhaps we can use MurmurHash ?

function hash_murmur3($string)
{
$string = array_values(unpack('C*', $string));
$klen = count($string);
$h1 = 0;
$remainder = 0;
$i = 0;


for ($bytes = $klen - ($remainder = $klen & 3); $i < $bytes;) {
$k1 = $string[$i] | ($string[++$i] << 8) | ($string[++$i] << 16) | ($string[++$i] << 24);
++$i;
$k1 = (((($k1 & 0xffff) * 0xcc9e2d51) + (((((($k1 >= 0) ? ($k1 >> 16) : (($k1 & 0x7fffffff) >> 16) | 0x8000)) * 0xcc9e2d51) & 0xffff) << 16))) & 0xffffffff;
$k1 = $k1 << 15 | (($k1 >= 0) ? ($k1 >> 17) : (($k1 & 0x7fffffff) >> 17) | 0x4000);
$k1 = (((($k1 & 0xffff) * 0x1b873593) + (((((($k1 >= 0) ? ($k1 >> 16) : (($k1 & 0x7fffffff) >> 16) | 0x8000)) * 0x1b873593) & 0xffff) << 16))) & 0xffffffff;
$h1 ^= $k1;
$h1 = $h1 << 13 | (($h1 >= 0) ? ($h1 >> 19) : (($h1 & 0x7fffffff) >> 19) | 0x1000);
$h1b = (((($h1 & 0xffff) * 5) + (((((($h1 >= 0) ? ($h1 >> 16) : (($h1 & 0x7fffffff) >> 16) | 0x8000)) * 5) & 0xffff) << 16))) & 0xffffffff;
$h1 = ((($h1b & 0xffff) + 0x6b64) + (((((($h1b >= 0) ? ($h1b >> 16) : (($h1b & 0x7fffffff) >> 16) | 0x8000)) + 0xe654) & 0xffff) << 16));
}


$k1 = 0;


switch ($remainder) {
case 3:
$k1 ^= $string[$i + 2] << 16;


case 2:
$k1 ^= $string[$i + 1] << 8;


case 1:
$k1 ^= $string[$i];
$k1 = ((($k1 & 0xffff) * 0xcc9e2d51) + (((((($k1 >= 0) ? ($k1 >> 16) : (($k1 & 0x7fffffff) >> 16) | 0x8000)) * 0xcc9e2d51) & 0xffff) << 16)) & 0xffffffff;
$k1 = $k1 << 15 | (($k1 >= 0) ? ($k1 >> 17) : (($k1 & 0x7fffffff) >> 17) | 0x4000);
$k1 = ((($k1 & 0xffff) * 0x1b873593) + (((((($k1 >= 0) ? ($k1 >> 16) : (($k1 & 0x7fffffff) >> 16) | 0x8000)) * 0x1b873593) & 0xffff) << 16)) & 0xffffffff;
$h1 ^= $k1;
}


$h1 ^= $klen;
$h1 ^= (($h1 >= 0) ? ($h1 >> 16) : (($h1 & 0x7fffffff) >> 16) | 0x8000);
$h1 = ((($h1 & 0xffff) * 0x85ebca6b) + (((((($h1 >= 0) ? ($h1 >> 16) : (($h1 & 0x7fffffff) >> 16) | 0x8000)) * 0x85ebca6b) & 0xffff) << 16)) & 0xffffffff;
$h1 ^= (($h1 >= 0) ? ($h1 >> 13) : (($h1 & 0x7fffffff) >> 13) | 0x40000);
$h1 = (((($h1 & 0xffff) * 0xc2b2ae35) + (((((($h1 >= 0) ? ($h1 >> 16) : (($h1 & 0x7fffffff) >> 16) | 0x8000)) * 0xc2b2ae35) & 0xffff) << 16))) & 0xffffffff;
$h1 ^= (($h1 >= 0) ? ($h1 >> 16) : (($h1 & 0x7fffffff) >> 16) | 0x8000);


return base_convert(sprintf("%u\n", $h1), 10, 32);
}

Usage example:

echo hash_murmur3('foo'); // prints '3rabh10'