-- first, create a sandbox to play inCREATE DATABASE `sandbox`;use `sandbox`;
-- next, make sure that your client connection is of the same-- character/collate type as the one we're going to test next:charset utf8 collate utf8_general_ci
-- now, create the table and fill it with valuesCREATE TABLE `test` (`key` VARCHAR(16), `value` VARCHAR(16) )CHARACTER SET utf8 COLLATE utf8_general_ci;
INSERT INTO `test` VALUES ('Key ONE', 'value'), ('Key TWO', 'valúe');
-- (verify)SELECT * FROM `test`;
-- now, expose the problem/bug:SELECT * FROM test WHERE `value` = 'value';
---- Note that we get BOTH keys here! MySQLs UTF8 collates that are-- case insensitive (ending with _ci) do not distinguish between-- both values!---- collate 'utf8_bin' doesn't have this problem, as I'll show next:--
-- first, reset the client connection charset/collate typecharset utf8 collate utf8_bin
-- next, convert the values that we've previously inserted in the tableALTER TABLE `test` CONVERT TO CHARACTER SET utf8 COLLATE utf8_bin;
-- now, re-check for the bugSELECT * FROM test WHERE `value` = 'value';
---- Note that we get just one key now, as you'd expect.---- This problem appears to be specific to utf8. Next, I'll try to-- do the same with the 'latin1' charset:--
-- first, reset the client connection charset/collate typecharset latin1 collate latin1_general_ci
-- next, convert the values that we've previously inserted-- in the tableALTER TABLE `test` CONVERT TO CHARACTER SET latin1 COLLATE latin1_general_ci;
-- now, re-check for the bugSELECT * FROM test WHERE `value` = 'value';
---- Again, only one key is returned (expected). This shows-- that the problem with utf8/utf8_generic_ci isn't present-- in latin1/latin1_general_ci---- To complete the example, I'll check with the binary collate-- of latin1 as well:
-- first, reset the client connection charset/collate typecharset latin1 collate latin1_bin
-- next, convert the values that we've previously inserted in the tableALTER TABLE `test` CONVERT TO CHARACTER SET latin1 COLLATE latin1_bin;
-- now, re-check for the bugSELECT * FROM test WHERE `value` = 'value';
---- Again, only one key is returned (expected).---- Finally, I'll re-introduce the problem in the exact same-- way (for any sceptics out there):
-- first, reset the client connection charset/collate typecharset utf8 collate utf8_generic_ci
-- next, convert the values that we've previously inserted in the tableALTER TABLE `test` CONVERT TO CHARACTER SET utf8 COLLATE utf8_general_ci;
-- now, re-check for the problem/bugSELECT * FROM test WHERE `value` = 'value';
---- Two keys.--
DROP DATABASE sandbox;
CREATE TABLE Contacts (id INT AUTO_INCREMENT NOT NULL,ownerId INT DEFAULT NULL,created timestamp NOT NULL DEFAULT '0000-00-00 00:00:00',modified timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,contact VARCHAR(640) NOT NULL,prefix VARCHAR(128) NOT NULL,first VARCHAR(128) NOT NULL,middle VARCHAR(128) NOT NULL,last VARCHAR(128) NOT NULL,suffix VARCHAR(128) NOT NULL,notes MEDIUMTEXT NOT NULL,INDEX IDX_CA367725E05EFD25 (ownerId),INDEX created (created),INDEX modified_idx (modified),INDEX contact_idx (contact),PRIMARY KEY(id)) DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci ENGINE = InnoDB ROW_FORMAT=DYNAMIC;