/**
 * Tells whether $needle occurs in $haystack, either anywhere or as a whole word.
 *
 * The needle is treated as literal text: it is escaped with preg_quote() before
 * being placed in the pattern, so characters such as ".", "(", "?" or "/" match
 * themselves instead of being interpreted as (or breaking) the regular expression.
 *
 * @param string $needle   Literal text to look for.
 * @param string $haystack Text to search in.
 * @param string $i        Pattern modifier: "" for a case-sensitive search, "i" for case-insensitive.
 * @param string $word     "W" (in any letter case) requests a whole-word search (\b on both sides);
 *                         any other value requests a plain string-in-string search.
 *
 * @return bool True when a match is found, false otherwise.
 *              (Quote the values if you need them as strings instead of bools.)
 */
function FindString($needle, $haystack, $i, $word)
{
    // Escape the needle, including the "/" delimiter, so it is matched literally.
    $literal = preg_quote((string) $needle, '/');

    if (strtoupper($word) == "W") {
        // Whole-word search: the needle must sit between word boundaries.
        $pattern = "/\b{$literal}\b/{$i}";
    } else {
        // Plain substring search.
        $pattern = "/{$literal}/{$i}";
    }

    // preg_match() yields 1 on a match, 0 on no match and false on error.
    return preg_match($pattern, $haystack) === 1;
}
那么“café”呢?我们如何用正则表达式提取“café”这个词?实际上,\bcafé\b 不起作用。为什么?因为“café”包含非 ASCII 字符 é。在默认(非 Unicode)模式下,\b 不能直接用于 Unicode 文本,例如 समुद्र、감사、месяц 😉
当您想提取Unicode字符时,应直接定义表示单词边界的字符。
答案:(?<=[\s,.:;"']|^)UNICODE_WORD(?=[\s,.:;"']|$)
所以为了在PHP中使用答案,你可以使用这个函数:
/**
 * Tells whether $str contains at least one of the given words as a whole word.
 *
 * Works in Hebrew and any other Unicode script: instead of \b, word boundaries
 * are spelled out as "start/end of string or one of whitespace , . : ; \" '".
 * Thanks https://medium.com/@shiba1014/regex-word-boundaries-with-unicode-207794f6e7ed
 * Thanks https://www.phpliveregex.com/
 *
 * @param string $str Text to search in (expected to be UTF-8).
 * @param array  $arr Words to look for; each one is matched literally.
 *
 * @return bool True if any word of $arr occurs in $str delimited by the boundary
 *              characters above, false otherwise (including for an empty $arr).
 */
function contains($str, array $arr)
{
    foreach ($arr as $word) {
        // preg_quote() keeps words such as "a.b" or "c/d" from altering the pattern;
        // the "u" modifier makes PCRE treat pattern and subject as UTF-8.
        $pattern = '/(?<=[\s,.:;"\']|^)' . preg_quote((string) $word, '/') . '(?=[\s,.:;"\']|$)/u';

        if (preg_match($pattern, $str) === 1) {
            return true;
        }
    }

    return false;
}
如果你想搜索单词数组,你可以使用这个:
/**
 * Checks a string against a list of words and reports whether any of them
 * appears in it as a whole word.
 *
 * Works in Hebrew and any other Unicode script: a word counts as "whole" when it
 * is preceded by the start of the string or one of whitespace , . : ; " ' and
 * followed by the end of the string or one of the same characters.
 * Thanks https://medium.com/@shiba1014/regex-word-boundaries-with-unicode-207794f6e7ed
 * Thanks https://www.phpliveregex.com/
 *
 * @param string $str Text to search in (expected to be UTF-8).
 * @param array  $arr Candidate words; each is matched literally.
 *
 * @return bool True on the first word found, false when none matches.
 */
function arrayContainsWord($str, array $arr)
{
    $before = '(?<=[\s,.:;"\']|^)';
    $after = '(?=[\s,.:;"\']|$)';

    foreach ($arr as $word) {
        // Escape the word so regex metacharacters and the "/" delimiter are literal.
        $escaped = preg_quote((string) $word, '/');

        // "u": interpret pattern and subject as UTF-8.
        if (preg_match('/' . $before . $escaped . $after . '/u', $str) === 1) {
            return true;
        }
    }

    return false;
}
<?php
// Demonstrates locating a substring with strpos() and testing the result safely.
$mystring = 'abc';
$findme = 'a';
$pos = strpos($mystring, $findme);

// Note our use of ===. Simply, == would not work as expected
// because the position of 'a' was the 0th (first) character.
if ($pos === false) {
    echo "The string '$findme' was not found in the string '$mystring'.";
} else {
    echo "The string '$findme' was found in the string '$mystring',";
    echo " and exists at position $pos.";
}
?>
/**
 * Builds an inverted index over a small corpus.
 *
 * Adapted from http://phpir.com/simple-search-the-vector-space-model/
 *
 * tf-idf (term frequency-inverse document frequency) is, according to Wikipedia,
 * a numerical statistic intended to reflect how important a word is to a document
 * in a corpus; this index holds the raw counts such a score is computed from.
 *
 * @param array  $corpus    Documents keyed by document id; each value is the document text.
 * @param string $separator Delimiter handed to explode() to split a document into terms.
 *
 * @return array Two entries:
 *               'doc_count'  => number of terms per document id;
 *               'dictionary' => per term, 'document_frequency' (how many documents
 *                               contain it) and 'postings' (document id =>
 *                               array('term_frequency' => occurrences in that document)).
 */
function get_corpus_index($corpus = array(), $separator = ' ')
{
    $index = array();
    $lengths = array();

    foreach ($corpus as $doc_id => $doc) {
        $tokens = explode($separator, $doc);
        $lengths[$doc_id] = count($tokens);

        // Tally each distinct term of this document once, in first-seen order.
        foreach (array_count_values($tokens) as $token => $occurrences) {
            if (!isset($index[$token])) {
                $index[$token] = array('document_frequency' => 0, 'postings' => array());
            }

            // A document id is visited only once, so this is always a new posting.
            $index[$token]['document_frequency'] += 1;
            $index[$token]['postings'][$doc_id] = array('term_frequency' => $occurrences);
        }
    }

    return array('doc_count' => $lengths, 'dictionary' => $index);
}
/**
 * Ranks the documents of a corpus by their tf-idf similarity to a query.
 *
 * The corpus is indexed with get_corpus_index(); every query term found in the
 * index adds term_frequency * idf to each document containing it. Each total is
 * then divided by the document's term count (length normalisation) and the
 * result is sorted from the highest score to the lowest.
 *
 * @param string $query     Search terms joined by $separator.
 * @param array  $corpus    Documents keyed by document id; each value is the document text.
 * @param string $separator Delimiter used to split both the query and the documents.
 *
 * @return array Document id => score, best match first. Only documents sharing
 *               at least one term with the query are listed; the array is empty
 *               when the query or the corpus is empty.
 */
function get_similar_documents($query = '', $corpus = array(), $separator = ' ')
{
    $similar_documents = array();

    if ($query == '' || empty($corpus)) {
        return $similar_documents;
    }

    $words = explode($separator, $query);
    $index = get_corpus_index($corpus, $separator);
    $doc_count = count($index['doc_count']);

    foreach ($words as $word) {
        if (!isset($index['dictionary'][$word])) {
            continue;
        }

        $entry = $index['dictionary'][$word];

        // Inverse document frequency with add-one smoothing on both counts.
        // The parentheses matter: without them "/" binds tighter than "+" and
        // the expression degenerates to $doc_count + (1 / df) + 1.
        // A term present in every document gets idf = log2(1) = 0.
        $idf = log(($doc_count + 1) / ($entry['document_frequency'] + 1), 2);

        foreach ($entry['postings'] as $doc_id => $posting) {
            // get term frequency-inverse document frequency
            $score = $posting['term_frequency'] * $idf;

            if (isset($similar_documents[$doc_id])) {
                $similar_documents[$doc_id] += $score;
            } else {
                $similar_documents[$doc_id] = $score;
            }
        }
    }

    // length normalise
    foreach ($similar_documents as $doc_id => $score) {
        $similar_documents[$doc_id] = $score / $index['doc_count'][$doc_id];
    }

    // sort from high to low
    arsort($similar_documents);

    return $similar_documents;
}
// Demo: rank the same three documents against two different queries and dump
// each result inside a <pre> block.
$corpus = array(
    1 => 'how are you today?',
    2 => 'how do you do',
    3 => 'here you are! how are you? Are we done yet?',
);

foreach (array('are', 'we are done') as $query) {
    $match_results = get_similar_documents($query, $corpus);
    echo '<pre>';
    print_r($match_results);
    echo '</pre>';
}
<?php
// Demonstrates a case-insensitive "contains" check with stripos().
$grass = "This is pratik joshi";
$needle = "pratik";

/*
 * The "!== false" comparison must not be left out: stripos() returns the offset
 * of the match, and a match at the very start of the string yields 0. Without
 * the strict comparison that 0 would be treated as "not found" and the else
 * branch would run even though the string was found.
 */
if (stripos($grass, $needle) !== false) {
    echo 'Contains word';
} else {
    echo "does NOT contain word";
}
?>
<?php
/**
 * Tells whether $word appears in $str as a complete word.
 *
 * The string is split into words; separators are runs of one or more non-word
 * characters. The comparison with $word is exact (case-sensitive, strict).
 *
 * @param string $str  Text to examine.
 * @param string $word Word to look for.
 *
 * @return bool True if one of the words of $str is identical to $word.
 */
function contains_word($str, $word)
{
    // "u" makes \W Unicode-aware, so accented or non-Latin letters are not
    // mistaken for separators. -1 means "no limit"; passing NULL for the limit
    // is deprecated since PHP 8.1.
    $words = preg_split('/\W+/u', $str, -1, PREG_SPLIT_NO_EMPTY);

    if ($words === false) {
        // preg_split() fails in "u" mode when $str is not valid UTF-8;
        // fall back to the byte-wise split.
        $words = preg_split('/\W+/', $str, -1, PREG_SPLIT_NO_EMPTY);
    }

    if ($words === false) {
        return false;
    }

    // now the words can be examined each
    return in_array($word, $words, true);
}
/**
 * Prints one line saying whether contains_word() finds $word in $str.
 *
 * @param string $str  Text to examine.
 * @param string $word Word to look for.
 *
 * @return void
 */
function test($str, $word)
{
    if (!contains_word($str, $word)) {
        echo "string '" . $str . "' does not contain word '" . $word . "'\n";
        return;
    }

    echo "string '" . $str . "' contains word '" . $word . "'\n";
}
// Sample sentence checked against three candidate words.
$a = 'How are you?';

test($a, 'are');  // a complete word of the sentence
test($a, 'ar');   // only a fragment of a word
test($a, 'hare'); // not in the sentence at all
?>
运行结果:
$ php -f test.php
string 'How are you?' contains word 'are'
string 'How are you?' does not contain word 'ar'
string 'How are you?' does not contain word 'hare'
// Checks a string for any word from a blacklist (whole words, case-insensitive).
$badWords = array("dette", "capitale", "rembourser", "ivoire", "mandat");
$string = "a string with the word ivoire";

// implode() takes the separator first; the reversed (array, separator) order is
// no longer accepted as of PHP 8.0. Each word is escaped so that it is matched
// literally inside the alternation.
$matchFound = preg_match_all(
    "/\b(" . implode("|", array_map(function ($badWord) {
        return preg_quote($badWord, '/');
    }, $badWords)) . ")\b/i",
    $string,
    $matches
);

// preg_match_all() returns the number of matches, so any non-zero value is a hit.
if ($matchFound) {
    echo "a bad word has been found";
} else {
    echo "your string is okay";
}
// substr_count() returns how many times the substring occurs in the text.
$text = 'This is a test';
echo substr_count($text, 'is'); // 2

// So if you want to check if is exists in the text just put
// in a condition like this:
if (substr_count($text, 'is') > 0) {
    echo "is exists";
}