|
Development -
PHP
|
|
Written by Rick
|
|
Wednesday, 10 December 2008 10:49 |
|
I found this at Bit Repository and decided to add it here for posterity. Function:
function calculate_word_popularity($string, $min_word_char = 1, $exclude_words = array()) { $string = strip_tags($string);
$initial_words_array = str_word_count($string, 1); $total_words = sizeof($initial_words_array);
$new_string = $string;
foreach($exclude_words as $filter_word) { $new_string = preg_replace("/\b".$filter_word."\b/i", "", $new_string); // strip excluded words }
$words_array = str_word_count($new_string, 1);
$words_array = array_filter($words_array, create_function('$var', 'return (strlen($var) >= '.$min_word_char.');'));
$popularity = array();
$unique_words_array = array_unique($words_array);
foreach($unique_words_array as $key => $word) { preg_match_all('/\b'.$word.'\b/i', $string, $out);
$count = count($out[0]);
$percent = number_format((($count * 100) / $total_words), 2);
$popularity[$key]['word'] = $word; $popularity[$key]['count'] = $count; $popularity[$key]['percent'] = $percent.'%'; }
function cmp($a, $b) { return ($a['count'] > $b['count']) ? +1 : -1; }
usort($popularity, "cmp");
return $popularity; } ?>
|
Implementation: $text = "The PHP development team would like to announce the immediate availability of PHP 4.4.9. It continues to improve the security and the stability of the 4.4 branch and all users are strongly encouraged to upgrade to it as soon as possible. This release wraps up all the outstanding patches for the PHP 4.4 series, and is therefore the last PHP 4.4 release.";
$exclude_words = array('would','the','and','all','for','are');
$popularity = calculate_word_popularity($text, 3, $exclude_words);
krsort($popularity); // sort array (from higher to lower)
$key = 1;
echo 'Total words in the text: '.str_word_count($text).'
';
echo 'Word / Popularity / Count
';
foreach($popularity as $value) { echo $key.".".$value['word'].' - '.$value['percent'].' ('.$value["count"].')'. " \n"; $key++; } ?> |
|