When empty text is passed, the following functions return 1 instead of 0: word_count, syllable_count and sentence_count.
This causes errors when statistics are calculated for empty texts.
/**
* Returns the word count for the given text (0 for empty or whitespace-only text).
* @param string $strText Text to be measured
* @return int Number of words
*/
public function word_count($strText) {
    // Blank (empty or whitespace-only) input contains no words.
    if (trim($strText) === '') {
        return 0;
    }

    $strCleaned = $this->clean_text($strText);

    // Keep only the space characters: the word count is taken to be one more
    // than the number of spaces. Free-standing symbols such as an em dash
    // with a space on either side are therefore counted as words.
    $strSpacesOnly = preg_replace('/[^ ]/', '', $strCleaned);
    $intSpaces = $this->text_length($strSpacesOnly);

    return 1 + $intSpaces;
}
/**
* Returns the number of syllables in a single word (0 for an empty or whitespace-only word).
* Based in part on Greg Fast's Perl module Lingua::EN::Syllables
* @param string $strWord Word to be measured
* @return int Number of syllables
*/
public function syllable_count($strWord) {
    // Estimates the syllables in one English word: count the vowel groups,
    // then correct the total with pattern tables. Returns 0 for an empty or
    // whitespace-only word and at least 1 for anything else.
    if (trim($strWord) === '') {
        return 0;
    }

    // Keep ASCII letters only, then lower-case so the patterns below apply.
    // The range must be A-Z: a class written as "A_Z" keeps just "A", "_"
    // and "Z", silently deleting every other capital letter.
    $strWord = preg_replace('/[^A-Za-z]/', '', $strWord);
    $strWord = $this->lower_case($strWord);

    // Specific common exceptions that don't follow the rule set below are
    // handled individually (word as key, syllable count as value).
    $arrProblemWords = array(
        'simile' => 3,
        'forever' => 3,
        'shoreline' => 2,
    );
    if (isset($arrProblemWords[$strWord])) {
        return $arrProblemWords[$strWord];
    }

    // These syllables would be counted as two but should be one
    $arrSubSyllables = array(
        'cial',
        'tia',
        'cius',
        'cious',
        'giu',
        'ion',
        'iou',
        'sia$',
        '[^aeiuoyt]{2,}ed$',
        '.ely$',
        '[cg]h?e[rsd]?$',
        'rved?$',
        '[aeiouy][dt]es?$',
        '[aeiouy][^aeiouydt]e[rsd]?$',
        // Disabled: '^[dr]e[aeiou][^aeiou]+$' (sorts out deal, deign etc)
        '[aeiouy]rse$', // Purse, hearse
    );

    // These syllables would be counted as one but should be two
    $arrAddSyllables = array(
        'ia',
        'riet',
        'dien',
        'iu',
        'io',
        'ii',
        '[aeiouym]bl$',
        '[aeiou]{3}',
        '^mc',
        'ism$',
        '([^aeiouy])\1l$',
        '[^l]lien',
        '^coa[dglx].',
        '[^gq]ua[^auieo]',
        'dnt$',
        'uity$',
        'ie(r|st)$',
    );

    // Single syllable prefixes and suffixes
    $arrPrefixSuffix = array(
        '/^un/',
        '/^fore/',
        '/ly$/',
        '/less$/',
        '/ful$/',
        '/ers?$/',
        '/ings?$/',
    );

    // Remove prefixes and suffixes and count how many were taken; each one
    // removed contributes one syllable to the total.
    $strWord = preg_replace($arrPrefixSuffix, '', $strWord, -1, $intPrefixSuffixCount);

    // Defensive second pass: drop anything that is still not a letter.
    $strWord = preg_replace('/[^a-z]/is', '', $strWord);

    // Every run of vowels (y included) is one candidate syllable. Splitting
    // on the consonant runs and discarding empty pieces leaves those runs.
    $arrWordParts = preg_split('/[^aeiouy]+/', $strWord, -1, PREG_SPLIT_NO_EMPTY);
    $intSyllableCount = count($arrWordParts) + $intPrefixSuffixCount;

    // Some syllables do not follow normal rules - check for them.
    // preg_match() yields 1 on a match and 0 otherwise, so each pattern
    // adjusts the total by at most one.
    foreach ($arrSubSyllables as $strSyllable) {
        $intSyllableCount -= preg_match('/' . $strSyllable . '/', $strWord);
    }
    foreach ($arrAddSyllables as $strSyllable) {
        $intSyllableCount += preg_match('/' . $strSyllable . '/', $strWord);
    }

    // A non-empty word has at least one syllable. Clamp rather than test for
    // exactly zero, because several subtractive patterns can match the same
    // word and push the total below zero.
    return max(1, $intSyllableCount);
}
/**
* Returns the sentence count for the given text (0 for empty or whitespace-only text).
* @param string $strText Text to be measured
* @return int Number of sentences
*/
public function sentence_count($strText) {
    // Blank (empty or whitespace-only) input contains no sentences.
    if (trim($strText) === '') {
        return 0;
    }

    $strCleaned = $this->clean_text($strText);

    // Discard every character that is not a sentence terminator (. ! ?) and
    // measure what is left. Abbreviations such as "Mr." or "U.K." inflate
    // the figure; that is accepted for now.
    $strTerminators = preg_replace('/[^\.!?]/', '', $strCleaned);
    $intTerminators = $this->text_length($strTerminators);

    // Non-blank text without any terminator still counts as one sentence.
    return max(1, $intTerminators);
}