diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/librcd.c | 28 | ||||
-rw-r--r-- | src/librcd.h | 1 |
2 files changed, 23 insertions, 6 deletions
diff --git a/src/librcd.c b/src/librcd.c index 36986cc..b81d27f 100644 --- a/src/librcd.c +++ b/src/librcd.c @@ -259,29 +259,36 @@ with latin languages there is in every word besides umlauts should exist at least one standard latin character with code < 127. */ static int check_latin(const unsigned char *buf, int len) { long i; - int word = 0; + int cyr = 0; int latin = 0; for (i=0;i<len;i++) { if (buf[i]<128) { if (((buf[i]>='a')&&(buf[i]<='z'))||((buf[i]>='A')&&(buf[i]<='Z'))) { - // Latin character inside a word, so it isn't cyrillic word + // Latin character inside a word, so it probably isn't cyrillic word latin++; } else { // Treating as a word separator. - if (word > 0) { + if (cyr > 0) { if (!latin) return 0; - if ((word/latin)>4) return 0; + if (cyr>latin) return 0; } - word = 0; + cyr = 0; latin = 0; } } else { // Could be cyrillic word - if (word>=0) word++; + cyr++; } } + + if (cyr > 0) { + if (!latin) return 0; + if (cyr>latin) return 0; + } +// printf("C%u:L%u\n",cyr,latin); + return 1; } @@ -297,6 +304,15 @@ rcd_russian_charset rcdGetRussianCharset(const char *buf,int len) { return is_win_charset2(buf,l); } +/* +rcd_russian_charset rcdGetRussianCharset(const char *buf,int len) { + int res; + res = rcdGetRussianCharset1(buf, len); + printf("%u: %s\n", res, (buf&&!len)?buf:"null"); + return res; +} +*/ + /* Compatibility */ rcd_russian_charset get_russian_charset(const char *buf,int len) { return rcdGetRussianCharset(buf, len); diff --git a/src/librcd.h b/src/librcd.h index 6fc3281..918d8c0 100644 --- a/src/librcd.h +++ b/src/librcd.h @@ -29,6 +29,7 @@ rcdGetRussianCharset 1 - KOI8-R 2 - UTF8 3 - CP866 + 4 - ISO8859-1 */ rcd_russian_charset rcdGetRussianCharset(const char *buf, int len); |