Группа :: Редакторы
Пакет: emacs22
Главная Изменения Спек Патчи Загрузить Bugs and FR
Патч: emacs-22.0.50-alt0.4-more-cyrillic-support.patch
diff -Naur emacs/etc/HELLO emacs.build/etc/HELLO
--- emacs/etc/HELLO 2004-04-03 10:41:21 +0600
+++ emacs.build/etc/HELLO 2005-11-03 00:08:09 +0500
@@ -3,6 +3,7 @@
---------------------------------------------------------
Amharic ($(3"c!<!N"^(B) $(3!A!,!>(B
Arabic (38R(47d(3T!JSa(4W(3W(B
+Belarusian (,L1U[P`caZPo(B ,L\^RP(B) ,L4WU]l(B ,LT^Q`k(B!
Braille $,2(3(1('('(5(B
C printf ("Hello, world!\n");
Czech (,Bh(Be,B9(Btina) Dobr,B}(B den
diff -Naur emacs/lisp/faces.el emacs.build/lisp/faces.el
--- emacs/lisp/faces.el 2005-11-01 16:39:33 +0500
+++ emacs.build/lisp/faces.el 2005-11-03 00:08:09 +0500
@@ -88,7 +88,8 @@
'(("gb2312.1980" "gb2312.80&gb8565.88" "gbk*")
("jisx0208.1990" "jisx0208.1983" "jisx0208.1978")
("ksc5601.1989" "ksx1001.1992" "ksc5601.1987")
- ("muletibetan-2" "muletibetan-0")))
+ ("muletibetan-2" "muletibetan-0")
+ ("iso8859-5" "microsoft-cp1251" "koi8-u" "koi8")))
"*Alist of alternative font registry names.
Each element has the form (REGISTRY ALTERNATIVE1 ALTERNATIVE2 ...).
If fonts of registry REGISTRY can be loaded, font selection
diff -Naur emacs/lisp/international/cyrillic-codepages-setup.el emacs.build/lisp/international/cyrillic-codepages-setup.el
--- emacs/lisp/international/cyrillic-codepages-setup.el 1970-01-01 05:00:00 +0500
+++ emacs.build/lisp/international/cyrillic-codepages-setup.el 2005-11-03 00:08:09 +0500
@@ -0,0 +1,102 @@
+; Setup cyrillic codepages and aliases for their coding-systems.
+;
+; This feature is for setting the corresponding Cyrillic language environments
+; seamlessly (without writing any special commands before).
+;
+;
+; Based on some of my ideas and on postings to the sisyphus@altlinux.ru mailing list,
+; particularly from Alexander Bokovoy and Serhii Hlodin.
+; Added MIME charset names according to http://www.iana.org/assignments/character-sets.
+;
+; February 2002
+; Ivan Zakharyaschev <imz@altlinux.ru>
+;
+; Copyright (C) ALT Linux Team 2002
+; (This file is not a part of the original GNU Emacs.)
+;
+; This code is free software; you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation; either version 2, or (at your option)
+; any later version.
+
+;;; Commentary:
+
+;;; Code:
+
+(codepage-setup 1251)
+(coding-system-put 'cp1251 'mime-charset 'windows-1251)
+(define-coding-system-alias 'windows-1251 'cp1251)
+(define-coding-system-alias 'cyrillic-cp1251 'cp1251)
+
+(codepage-setup 1125)
+(define-coding-system-alias 'cp866u 'cp1125)
+(define-coding-system-alias 'cyrillic-cp1125 'cp1125)
+
+(codepage-setup 866)
+(coding-system-put 'cp866 'mime-charset 'cp866)
+(define-coding-system-alias 'ibm-866 'cp866)
+(define-coding-system-alias 'ibm866 'cp866)
+(define-coding-system-alias 'cyrillic-cp866 'cp866)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; A piece of code to correct the list of valid codes for the three codepage-based
+; coding systems: the new ``more reasonable'' values fix Emacs looping infinitely
+; on some encoded input:
+
+(defun cp-detect-valid-codes (codepage)
+ "List valid codes for the CODEPAGE coding-system in a format suitable
+for the coding-system's `valid-codes' property. CODEPAGE names the coding
+system (e.g. `cp1251'); codes come from the variable CODEPAGE-decode-table.
+The result is the range (0 . 127) followed by that table's non-nil entries, sorted.
+
+This function was not present in the original GNU Emacs, rather it has been
+added by ALT mainly to set reasonable `valid-codes' property of cp1251
+coding-system (otherwise Emacs sometimes looped infinitely trying to process
+some odd input in encoded-kbd mode)."
+ (let* ((decode-table (intern (format "%s-decode-table" codepage))))
+ (cons
+ (cons 0 127)
+ (sort (delq nil (mapcar 'identity (symbol-value decode-table))) '<))))
+
+
+(defun cp-correct-valid-codes (codepage)
+ "Try to set more reasonable values for the `valid-codes' property of the
+CODEPAGE coding-system (a symbol such as `cp1251') than the default single
+full range ((0 . 255)). The new value is obtained by calling
+`cp-detect-valid-codes' on CODEPAGE (the codes present in its decode-table).
+
+This function was not present in the original GNU Emacs, rather it has been
+added by ALT mainly to set reasonable `valid-codes' property of cp1251
+coding-system (otherwise Emacs sometimes looped infinitely trying to process
+some odd input in encoded-kbd mode)."
+ (coding-system-put codepage 'valid-codes (cp-detect-valid-codes codepage)))
+
+(mapc 'cp-correct-valid-codes '(cp1251 cp866 cp1125))
+
+; And now
+;;(coding-system-get 'cp1251 'valid-codes)
+; will return a much more detailed list of valid codes.
+
+; Valid codes lists fixed.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Add information about recoding X fonts in these charset/encodings.
+
+(define-ccl-program ccl-encode-cp1251-font
+ `(0
+ ((translate-character cp1251-encode-translation-table r0 r1)))
+ "CCL program to encode Cyrillic chars to CP1251 font.")
+
+;; (setq font-ccl-encoder-alist
+;; (cons '("microsoft-cp1251" . ccl-encode-cp1251-font) font-ccl-encoder-alist))
+
+(add-to-list 'font-ccl-encoder-alist ; prepends only if absent, so reloading adds no duplicate
+ '(".*1251" . ccl-encode-cp1251-font)) ; any X font registry ending in 1251
+
+; X font data setup end.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(provide 'cyrillic-codepages-setup)
+
+; End of cyrillic-codepages-setup
diff -Naur emacs/lisp/international/fontset.el emacs.build/lisp/international/fontset.el
--- emacs/lisp/international/fontset.el 2005-10-13 11:34:35 +0600
+++ emacs.build/lisp/international/fontset.el 2005-11-03 00:20:03 +0500
@@ -28,6 +28,13 @@
;;; Code:
+; Require the setup of cp1251 support, otherwise
+; it might be too late. (imz@altlinux.ru)
+; Additional comment from Eugene Vlasov: this code breaks preparing to dump,
+; so we need to check purify-flag.
+(unless purify-flag
+ (require 'cyrillic-codepages-setup))
+
;; Set standard fontname specification of characters in the default
;; fontset to find an appropriate font for each charset. This is used
;; to generate a font name for a fontset if the fontset doesn't
@@ -58,6 +65,11 @@
(katakana-jisx0201 . (nil . "JISX0201"))
(latin-jisx0201 . (nil . "JISX0201"))
(cyrillic-iso8859-5 . (nil . "ISO8859-5"))
+; These are the other possibilities (alternatives);
+; We don't need to use them here (face-font-registry-alternatives
+; will help us to use all the available Cyrillic fonts):
+; (cyrillic-iso8859-5 . (nil . "*-*1251"))
+; (cyrillic-iso8859-5 . (nil . "KOI8"))
(latin-iso8859-9 . (nil . "ISO8859-9"))
(japanese-jisx0208-1978 . (nil . "JISX0208.1978"))
(chinese-gb2312 . (nil . "GB2312.1980"))
@@ -248,6 +260,7 @@
("iso8859-15" ascii latin-iso8859-15)
("tis620" ascii thai-tis620)
("koi8" ascii cyrillic-iso8859-5)
+ (".*1251" ascii cyrillic-iso8859-5)
("viscii" ascii vietnamese-viscii-upper vietnamese-viscii-lower)
("vscii" ascii vietnamese-viscii-upper vietnamese-viscii-lower)
("mulelao-1" ascii lao)
diff -Naur emacs/lisp/international/mule-cmds.el emacs.build/lisp/international/mule-cmds.el
--- emacs/lisp/international/mule-cmds.el 2005-10-06 21:52:12 +0600
+++ emacs.build/lisp/international/mule-cmds.el 2005-11-03 00:08:09 +0500
@@ -2108,7 +2108,8 @@
; ay Aymara
("az" . "UTF-8") ; Azerbaijani
; ba Bashkir
- ("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
+ ("be.*[_.]cp1251" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
+ ("be" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
("bg" "Bulgarian" cp1251) ; Bulgarian
; bh Bihari
; bi Bislama
@@ -2207,7 +2208,9 @@
("rm" . "Latin-1") ; Rhaeto-Romanic
; rn Kirundi
("ro" "Romanian" iso-8859-2)
+ ("ru[_.]ua[_.]cp1251" . "Ukrainian-CP1251") ; Russian in Ukraine (must precede generic ru.*: first match wins)
+ ("ru.*[_.]cp1251" . "Cyrillic-CP1251") ; Russian
("ru_RU" "Russian" iso-8859-5)
("ru_UA" "Russian" koi8-u)
; rw Kinyarwanda
("sa" . "Devanagari") ; Sanskrit
@@ -2246,6 +2249,8 @@
("tt" . "UTF-8") ; Tatar
; tw Twi
; ug Uighur
+ ("uk.*[_.]cp1251" . "Ukrainian-CP1251") ; Ukrainian
+ ("uk.*[_.]cp1125" . "Ukrainian-CP1125") ; Ukrainian
("uk" "Ukrainian" koi8-u)
("ur" . "UTF-8") ; Urdu
("uz_UZ@cyrillic" . "UTF-8"); Uzbek
@@ -2302,6 +2307,7 @@
(".*8859[-_]?2\\>" . "Latin-2")
(".*8859[-_]?3\\>" . "Latin-3")
(".*8859[-_]?4\\>" . "Latin-4")
+ (".*8859[-_]?5\\>" . "Cyrillic-ISO")
(".*8859[-_]?9\\>" . "Latin-5")
(".*8859[-_]?14\\>" . "Latin-8")
(".*8859[-_]?15\\>" . "Latin-9")
diff -Naur emacs/lisp/language/cyrillic.el emacs.build/lisp/language/cyrillic.el
--- emacs/lisp/language/cyrillic.el 2005-07-04 23:18:39 +0600
+++ emacs.build/lisp/language/cyrillic.el 2005-11-03 00:08:09 +0500
@@ -218,7 +218,9 @@
(translate-character cyrillic-koi8-r-encode-table r0 r1))
"CCL program to encode Cyrillic chars to KOI font.")
-(add-to-list 'font-ccl-encoder-alist '("koi8" . ccl-encode-koi8-font))
+(add-to-list 'font-ccl-encoder-alist '("koi8-r" . ccl-encode-koi8-font))
+; We need a finer (than just "koi8") regexp in order
+; to encode & display Ukrainian correctly (with koi8-u).
(set-language-info-alist
"Cyrillic-KOI8" `((charset cyrillic-iso8859-5)
@@ -545,6 +547,98 @@
\(The name Belarusian replaced Byelorussian in the early 1990s.)"))
'("Cyrillic"))
+(define-coding-system-alias 'koi8-ru 'koi8-u)
+
+; Several CP1251, CP866 and CP1125 based environments.
+;
+; They can be chosen without taking care
+; of (codepage-setup 1251) -- it is done automatically by pre-requiring the feature
+; cyrillic-codepages-setup. (The source for it is in cyrillic-codepages-setup.el.)
+;
+; Based on the postings in sisyphus@altlinux.ru mailing list, particularly
+; from Alexander Bokovoy and Serhii Hlodin.
+;
+; CP1251 support is not complete because it is based on iso-8859-5 (and there
+; are not all the letters, e.g. no "Ghe with upturn").
+;
+; The new language-environments descriptions added by this patch
+; make use of the key "prereq-features" of the language-info structure.
+; This key is not present in original GNU Emacs 21, the support for it
+; is added by another patch (or hack): emacs-21.1-lang-env-prereqs.patch.
+; The descriptions in this form can exist and work even if there is no support
+; for the new key -- then you have to manually require 'cyrillic-codepages-setup
+; or to evaluate a corresponding (codepage-setup ...) before you use them.
+;
+; February 2002
+; imz@altlinux.ru
+
+(set-language-info-alist
+ "Cyrillic-CP1251" `((charset cyrillic-iso8859-5)
+ (prereq-features cyrillic-codepages-setup)
+ (coding-system cp1251)
+ (coding-priority cp1251)
+ (input-method . "cyrillic-jcuken")
+ (nonascii-translation . cp1251-nonascii-translation-table)
+ (unibyte-display . cp1251)
+ (features cyril-util)
+ (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!")
+ (documentation . "Support for Cyrillic CP1251. Added in ALT's distribution."))
+ '("Cyrillic"))
+
+(set-language-info-alist
+ "Ukrainian-CP1251" `((charset cyrillic-iso8859-5)
+ (prereq-features cyrillic-codepages-setup)
+ (coding-system cp1251)
+ (coding-priority cp1251)
+ (nonascii-translation . cp1251-nonascii-translation-table)
+ (input-method . "cyrillic-ukrainian")
+ (unibyte-display . cp1251)
+ (features cyril-util)
+ (sample-text . "Ukrainian (,LCZ`Pw]alZP(B) ,L4^Q`^S^(B ,LT]o(B!")
+ (documentation . "Support for Ukrainian language (with CP1251 coding system; incomplete). Suggests an input method from additional `leim' package. Added in ALT's distribution."))
+ '("Cyrillic"))
+
+
+(set-language-info-alist
+ "Cyrillic-CP866" `((charset cyrillic-iso8859-5)
+ (prereq-features cyrillic-codepages-setup)
+ (coding-system cp866)
+ (coding-priority cp866)
+ (input-method . "cyrillic-jcuken")
+ (nonascii-translation . cp866-nonascii-translation-table)
+ (unibyte-display . cp866)
+ (features cyril-util)
+ (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!")
+ (documentation . "Support for Cyrillic CP866. There is little difference between this one and Cyrillic-ALT. Added in ALT's distribution."))
+ '("Cyrillic"))
+
+(set-language-info-alist
+ "Ukrainian-CP1125" `((charset cyrillic-iso8859-5)
+ (prereq-features cyrillic-codepages-setup)
+ (coding-system cp1125)
+ (coding-priority cp1125)
+ (nonascii-translation . cp1125-nonascii-translation-table)
+ (input-method . "cyrillic-ukrainian")
+ (unibyte-display . cp1125)
+ (features cyril-util)
+ (sample-text . "Ukrainian (,LCZ`Pw]alZP(B) ,L4^Q`^S^(B ,LT]o(B!")
+ (documentation . "Support for Ukrainian language (with CP1125 coding system). Suggests an input method from additional `leim' package. Added in ALT's distribution."))
+ '("Cyrillic"))
+
+; End of the block of CP1251, CP866 and CP1125 based language environments.
+
+; Some special X font encodings:
+
+(add-to-list 'font-ccl-encoder-alist '("koi8-c" . ccl-encode-koi8-u-font))
+; Apparently, koi8-c is a coding for Extended (Old) Cyrillic which matches
+; koi8-u in all letter positions. Since we don't have a special coding-system
+; for it in Emacs, but there happen to be such X fonts (e.g., -val-*-koi8-c), we treat it
+; the same way as koi8-u.
+
+(add-to-list 'font-ccl-encoder-alist '("koi8-1" . ccl-encode-koi8-font))
+; koi8-1 was present in some older fonts.
+
+
(provide 'cyrillic)
;;; arch-tag: bda71ae0-ba41-4cb6-a6e0-1dff542313d3
diff -Naur emacs/man/gnus.texi emacs.build/man/gnus.texi
--- emacs/man/gnus.texi 2005-10-22 18:16:00 +0600
+++ emacs.build/man/gnus.texi 2005-11-03 00:08:10 +0500
@@ -9446,6 +9446,7 @@
encode using quoted-printable) or @code{t} (always use 8bit).
@end table
+@cindex Ukrainian
@cindex Russian
@cindex koi8-r
@cindex koi8-u
@@ -9470,14 +9471,14 @@
This means that Russian will be encoded using @code{koi8-r} instead of
the default @code{iso-8859-5} @acronym{MIME} charset.
-If you want to read messages in @code{koi8-u}, you can cheat and say
+If you want to write messages in @code{koi8-u}, you can say (no cheat:
+@code{koi8-u} is really supported now)
@lisp
-(define-coding-system-alias 'koi8-u 'koi8-r)
+(put-charset-property 'cyrillic-iso8859-5
+ 'preferred-coding-system 'koi8-u)
@end lisp
-This will almost do the right thing.
-
And finally, to read charsets like @code{windows-1251}, you can say
something like
diff -Naur emacs/man/mule.texi emacs.build/man/mule.texi
--- emacs/man/mule.texi 2005-08-10 21:14:33 +0600
+++ emacs.build/man/mule.texi 2005-11-03 00:08:10 +0500
@@ -9,6 +9,7 @@
@cindex multibyte characters
@cindex encoding of characters
+@cindex Belarusian
@cindex Celtic
@cindex Chinese
@cindex Cyrillic
@@ -26,6 +27,7 @@
@cindex Lao
@cindex Latin
@cindex Polish
+@cindex Russian
@cindex Romanian
@cindex Slovak
@cindex Slovenian
@@ -33,6 +35,7 @@
@cindex Tibetan
@cindex Turkish
@cindex Vietnamese
+@cindex Ukrainian
@cindex Dutch
@cindex Spanish
Emacs supports a wide variety of international character sets,
@@ -243,17 +246,17 @@
@quotation
Belarusian, Brazilian Portuguese, Bulgarian, Chinese-BIG5,
Chinese-CNS, Chinese-EUC-TW, Chinese-GB, Croatian, Cyrillic-ALT,
-Cyrillic-ISO, Cyrillic-KOI8, Czech, Devanagari, Dutch, English,
-Ethiopic, French, Georgian, German, Greek, Hebrew, IPA, Italian,
-Japanese, Kannada, Korean, Lao, Latin-1, Latin-2, Latin-3,
-Latin-4, Latin-5, Latin-6, Latin-7, Latin-8 (Celtic),
-Latin-9 (updated Latin-1 with the Euro sign), Latvian,
+Cyrillic-CP1251, Cyrillic-CP866, Cyrillic-ISO, Cyrillic-KOI8, Czech,
+Devanagari, Dutch, English, Ethiopic, French, Georgian, German, Greek,
+Hebrew, IPA, Italian, Japanese, Kannada, Korean, Lao, Latin-1,
+Latin-2, Latin-3, Latin-4, Latin-5, Latin-6, Latin-7, Latin-8
+(Celtic), Latin-9 (updated Latin-1 with the Euro sign), Latvian,
Lithuanian, Malayalam, Polish, Romanian, Russian, Slovak,
Slovenian, Spanish, Swedish, Tajik, Tamil, Thai, Tibetan,
Turkish, UTF-8 (for a setup which prefers Unicode characters and
-files encoded in UTF-8), Ukrainian, Vietnamese, Welsh, and
-Windows-1255 (for a setup which prefers Cyrillic characters and
-files encoded in Windows-1255).
+files encoded in UTF-8), Ukrainian, Ukrainian-CP1125,
+Ukrainian-CP1251, Vietnamese, Welsh, and Windows-1255 (for a setup
+which prefers Cyrillic characters and files encoded in Windows-1255).
@end quotation
@cindex fonts for various scripts
@@ -354,8 +357,8 @@
The simplest kind of input method works by mapping @acronym{ASCII} letters
into another alphabet; this allows you to use one other alphabet
-instead of @acronym{ASCII}. The Greek and Russian input methods
-work this way.
+instead of @acronym{ASCII}. The Greek and Cyrillic (e.g. Russian)
+input methods work this way.
A more powerful technique is composition: converting sequences of
characters into one letter. Many European input methods use composition