Группа :: Редакторы
Пакет: emacs21
Главная Изменения Спек Патчи Загрузить Bugs and FR
Патч: emacs-21.3-alt1-more-cyrillic-support.patch
diff --exclude='*.orig' -duNr emacs-21.2.orig/lisp/faces.el emacs-21.2/lisp/faces.el
--- emacs-21.2.orig/lisp/faces.el 2002-01-28 19:32:36 +0300
+++ emacs-21.2/lisp/faces.el 2003-02-10 19:15:27 +0300
@@ -88,7 +90,8 @@
'(("gb2312.1980" "gb2312.80&gb8565.88" "gbk*")
("jisx0208.1990" "jisx0208.1983" "jisx0208.1978")
("ksc5601.1989" "ksx1001.1992" "ksc5601.1987")
- ("muletibetan-2" "muletibetan-0")))
+ ("muletibetan-2" "muletibetan-0")
+ ("iso8859-5" "microsoft-cp1251" "koi8-u" "koi8")))
"*Alist of alternative font registry names.
Each element has the the form (REGISTRY ALTERNATIVE1 ALTERNATIVE2 ...).
If fonts of registry REGISTRY can be loaded, font selection
diff --exclude='*.orig' -duNr emacs-21.2.orig/lisp/international/cyrillic-codepages-setup.el emacs-21.2/lisp/international/cyrillic-codepages-setup.el
--- emacs-21.2.orig/lisp/international/cyrillic-codepages-setup.el 1970-01-01 03:00:00 +0300
+++ emacs-21.2/lisp/international/cyrillic-codepages-setup.el 2003-02-10 21:12:24 +0300
@@ -0,0 +1,98 @@
+; Setup cyrillic codepages and aliases for their coding-systems.
+;
+; This feature is for setting the corresponding Cyrillic language environments
+; seamlessly (without writing any special commands before).
+;
+;
+; Based on some of my ideas and the postings in the sisyphus@altlinux.ru mailing list,
+; particularly from Alexander Bokovoy and Serhii Hlodin.
+; Added MIME charset names according to http://www.iana.org/assignments/character-sets.
+;
+; February 2002
+; Ivan Zakharyaschev <imz@altlinux.ru>
+;
+; Copyright (C) ALT Linux Team 2002
+; (This file is not a part of the original GNU Emacs.)
+;
+; This code is free software; you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation; either version 2, or (at your option)
+; any later version.
+
+(codepage-setup 1251)
+(coding-system-put 'cp1251 'mime-charset 'windows-1251)
+(define-coding-system-alias 'windows-1251 'cp1251)
+(define-coding-system-alias 'cyrillic-cp1251 'cp1251)
+
+(codepage-setup 1125)
+(define-coding-system-alias 'cp866u 'cp1125)
+(define-coding-system-alias 'cyrillic-cp1125 'cp1125)
+
+(codepage-setup 866)
+(coding-system-put 'cp866 'mime-charset 'cp866)
+(define-coding-system-alias 'ibm-866 'cp866)
+(define-coding-system-alias 'ibm866 'cp866)
+(define-coding-system-alias 'cyrillic-cp866 'cp866)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; A piece of code to correct the list of valid codes for the three codepage-based
+; coding systems: the new ``more reasonable'' values fix Emacs looping infinitely
+; on some encoded input:
+
+(defun cp-detect-valid-codes(codepage) "List valid codes for cpCODEPAGE coding-system
+in a format suitable for coding-system's `valid-codes' property.
+The result always has the range (0 . 127) as the first element,
+other elements are extracted from the corresponding codepage decode table
+and sorted.
+
+This function was not present in the original GNU Emacs, rather it has been added
+by ALT mainly to set reasonable `valid-codes' property of cp1251 coding-system
+(otherwise Emacs sometimes looped infinitely trying to process some odd input in
+encoded-kbd mode)."
+ (let* ((decode-table (intern (format "%s-decode-table" codepage))))
+ (cons
+ (cons 0 127)
+ (sort (delq nil (mapcar 'identity (symbol-value decode-table))) '<))))
+
+
+(defun cp-correct-valid-codes(codepage) "Try to set more reasonable values
+for the `valid-codes' property of cpCODEPAGE coding-system than the default
+single full range ((0 . 255)). The ``more reasonable'' values are acquired
+by calling `cp-detect-valid-codes' on CODEPAGE (it lists the codes really
+present in the decode-table).
+
+This function was not present in the original GNU Emacs, rather it has been added
+by ALT mainly to set reasonable `valid-codes' property of cp1251 coding-system
+(otherwise Emacs sometimes looped infinitely trying to process some odd input in
+encoded-kbd mode)."
+(coding-system-put codepage 'valid-codes (cp-detect-valid-codes codepage)))
+
+(mapc 'cp-correct-valid-codes '(cp1251 cp866 cp1125))
+
+; And now
+;;(coding-system-get 'cp1251 'valid-codes)
+; will return a much more detailed list of valid codes.
+
+; Valid codes lists fixed.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Add information about recoding X fonts in these charset/encodings.
+
+(define-ccl-program ccl-encode-cp1251-font
+ `(0
+ ((translate-character cp1251-encode-translation-table r0 r1)))
+ "CCL program to encode Cyrillic chars to CP1251 font.")
+
+;; (setq font-ccl-encoder-alist
+;; (cons '("microsoft-cp1251" . ccl-encode-cp1251-font) font-ccl-encoder-alist))
+
+(setq font-ccl-encoder-alist
+ (cons '(".*1251" . ccl-encode-cp1251-font) font-ccl-encoder-alist))
+
+; X font data setup end.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(provide 'cyrillic-codepages-setup)
+
+; End of cyrillic-codepages-setup
diff --exclude='*.orig' -duNr emacs-21.2.orig/lisp/international/fontset.el emacs-21.2/lisp/international/fontset.el
--- emacs-21.2.orig/lisp/international/fontset.el 2001-10-24 15:07:58 +0400
+++ emacs-21.2/lisp/international/fontset.el 2003-02-10 18:22:12 +0300
@@ -27,6 +27,10 @@
;;; Code:
+; Require the setup of cp1251 support, otherwise
+; it might be too late. (imz@altlinux.ru)
+(require 'cyrillic-codepages-setup)
+
;; Set standard fontname specification of characters in the default
;; fontset to find an appropriate font for each charset. This is used
;; to generate a font name for a fontset if the fontset doesn't
@@ -50,7 +54,12 @@
(hebrew-iso8859-8 . ("*" . "ISO8859-8"))
(katakana-jisx0201 . ("*" . "JISX0201"))
(latin-jisx0201 . (nil . "JISX0201"))
- (cyrillic-iso8859-5 . ("*" . "ISO8859-5"))
+ (cyrillic-iso8859-5 . (nil . "ISO8859-5"))
+; These are the other possibilities (alternatives);
+; We don't need to use them here (face-font-registry-alternatives
+; will help us to use all the available Cyrillic fonts):
+; (cyrillic-iso8859-5 . (nil . "*-*1251"))
+; (cyrillic-iso8859-5 . (nil . "KOI8"))
(latin-iso8859-9 . (nil . "ISO8859-9"))
(japanese-jisx0208-1978 . ("*" . "JISX0208.1978"))
(chinese-gb2312 . ("*" . "GB2312.1980"))
@@ -111,6 +120,7 @@
(set-font-encoding "ISO8859-1" 'ascii 0)
(set-font-encoding "JISX0201" 'latin-jisx0201 0)
+;(set-font-encoding "MICROSOFT-CP1251" 'cp1251 0)
(define-ccl-program ccl-encode-unicode-font
`(0
@@ -164,6 +174,7 @@
("iso8859-15" ascii latin-iso8859-15)
("tis620" ascii thai-tis620)
("koi8" ascii cyrillic-iso8859-5)
+ (".*1251" ascii cyrillic-iso8859-5)
("viscii" ascii vietnamese-viscii-upper vietnamese-viscii-lower)
("vscii" ascii vietnamese-viscii-upper vietnamese-viscii-lower)
("mulelao-1" ascii lao)
diff --exclude='*.orig' -duNr emacs-21.2.orig/lisp/international/mule-conf.el emacs-21.2/lisp/international/mule-conf.el
--- emacs-21.2.orig/lisp/international/mule-conf.el 2002-03-13 22:51:55 +0300
+++ emacs-21.2/lisp/international/mule-conf.el 2003-02-10 21:12:24 +0300
@@ -425,9 +425,12 @@
'(("ISO8859-15" . latin-iso8859-15)
("ISO8859-14" . latin-iso8859-14)
("KOI8-R" . koi8-r)
+ ("KOI8-U" . koi8-u)
+ ("MICROSOFT-CP1251" . cp1251)
("BIG5-0" . big5))
"Alist of font charset names defined by XLFD, and the corresponding Emacs
-charsets or coding systems.")
+charsets or coding systems.
+(koi8-u and microsoft-cp1251 added by ALT Linux Team.)")
;; Functions to support "Non-Standard Character Set Encodings" defined
;; by the ICCCM spec. We support that by converting the leading
diff --exclude='*.orig' -duNr emacs-21.2.orig/lisp/language/cyrillic.el emacs-21.2/lisp/language/cyrillic.el
--- emacs-21.2.orig/lisp/language/cyrillic.el 2003-02-10 21:08:57 +0300
+++ emacs-21.2/lisp/language/cyrillic.el 2003-02-10 21:32:05 +0300
@@ -221,7 +221,9 @@
"CCL program to encode Cyrillic chars to KOI font.")
(setq font-ccl-encoder-alist
- (cons '("koi8" . ccl-encode-koi8-font) font-ccl-encoder-alist))
+ (cons '("koi8-r" . ccl-encode-koi8-font) font-ccl-encoder-alist))
+; We need a finer (than just "koi8") regexp in order
+; to encode & display Ukrainian correctly (with koi8-u).
(set-language-info-alist
"Cyrillic-KOI8" `((charset cyrillic-iso8859-5)
@@ -329,6 +331,113 @@
(documentation . "Support for Cyrillic ALTERNATIVNYJ."))
'("Cyrillic"))
+(define-coding-system-alias 'koi8-ru 'koi8-u)
+
+; Several CP1251, CP866 and CP1125 based environments.
+;
+; They can be chosen without taking care
+; of (codepage-setup 1251) -- it is done automatically by pre-requiring the feature
+; cyrillic-codepages-setup. (The source for it is in cyrillic-codepages-setup.el.)
+;
+; Based on the postings in sisyphus@altlinux.ru mailing list, particularly
+; from Alexander Bokovoy and Serhii Hlodin.
+;
+; CP1251 support is not complete because it is based on iso-8859-5 (and there
+; are not all the letters, e.g. no "Ghe with upturn").
+;
+; The new language-environments descriptions added by this patch
+; make use of the key "prereq-features" of the language-info structure.
+; This key is not present in original GNU Emacs 21, the support for it
+; is added by another patch (or hack): emacs-21.1-lang-env-prereqs.patch.
+; The descriptions in this form can exist and work even if there is no support
+; for the new key -- then you have to manually require 'cyrillic-codepages-setup
+; or to evaluate a corresponding (codepage-setup ...) before you use them.
+;
+; February 2002
+; imz@altlinux.ru
+
+(set-language-info-alist
+ "Cyrillic-CP1251" `((charset cyrillic-iso8859-5)
+ (prereq-features cyrillic-codepages-setup)
+ (coding-system cp1251)
+ (coding-priority cp1251)
+ (input-method . "cyrillic-jcuken")
+ (nonascii-translation . cp1251-nonascii-translation-table)
+ (unibyte-display . cp1251)
+ (features cyril-util)
+ (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!")
+ (documentation . "Support for Cyrillic CP1251. Added in ALT's distribution."))
+ '("Cyrillic"))
+
+(set-language-info-alist
+ "Belarusian" `((charset cyrillic-iso8859-5)
+ (prereq-features cyrillic-codepages-setup)
+ (coding-system cp1251)
+ (coding-priority cp1251)
+ (nonascii-translation . cp1251-nonascii-translation-table)
+ (input-method . "cyrillic-belarusian")
+ (unibyte-display . cp1251)
+ (features cyril-util)
+ (sample-text . "Belarusian (,L1U[P`caZPo(B ,L\^RP(B) ,L4WU]l(B ,LT^Q`k(B!")
+ (documentation . "Support for Belarusian language (with CP1251 coding system). Suggests an input method from additional `leim' package. Added in ALT's distribution."))
+ '("Cyrillic"))
+
+(set-language-info-alist
+ "Ukrainian-CP1251" `((charset cyrillic-iso8859-5)
+ (prereq-features cyrillic-codepages-setup)
+ (coding-system cp1251)
+ (coding-priority cp1251)
+ (nonascii-translation . cp1251-nonascii-translation-table)
+ (input-method . "cyrillic-ukrainian")
+ (unibyte-display . cp1251)
+ (features cyril-util)
+ (sample-text . "Ukrainian (,LCZ`Pw]alZP(B) ,L4^Q`^S^(B ,LT]o(B!")
+ (documentation . "Support for Ukrainian language (with CP1251 coding system; incomplete). Suggests an input method from additional `leim' package. Added in ALT's distribution."))
+ '("Cyrillic"))
+
+
+(set-language-info-alist
+ "Cyrillic-CP866" `((charset cyrillic-iso8859-5)
+ (prereq-features cyrillic-codepages-setup)
+ (coding-system cp866)
+ (coding-priority cp866)
+ (input-method . "cyrillic-jcuken")
+ (nonascii-translation . cp866-nonascii-translation-table)
+ (unibyte-display . cp866)
+ (features cyril-util)
+ (sample-text . "Russian (,L@caaZXY(B) ,L7T`PRabRcYbU(B!")
+ (documentation . "Support for Cyrillic CP866. There is little difference between this one and Cyrillic-ALT. Added in ALT's distribution."))
+ '("Cyrillic"))
+
+(set-language-info-alist
+ "Ukrainian-CP1125" `((charset cyrillic-iso8859-5)
+ (prereq-features cyrillic-codepages-setup)
+ (coding-system cp1125)
+ (coding-priority cp1125)
+ (nonascii-translation . cp1125-nonascii-translation-table)
+ (input-method . "cyrillic-ukrainian")
+ (unibyte-display . cp1125)
+ (features cyril-util)
+ (sample-text . "Ukrainian (,LCZ`Pw]alZP(B) ,L4^Q`^S^(B ,LT]o(B!")
+ (documentation . "Support for Ukrainian language (with CP1125 coding system). Suggests an input method from additional `leim' package. Added in ALT's distribution."))
+ '("Cyrillic"))
+
+; End of the block of CP1251, CP866 and CP1125 based language environments.
+
+; Some special X font encodings:
+
+(setq font-ccl-encoder-alist
+ (cons '("koi8-c" . ccl-encode-koi8-u-font) font-ccl-encoder-alist))
+; Apparently, koi8-c is a coding for Extended (Old) Cyrillic which matches
+; koi8-u in all letter positions. Since we don't have a special coding-system
+; for it in Emacs, but there happen to be such X fonts (e.g., -val-*-koi8-c), we treat it
+; the same way as koi8-u.
+
+(setq font-ccl-encoder-alist
+ (cons '("koi8-1" . ccl-encode-koi8-font) font-ccl-encoder-alist))
+; koi8-1 was present in some older fonts.
+
+
(provide 'cyrillic)
;;; cyrillic.el ends here
--- emacs-21.2.orig/lisp/textmodes/ispell.el 2003-02-10 21:08:57 +0300
+++ emacs-21.2/lisp/textmodes/ispell.el 2003-02-10 21:12:24 +0300
@@ -486,6 +486,7 @@
(choice :tag "Character set"
(const iso-8859-1)
(const iso-8859-2)
+ (const cp1251)
(const koi8-u)
(const koi8-r))))
:group 'ispell)
--- emacs-21.3/man/gnus.texi.more-cyr 2003-03-24 23:15:53 +0300
+++ emacs-21.3/man/gnus.texi 2003-03-24 23:16:04 +0300
@@ -8020,6 +8020,7 @@
encode using quoted-printable) or @code{t} (always use 8bit).
@end table
+@cindex Ukrainian
@cindex Russian
@cindex koi8-r
@cindex koi8-u
@@ -8040,14 +8041,14 @@
This means that Russian will be encoded using @code{koi8-r} instead of
the default @code{iso-8859-5} @sc{mime} charset.
-If you want to read messages in @code{koi8-u}, you can cheat and say
+If you want to write messages in @code{koi8-u}, you can say (no cheat:
+@code{koi8-u} is really supported now)
@lisp
-(define-coding-system-alias 'koi8-u 'koi8-r)
+(put-charset-property 'cyrillic-iso8859-5
+ 'preferred-coding-system 'koi8-u)
@end lisp
-This will almost do the right thing.
-
And finally, to read charsets like @code{windows-1251}, you can say
something like
--- emacs-21.3/man/mule.texi.more-cyr 2003-03-24 22:59:49 +0300
+++ emacs-21.3/man/mule.texi 2003-03-24 23:09:42 +0300
@@ -8,6 +8,7 @@
@cindex multibyte characters
@cindex encoding of characters
+@cindex Belarusian
@cindex Celtic
@cindex Chinese
@cindex Cyrillic
@@ -25,6 +26,7 @@
@cindex Lao
@cindex Latin
@cindex Polish
+@cindex Russian
@cindex Romanian
@cindex Slovak
@cindex Slovenian
@@ -32,6 +34,7 @@
@cindex Tibetan
@cindex Turkish
@cindex Vietnamese
+@cindex Ukrainian
@cindex Dutch
@cindex Spanish
Emacs supports a wide variety of international character sets,
@@ -261,12 +264,13 @@
@cindex Euro sign
@quotation
-Chinese-BIG5, Chinese-CNS, Chinese-GB, Cyrillic-ALT, Cyrillic-ISO,
-Cyrillic-KOI8, Czech, Devanagari, Dutch, English, Ethiopic, German,
+Belarusian, Chinese-BIG5, Chinese-CNS, Chinese-GB, Cyrillic-ALT,
+Cyrillic-CP1251, Cyrillic-CP866, Cyrillic-ISO, Cyrillic-KOI8,
+Cyrillic-KOI8-U, Czech, Devanagari, Dutch, English, Ethiopic, German,
Greek, Hebrew, IPA, Japanese, Korean, Lao, Latin-1, Latin-2, Latin-3,
Latin-4, Latin-5, Latin-8 (Celtic), Latin-9 (updated Latin-1, with the
Euro sign), Polish, Romanian, Slovak, Slovenian, Spanish, Thai,
-Tibetan, Turkish, and Vietnamese.
+Tibetan, Turkish, Ukrainian-CP1125, Ukrainian-CP1251, and Vietnamese.
@end quotation
@cindex fonts for various scripts
@@ -367,7 +371,7 @@
The simplest kind of input method works by mapping ASCII letters
into another alphabet; this allows you to use one other alphabet
-instead of ASCII. The Greek and Russian input methods
+instead of ASCII. The Greek and Cyrillic (e.g. Russian) input methods
work this way.
A more powerful technique is composition: converting sequences of
--- emacs-21.1.orig/etc/HELLO Sat Sep 29 14:13:44 2001
+++ emacs-21.1/etc/HELLO Sat Feb 16 18:07:39 2002
@@ -4,6 +4,7 @@
---------------------------------------------------------
Amharic ($(3"c!<!N"^(B) $(3!A!,!>(B
Arabic (38R(47d(3T!JSa(4W(3W(B
+Belarusian (,L1U[P`caZPo(B ,L\^RP(B) ,L4WU]l(B ,LT^Q`k(B!
Czech (,Bh(Besky) Dobr,B}(B den
Danish (Dansk) Hej, Goddag
English Hello
@@ -31,6 +32,7 @@
Tibetan (4$(7"7r'"]0"7"]14"20"21!;4%P0"G#!"Q14"20"21!;(B) 4$(7"70"714$P0"!#C"Q1!;4"Er'"S0"E"S14"G0"G1!;4"70"714"2r'"[0"2"[1!;4"Dr'"[0"D"[14"#0"#14"G0"G1!>(B
Tigrigna ($(3"8#r!N"^(B) $(3!Q!,!<"8(B
Turkish (T,M|(Brk,Mg(Be) Merhaba
+Ukrainian (,LCZ`Pw]alZP(B ,L\^RP(B) ,L?`XRvb(B!
Vietnamese (Ti,1*(Bng Vi,1.(Bt) Ch,1`(Bo b,1U(Bn
Japanese ($BF|K\8l(B) $B$3$s$K$A$O(B, (I:]FAJ(B
--- emacs-21.3/lisp/international/mule-cmds.el.more-cyr2 2003-03-25 00:12:53 +0300
+++ emacs-21.3/lisp/international/mule-cmds.el 2003-03-25 00:24:21 +0300
@@ -1669,7 +1669,8 @@
; ay Aymara
; az Azerbaijani
; ba Bashkir
- ("be" . "Latin-5") ; Belarusian [Byelorussian until early 1990s]
+ ("be.*[_.]cp1251" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
+ ("be" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
("bg" . "Latin-5") ; Bulgarian
; bh Bihari
; bi Bislama
@@ -1754,7 +1755,11 @@
("rm" . "Latin-1") ; Rhaeto-Romanic
; rn Kirundi
("ro" . "Romanian")
+ ("ru[_.]ua[_.]cp1251" . "Ukrainian-CP1251") ; Russian in Ukraine
+ ("ru[_.]ua[_.]koi8-u" . "Cyrillic-KOI8-U") ; Russian in Ukraine
+ ("ru.*[_.]cp1251" . "Cyrillic-CP1251") ; Russian
("ru.*[_.]koi8-r" . "Cyrillic-KOI8") ; Russian
+ ; ru.*[_.]pt154 Russian
("ru" . "Cyrillic-ISO") ; Russian
; rw Kinyarwanda
("sa" . "Devanagari") ; Sanskrit
@@ -1788,7 +1793,10 @@
; tt Tatar
; tw Twi
; ug Uighur
- ("uk" . "Cyrillic-ISO") ; Ukrainian
+ ("uk.*[_.]cp1251" . "Ukrainian-CP1251") ; Ukrainian
+ ("uk.*[_.]cp1125" . "Ukrainian-CP1125") ; Ukrainian
+ ("uk.*[_.]koi8-u" . "Cyrillic-KOI8-U") ; Ukrainian
+ ("uk" . "Cyrillic-KOI8-U") ; Ukrainian
; ur Urdu
; uz Uzbek
("vi" . "Vietnamese")
@@ -1832,6 +1840,7 @@
(".*8859[-_]?2\\>" . "Latin-2")
(".*8859[-_]?3\\>" . "Latin-3")
(".*8859[-_]?4\\>" . "Latin-4")
+ (".*8859[-_]?5\\>" . "Cyrillic-ISO")
(".*8859[-_]?9\\>" . "Latin-5")
(".*8859[-_]?14\\>" . "Latin-8")
(".*8859[-_]?15\\>" . "Latin-9")