Репозиторий ALT Linux backports/2.4
Последнее обновление: 9 июля 2008 | Пакетов: 497 | Посещений: 1585080
 поиск   регистрация   авторизация 
 
Группа :: Редакторы
Пакет: emacs22

 Главная   Изменения   Спек   Патчи   Загрузить   Bugs and FR 

Патч: emacs-22.0.50-alt0.4-more-cyrillic-support.patch


diff -Naur emacs/etc/HELLO emacs.build/etc/HELLO
--- emacs/etc/HELLO	2004-04-03 10:41:21 +0600
+++ emacs.build/etc/HELLO	2005-11-03 00:08:09 +0500
@@ -3,6 +3,7 @@
 ---------------------------------------------------------
 Amharic ($(3"c!<!N"^(B)	$(3!A!,!>(B
 Arabic	(38R(47d(3T!JSa(4W(3W(B
+Belarusian (,L1U[P`caZPo(B ,L\^RP(B)	,L4WU]l(B ,LT^Q`k(B!
 Braille	$,2(3(1('('(5(B
 C	printf ("Hello, world!\n");
 Czech (,Bh(Be,B9(Btina)	Dobr,B}(B den
diff -Naur emacs/lisp/faces.el emacs.build/lisp/faces.el
--- emacs/lisp/faces.el	2005-11-01 16:39:33 +0500
+++ emacs.build/lisp/faces.el	2005-11-03 00:08:09 +0500
@@ -88,7 +88,8 @@
     '(("gb2312.1980" "gb2312.80&gb8565.88" "gbk*")
       ("jisx0208.1990" "jisx0208.1983" "jisx0208.1978")
       ("ksc5601.1989" "ksx1001.1992" "ksc5601.1987")
-      ("muletibetan-2" "muletibetan-0")))
+      ("muletibetan-2" "muletibetan-0")
+      ("iso8859-5" "microsoft-cp1251" "koi8-u" "koi8")))
   "*Alist of alternative font registry names.
 Each element has the form (REGISTRY ALTERNATIVE1 ALTERNATIVE2 ...).
 If fonts of registry REGISTRY can be loaded, font selection
diff -Naur emacs/lisp/international/cyrillic-codepages-setup.el emacs.build/lisp/international/cyrillic-codepages-setup.el
--- emacs/lisp/international/cyrillic-codepages-setup.el	1970-01-01 05:00:00 +0500
+++ emacs.build/lisp/international/cyrillic-codepages-setup.el	2005-11-03 00:08:09 +0500
@@ -0,0 +1,102 @@
+; Setup cyrillic codepages and aliases for their coding-systems.
+;
+; This feature is for setting the corresponding Cyrillic language environments
+; seamlessly (without writing any special commands before).
+;
+;
+; Based on some of my ideas and the postings in the sisyphus@altlinux.ru mailing list,
+; particularly from Alexander Bokovoy and Serhii Hlodin.
+; Added MIME charset names according to http://www.iana.org/assignments/character-sets.
+;
+; February 2002
+; Ivan Zakharyaschev <imz@altlinux.ru>
+;
+; Copyright (C) ALT Linux Team 2002
+; (This file is not a part of the original GNU Emacs.)
+;
+; This code is free software; you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as published by
+; the Free Software Foundation; either version 2, or (at your option)
+; any later version.
+
+;;; Commentary:
+
+;;; Code:
+
+(codepage-setup 1251)
+(coding-system-put 'cp1251 'mime-charset 'windows-1251)
+(define-coding-system-alias 'windows-1251 'cp1251)
+(define-coding-system-alias 'cyrillic-cp1251 'cp1251)
+
+(codepage-setup 1125)
+(define-coding-system-alias 'cp866u 'cp1125)
+(define-coding-system-alias 'cyrillic-cp1125 'cp1125)
+
+(codepage-setup 866)
+(coding-system-put 'cp866 'mime-charset 'cp866)
+(define-coding-system-alias 'ibm-866 'cp866)
+(define-coding-system-alias 'ibm866 'cp866)
+(define-coding-system-alias 'cyrillic-cp866 'cp866)
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; A piece of code to correct the list of valid codes for the three codepage-based
+; coding systems: the new ``more reasonable'' values fix Emacs looping infinitely
+; on some encoded input:
+
+(defun cp-detect-valid-codes (codepage) 
+  "List valid codes for the CODEPAGE coding-system in a format suitable
+for the coding-system's `valid-codes' property.  CODEPAGE is a symbol such
+as `cp1251'; the codes are read from the variable CODEPAGE-decode-table.
+The result always has the range (0 . 127) as the first element; the other
+elements are the non-nil entries of that decode table, sorted ascending.
+
+This function is not part of the original GNU Emacs; ALT added it mainly
+to set a reasonable `valid-codes' property for the cp1251 coding-system,
+because Emacs sometimes looped infinitely on odd input in encoded-kbd mode."
+  (let* ((decode-table (intern (format "%s-decode-table" codepage))))
+    (cons
+     (cons 0 127)
+     (sort (delq nil (mapcar 'identity (symbol-value decode-table))) '<))))
+
+
+(defun cp-correct-valid-codes (codepage) 
+  "Set a more reasonable `valid-codes' property on the CODEPAGE coding-system
+than the default single full range ((0 . 255)).  CODEPAGE is a coding-system
+symbol such as `cp1251'.  The new value is obtained by calling
+`cp-detect-valid-codes' on CODEPAGE and stored with `coding-system-put'.
+
+This function is not part of the original GNU Emacs; ALT added it mainly
+to set a reasonable `valid-codes' property for the cp1251 coding-system,
+because Emacs sometimes looped infinitely trying to process some odd input
+in encoded-kbd mode."
+  (coding-system-put codepage 'valid-codes (cp-detect-valid-codes codepage)))
+
+(mapc 'cp-correct-valid-codes '(cp1251 cp866 cp1125))
+
+; And now
+;;(coding-system-get 'cp1251 'valid-codes)
+; will return a much more detailed list of valid codes.
+
+; Valid codes lists fixed.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Add information about recoding X fonts in these charset/encodings.
+
+(define-ccl-program ccl-encode-cp1251-font
+  `(0
+    ((translate-character cp1251-encode-translation-table r0 r1)))
+  "CCL program to encode Cyrillic chars to CP1251 font.")
+
+;; (setq font-ccl-encoder-alist
+;;       (cons '("microsoft-cp1251" . ccl-encode-cp1251-font) font-ccl-encoder-alist))
+
+(setq font-ccl-encoder-alist
+      (cons '(".*1251" . ccl-encode-cp1251-font) font-ccl-encoder-alist))
+
+; X font data setup end.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(provide 'cyrillic-codepages-setup)
+
+; End of cyrillic-codepages-setup
diff -Naur emacs/lisp/international/fontset.el emacs.build/lisp/international/fontset.el
--- emacs/lisp/international/fontset.el	2005-10-13 11:34:35 +0600
+++ emacs.build/lisp/international/fontset.el	2005-11-03 00:20:03 +0500
@@ -28,6 +28,13 @@
 
 ;;; Code:
 
+; Require the setup of cp1251 support, otherwise
+; it might be too late. (imz@altlinux.ru)
+; Additional comment from Eugene Vlasov: this code breaks preparation for dumping,
+; so we need to check purify-flag.
+(unless purify-flag
+  (require 'cyrillic-codepages-setup))
+
 ;; Set standard fontname specification of characters in the default
 ;; fontset to find an appropriate font for each charset.  This is used
 ;; to generate a font name for a fontset if the fontset doesn't
@@ -58,6 +65,11 @@
 	     (katakana-jisx0201 . (nil . "JISX0201"))
 	     (latin-jisx0201 . (nil . "JISX0201"))
 	     (cyrillic-iso8859-5 . (nil . "ISO8859-5"))
+; These are the other possibilities (alternatives);
+; We don't need to use them here (face-font-registry-alternatives 
+; will help us to use all the available Cyrillic fonts):
+;	     (cyrillic-iso8859-5 . (nil . "*-*1251"))
+;	     (cyrillic-iso8859-5 . (nil . "KOI8"))
 	     (latin-iso8859-9 . (nil . "ISO8859-9"))
 	     (japanese-jisx0208-1978 . (nil . "JISX0208.1978"))
 	     (chinese-gb2312 . (nil . "GB2312.1980"))
@@ -248,6 +260,7 @@
     ("iso8859-15" ascii latin-iso8859-15)
     ("tis620" ascii thai-tis620)
     ("koi8" ascii cyrillic-iso8859-5)
+    (".*1251" ascii cyrillic-iso8859-5)
     ("viscii" ascii vietnamese-viscii-upper vietnamese-viscii-lower)
     ("vscii" ascii vietnamese-viscii-upper vietnamese-viscii-lower)
     ("mulelao-1" ascii lao)
diff -Naur emacs/lisp/international/mule-cmds.el emacs.build/lisp/international/mule-cmds.el
--- emacs/lisp/international/mule-cmds.el	2005-10-06 21:52:12 +0600
+++ emacs.build/lisp/international/mule-cmds.el	2005-11-03 00:08:09 +0500
@@ -2108,7 +2108,8 @@
     ; ay Aymara
     ("az" . "UTF-8") ; Azerbaijani
     ; ba Bashkir
-    ("be" "Belarusian" cp1251) ; Belarusian [Byelorussian until early 1990s]
+    ("be.*[_.]cp1251" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
+    ("be" . "Belarusian") ; Belarusian [Byelorussian until early 1990s]
     ("bg" "Bulgarian" cp1251) ; Bulgarian
     ; bh Bihari
     ; bi Bislama
@@ -2207,7 +2208,9 @@
     ("rm" . "Latin-1") ; Rhaeto-Romanic
     ; rn Kirundi
     ("ro" "Romanian" iso-8859-2)
+    ("ru[_.]ua[_.]cp1251" . "Ukrainian-CP1251") ; Russian in Ukraine; must precede the generic ru.* entry (first match wins)
+    ("ru.*[_.]cp1251" . "Cyrillic-CP1251") ; Russian, any other territory, CP1251
     ("ru_RU" "Russian" iso-8859-5)
     ("ru_UA" "Russian" koi8-u)
     ; rw Kinyarwanda
     ("sa" . "Devanagari") ; Sanskrit
@@ -2246,6 +2249,8 @@
     ("tt" . "UTF-8") ; Tatar
     ; tw Twi
     ; ug Uighur
+    ("uk.*[_.]cp1251" . "Ukrainian-CP1251") ; Ukrainian
+    ("uk.*[_.]cp1125" . "Ukrainian-CP1125") ; Ukrainian
     ("uk" "Ukrainian" koi8-u)
     ("ur" . "UTF-8") ; Urdu
     ("uz_UZ@cyrillic" . "UTF-8"); Uzbek
@@ -2302,6 +2307,7 @@
      (".*8859[-_]?2\\>" . "Latin-2")
      (".*8859[-_]?3\\>" . "Latin-3")
      (".*8859[-_]?4\\>" . "Latin-4")
+     (".*8859[-_]?5\\>" . "Cyrillic-ISO")
      (".*8859[-_]?9\\>" . "Latin-5")
      (".*8859[-_]?14\\>" . "Latin-8")
      (".*8859[-_]?15\\>" . "Latin-9")
diff -Naur emacs/lisp/language/cyrillic.el emacs.build/lisp/language/cyrillic.el
--- emacs/lisp/language/cyrillic.el	2005-07-04 23:18:39 +0600
+++ emacs.build/lisp/language/cyrillic.el	2005-11-03 00:08:09 +0500
@@ -218,7 +218,9 @@
     (translate-character cyrillic-koi8-r-encode-table r0 r1))
   "CCL program to encode Cyrillic chars to KOI font.")
 
-(add-to-list 'font-ccl-encoder-alist '("koi8" . ccl-encode-koi8-font))
+(add-to-list 'font-ccl-encoder-alist '("koi8-r" . ccl-encode-koi8-font))
+; We need a finer (than just "koi8") regexp in order
+; to encode & display Ukrainian correctly (with koi8-u).
 
 (set-language-info-alist
  "Cyrillic-KOI8" `((charset cyrillic-iso8859-5)
@@ -545,6 +547,98 @@
 \(The name Belarusian replaced Byelorussian in the early 1990s.)"))
  '("Cyrillic"))
 
+(define-coding-system-alias 'koi8-ru 'koi8-u)
+
+; Several CP1251, CP866 and CP1125 based environments. 
+;
+; They can be chosen without taking care
+; of (codepage-setup 1251) -- it is done automatically by pre-requiring the feature
+; cyrillic-codepages-setup. (The source for it is in cyrillic-codepages-setup.el.)
+;
+; Based on the postings in sisyphus@altlinux.ru mailing list, particularly
+; from Alexander Bokovoy and Serhii Hlodin.
+;
+; CP1251 support is not complete because it is based on iso-8859-5 (which
+; lacks some of the letters, e.g. "Ghe with upturn").
+;
+; The new language-environments descriptions added by this patch
+; make use of the key "prereq-features" of the language-info structure.
+; This key is not present in original GNU Emacs 21, the support for it
+; is added by another patch (or hack): emacs-21.1-lang-env-prereqs.patch.
+; The descriptions in this form can exist and work even if there is no support
+; for the new key -- then you have to manually require 'cyrillic-codepages-setup
+; or to evaluate a corresponding (codepage-setup ...) before you use them.
+; 
+; February 2002
+; imz@altlinux.ru
+
+(set-language-info-alist
+ "Cyrillic-CP1251" `((charset cyrillic-iso8859-5)
+		   (prereq-features cyrillic-codepages-setup)
+                   (coding-system cp1251)
+                   (coding-priority cp1251)
+                   (input-method . "cyrillic-jcuken")
+		   (nonascii-translation . cp1251-nonascii-translation-table)
+                   (unibyte-display . cp1251)
+		   (features cyril-util)
+                   (sample-text . "Russian (,L@caaZXY(B)       ,L7T`PRabRcYbU(B!")
+                   (documentation . "Support for Cyrillic CP1251. Added in ALT's distribution."))
+ '("Cyrillic"))
+
+(set-language-info-alist
+ "Ukrainian-CP1251" `((charset cyrillic-iso8859-5)
+		   (prereq-features cyrillic-codepages-setup)
+		      (coding-system cp1251)
+		      (coding-priority cp1251)
+		      (nonascii-translation . cp1251-nonascii-translation-table)
+		      (input-method . "cyrillic-ukrainian")
+		      (unibyte-display . cp1251)
+		   (features cyril-util)
+		   (sample-text . "Ukrainian (,LCZ`Pw]alZP(B)	,L4^Q`^S^(B ,LT]o(B!")
+		      (documentation . "Support for Ukrainian language (with CP1251 coding system; incomplete). Suggests an input method from additional `leim' package. Added in ALT's distribution."))
+ '("Cyrillic"))
+
+
+(set-language-info-alist
+ "Cyrillic-CP866" `((charset cyrillic-iso8859-5)
+		   (prereq-features cyrillic-codepages-setup)
+                   (coding-system cp866)
+                   (coding-priority cp866)
+                   (input-method . "cyrillic-jcuken")
+		   (nonascii-translation . cp866-nonascii-translation-table)
+                   (unibyte-display . cp866)
+		   (features cyril-util)
+                   (sample-text . "Russian (,L@caaZXY(B)       ,L7T`PRabRcYbU(B!")
+                   (documentation . "Support for Cyrillic CP866. There is little difference between this one and Cyrillic-ALT. Added in ALT's distribution."))
+ '("Cyrillic"))
+
+(set-language-info-alist
+ "Ukrainian-CP1125" `((charset cyrillic-iso8859-5)
+		   (prereq-features cyrillic-codepages-setup)
+		      (coding-system cp1125)
+		      (coding-priority cp1125)
+		      (nonascii-translation . cp1125-nonascii-translation-table)
+		      (input-method . "cyrillic-ukrainian")
+		      (unibyte-display . cp1125)
+		   (features cyril-util)
+		   (sample-text . "Ukrainian (,LCZ`Pw]alZP(B)	,L4^Q`^S^(B ,LT]o(B!")
+		      (documentation . "Support for Ukrainian language (with CP1125 coding system). Suggests an input method from additional `leim' package. Added in ALT's distribution."))
+ '("Cyrillic"))
+
+; End of the block of CP1251, CP866 and CP1125 based language environments.
+
+; Some special X font encodings:
+
+(add-to-list 'font-ccl-encoder-alist '("koi8-c" . ccl-encode-koi8-u-font))
+; Apparently, koi8-c is a coding for Extended (Old) Cyrillic which matches
+; koi8-u in all letter positions. Since we don't have a special coding-system
+; for it in Emacs, but there happen to be such X fonts (e.g., -val-*-koi8-c), we treat it
+; the same way as koi8-u.
+
+(add-to-list 'font-ccl-encoder-alist '("koi8-1" . ccl-encode-koi8-font))
+; koi8-1 was present in some older fonts.
+
+
 (provide 'cyrillic)
 
 ;;; arch-tag: bda71ae0-ba41-4cb6-a6e0-1dff542313d3
diff -Naur emacs/man/gnus.texi emacs.build/man/gnus.texi
--- emacs/man/gnus.texi	2005-10-22 18:16:00 +0600
+++ emacs.build/man/gnus.texi	2005-11-03 00:08:10 +0500
@@ -9446,6 +9446,7 @@
 encode using quoted-printable) or @code{t} (always use 8bit).
 @end table
 
+@cindex Ukrainian
 @cindex Russian
 @cindex koi8-r
 @cindex koi8-u
@@ -9470,14 +9471,14 @@
 This means that Russian will be encoded using @code{koi8-r} instead of
 the default @code{iso-8859-5} @acronym{MIME} charset.
 
-If you want to read messages in @code{koi8-u}, you can cheat and say
+If you want to write messages in @code{koi8-u} (which is now genuinely
+supported, so no cheating is needed), you can say
 
 @lisp
-(define-coding-system-alias 'koi8-u 'koi8-r)
+(put-charset-property 'cyrillic-iso8859-5
+                      'preferred-coding-system 'koi8-u)
 @end lisp
 
-This will almost do the right thing.
-
 And finally, to read charsets like @code{windows-1251}, you can say
 something like
 
diff -Naur emacs/man/mule.texi emacs.build/man/mule.texi
--- emacs/man/mule.texi	2005-08-10 21:14:33 +0600
+++ emacs.build/man/mule.texi	2005-11-03 00:08:10 +0500
@@ -9,6 +9,7 @@
 @cindex multibyte characters
 @cindex encoding of characters
 
+@cindex Belarusian
 @cindex Celtic
 @cindex Chinese
 @cindex Cyrillic
@@ -26,6 +27,7 @@
 @cindex Lao
 @cindex Latin
 @cindex Polish
+@cindex Russian
 @cindex Romanian
 @cindex Slovak
 @cindex Slovenian
@@ -33,6 +35,7 @@
 @cindex Tibetan
 @cindex Turkish
 @cindex Vietnamese
+@cindex Ukrainian
 @cindex Dutch
 @cindex Spanish
   Emacs supports a wide variety of international character sets,
@@ -243,17 +246,17 @@
 @quotation
 Belarusian, Brazilian Portuguese, Bulgarian, Chinese-BIG5,
 Chinese-CNS, Chinese-EUC-TW, Chinese-GB, Croatian, Cyrillic-ALT,
-Cyrillic-ISO, Cyrillic-KOI8, Czech, Devanagari, Dutch, English,
-Ethiopic, French, Georgian, German, Greek, Hebrew, IPA, Italian,
-Japanese, Kannada, Korean, Lao, Latin-1, Latin-2, Latin-3,
-Latin-4, Latin-5, Latin-6, Latin-7, Latin-8 (Celtic),
-Latin-9 (updated Latin-1 with the Euro sign), Latvian,
+Cyrillic-CP1251, Cyrillic-CP866, Cyrillic-ISO, Cyrillic-KOI8, Czech,
+Devanagari, Dutch, English, Ethiopic, French, Georgian, German, Greek,
+Hebrew, IPA, Italian, Japanese, Kannada, Korean, Lao, Latin-1,
+Latin-2, Latin-3, Latin-4, Latin-5, Latin-6, Latin-7, Latin-8
+(Celtic), Latin-9 (updated Latin-1 with the Euro sign), Latvian,
 Lithuanian, Malayalam, Polish, Romanian, Russian, Slovak,
 Slovenian, Spanish, Swedish, Tajik, Tamil, Thai, Tibetan,
 Turkish, UTF-8 (for a setup which prefers Unicode characters and
-files encoded in UTF-8), Ukrainian, Vietnamese, Welsh, and
-Windows-1255 (for a setup which prefers Cyrillic characters and
-files encoded in Windows-1255).
+files encoded in UTF-8), Ukrainian, Ukrainian-CP1125, Ukrainian-CP1251,
+Vietnamese, Welsh, and Windows-1255 (for a setup which prefers
+Cyrillic characters and files encoded in Windows-1255).
 @end quotation
 
 @cindex fonts for various scripts
@@ -354,8 +357,8 @@
 
   The simplest kind of input method works by mapping @acronym{ASCII} letters
 into another alphabet; this allows you to use one other alphabet
-instead of @acronym{ASCII}.  The Greek and Russian input methods
-work this way.
+instead of @acronym{ASCII}.  The Greek and Cyrillic (e.g. Russian)
+input methods work this way.
 
   A more powerful technique is composition: converting sequences of
 characters into one letter.  Many European input methods use composition
 
design & coding: Vladimir Lettiev aka crux © 2004-2005