tenc-jpn.lisp - clic - Clic is an command line interactive client for gopher written in Common LISP
HTML git clone git://bitreich.org/clic/ git://hg6vgqziawt5s4dj.onion/clic/
DIR Log
DIR Files
DIR Refs
DIR Tags
DIR LICENSE
---
tenc-jpn.lisp (37967B)
---
1 ;;;; -*- Mode: lisp; indent-tabs-mode: nil -*-
2 ;;;
3 ;;; enc-jpn.lisp --- Japanese encodings.
4 ;;;
5
6 (in-package #:babel-encodings)
7
8 ;;;; helper functions
9 (defvar *eucjp-to-ucs-hash* (make-hash-table))
10 (defvar *ucs-to-eucjp-hash* (make-hash-table))
11 (defvar *cp932-to-ucs-hash* (make-hash-table))
12 (defvar *ucs-to-cp932-hash* (make-hash-table))
13
14 (dolist (i `((,*cp932-only*
15 ,*cp932-to-ucs-hash*
16 ,*ucs-to-cp932-hash*)
17 (,*eucjp-only*
18 ,*eucjp-to-ucs-hash*
19 ,*ucs-to-eucjp-hash*)
20 (,*eucjp*
21 ,*eucjp-to-ucs-hash*
22 ,*ucs-to-eucjp-hash*)))
23 (dolist (j (first i))
24 (setf (gethash (car j) (second i)) (cadr j))
25 (setf (gethash (cadr j) (third i)) (car j))))
26
27 (flet ((euc-cp932 (x)
28 (let ((high (ash x -16))
29 (mid (logand (ash x -8) 255))
30 (low (logand x 255)))
31 (cond ((not (zerop high))
32 nil)
33 ((= mid #x8e)
34 (logand x 255))
35 ((zerop mid)
36 x)
37 ((decf mid #xa1)
38 (decf low #x80)
39 (incf low (if (zerop (logand mid 1)) #x1f #x7e))
40 (incf low (if (<= #x7f low #x9d) 1 0))
41 (setq mid (ash mid -1))
42 (incf mid (if (<= mid #x1e) #x81 #xc1))
43 (+ (ash mid 8) low))))))
44 (dolist (i *eucjp*)
45 (let ((cp932 (euc-cp932 (first i))))
46 (when cp932
47 (setf (gethash cp932 *cp932-to-ucs-hash*) (second i))
48 (setf (gethash (second i) *ucs-to-cp932-hash*) cp932)))))
49
50 ;ascii
51 (loop for i from #x00 to #x7f do
52 (setf (gethash i *cp932-to-ucs-hash*) i)
53 (setf (gethash i *eucjp-to-ucs-hash*) i)
54 (setf (gethash i *ucs-to-eucjp-hash*) i)
55 (setf (gethash i *ucs-to-cp932-hash*) i))
56
57 ;half-width katakana
58 (loop for i from #xa1 to #xdf do
59 (setf (gethash i *cp932-to-ucs-hash*) (+ #xff61 #x-a1 i))
60 (setf (gethash (+ #xff61 #x-a1 i) *ucs-to-cp932-hash*) i)
61 (setf (gethash (+ #x8e00 i) *eucjp-to-ucs-hash*) (+ #xff61 #x-a1 i))
62 (setf (gethash (+ #xff61 #x-a1 i) *ucs-to-eucjp-hash*) (+ #x8e00 i)))
63
64 ;; This is quoted from https://support.microsoft.com/en-us/kb/170559/en-us
65 (let ((kb170559 "0x8790 -> U+2252 -> 0x81e0 Approximately Equal To Or The Image Of
66 0x8791 -> U+2261 -> 0x81df Identical To
67 0x8792 -> U+222b -> 0x81e7 Integral
68 0x8795 -> U+221a -> 0x81e3 Square Root
69 0x8796 -> U+22a5 -> 0x81db Up Tack
70 0x8797 -> U+2220 -> 0x81da Angle
71 0x879a -> U+2235 -> 0x81e6 Because
72 0x879b -> U+2229 -> 0x81bf Intersection
73 0x879c -> U+222a -> 0x81be Union
74 0xed40 -> U+7e8a -> 0xfa5c CJK Unified Ideograph
75 0xed41 -> U+891c -> 0xfa5d CJK Unified Ideograph
76 0xed42 -> U+9348 -> 0xfa5e CJK Unified Ideograph
77 0xed43 -> U+9288 -> 0xfa5f CJK Unified Ideograph
78 0xed44 -> U+84dc -> 0xfa60 CJK Unified Ideograph
79 0xed45 -> U+4fc9 -> 0xfa61 CJK Unified Ideograph
80 0xed46 -> U+70bb -> 0xfa62 CJK Unified Ideograph
81 0xed47 -> U+6631 -> 0xfa63 CJK Unified Ideograph
82 0xed48 -> U+68c8 -> 0xfa64 CJK Unified Ideograph
83 0xed49 -> U+92f9 -> 0xfa65 CJK Unified Ideograph
84 0xed4a -> U+66fb -> 0xfa66 CJK Unified Ideograph
85 0xed4b -> U+5f45 -> 0xfa67 CJK Unified Ideograph
86 0xed4c -> U+4e28 -> 0xfa68 CJK Unified Ideograph
87 0xed4d -> U+4ee1 -> 0xfa69 CJK Unified Ideograph
88 0xed4e -> U+4efc -> 0xfa6a CJK Unified Ideograph
89 0xed4f -> U+4f00 -> 0xfa6b CJK Unified Ideograph
90 0xed50 -> U+4f03 -> 0xfa6c CJK Unified Ideograph
91 0xed51 -> U+4f39 -> 0xfa6d CJK Unified Ideograph
92 0xed52 -> U+4f56 -> 0xfa6e CJK Unified Ideograph
93 0xed53 -> U+4f92 -> 0xfa6f CJK Unified Ideograph
94 0xed54 -> U+4f8a -> 0xfa70 CJK Unified Ideograph
95 0xed55 -> U+4f9a -> 0xfa71 CJK Unified Ideograph
96 0xed56 -> U+4f94 -> 0xfa72 CJK Unified Ideograph
97 0xed57 -> U+4fcd -> 0xfa73 CJK Unified Ideograph
98 0xed58 -> U+5040 -> 0xfa74 CJK Unified Ideograph
99 0xed59 -> U+5022 -> 0xfa75 CJK Unified Ideograph
100 0xed5a -> U+4fff -> 0xfa76 CJK Unified Ideograph
101 0xed5b -> U+501e -> 0xfa77 CJK Unified Ideograph
102 0xed5c -> U+5046 -> 0xfa78 CJK Unified Ideograph
103 0xed5d -> U+5070 -> 0xfa79 CJK Unified Ideograph
104 0xed5e -> U+5042 -> 0xfa7a CJK Unified Ideograph
105 0xed5f -> U+5094 -> 0xfa7b CJK Unified Ideograph
106 0xed60 -> U+50f4 -> 0xfa7c CJK Unified Ideograph
107 0xed61 -> U+50d8 -> 0xfa7d CJK Unified Ideograph
108 0xed62 -> U+514a -> 0xfa7e CJK Unified Ideograph
109 0xed63 -> U+5164 -> 0xfa80 CJK Unified Ideograph
110 0xed64 -> U+519d -> 0xfa81 CJK Unified Ideograph
111 0xed65 -> U+51be -> 0xfa82 CJK Unified Ideograph
112 0xed66 -> U+51ec -> 0xfa83 CJK Unified Ideograph
113 0xed67 -> U+5215 -> 0xfa84 CJK Unified Ideograph
114 0xed68 -> U+529c -> 0xfa85 CJK Unified Ideograph
115 0xed69 -> U+52a6 -> 0xfa86 CJK Unified Ideograph
116 0xed6a -> U+52c0 -> 0xfa87 CJK Unified Ideograph
117 0xed6b -> U+52db -> 0xfa88 CJK Unified Ideograph
118 0xed6c -> U+5300 -> 0xfa89 CJK Unified Ideograph
119 0xed6d -> U+5307 -> 0xfa8a CJK Unified Ideograph
120 0xed6e -> U+5324 -> 0xfa8b CJK Unified Ideograph
121 0xed6f -> U+5372 -> 0xfa8c CJK Unified Ideograph
122 0xed70 -> U+5393 -> 0xfa8d CJK Unified Ideograph
123 0xed71 -> U+53b2 -> 0xfa8e CJK Unified Ideograph
124 0xed72 -> U+53dd -> 0xfa8f CJK Unified Ideograph
125 0xed73 -> U+fa0e -> 0xfa90 CJK compatibility Ideograph
126 0xed74 -> U+549c -> 0xfa91 CJK Unified Ideograph
127 0xed75 -> U+548a -> 0xfa92 CJK Unified Ideograph
128 0xed76 -> U+54a9 -> 0xfa93 CJK Unified Ideograph
129 0xed77 -> U+54ff -> 0xfa94 CJK Unified Ideograph
130 0xed78 -> U+5586 -> 0xfa95 CJK Unified Ideograph
131 0xed79 -> U+5759 -> 0xfa96 CJK Unified Ideograph
132 0xed7a -> U+5765 -> 0xfa97 CJK Unified Ideograph
133 0xed7b -> U+57ac -> 0xfa98 CJK Unified Ideograph
134 0xed7c -> U+57c8 -> 0xfa99 CJK Unified Ideograph
135 0xed7d -> U+57c7 -> 0xfa9a CJK Unified Ideograph
136 0xed7e -> U+fa0f -> 0xfa9b CJK compatibility Ideograph
137 0xed80 -> U+fa10 -> 0xfa9c CJK compatibility Ideograph
138 0xed81 -> U+589e -> 0xfa9d CJK Unified Ideograph
139 0xed82 -> U+58b2 -> 0xfa9e CJK Unified Ideograph
140 0xed83 -> U+590b -> 0xfa9f CJK Unified Ideograph
141 0xed84 -> U+5953 -> 0xfaa0 CJK Unified Ideograph
142 0xed85 -> U+595b -> 0xfaa1 CJK Unified Ideograph
143 0xed86 -> U+595d -> 0xfaa2 CJK Unified Ideograph
144 0xed87 -> U+5963 -> 0xfaa3 CJK Unified Ideograph
145 0xed88 -> U+59a4 -> 0xfaa4 CJK Unified Ideograph
146 0xed89 -> U+59ba -> 0xfaa5 CJK Unified Ideograph
147 0xed8a -> U+5b56 -> 0xfaa6 CJK Unified Ideograph
148 0xed8b -> U+5bc0 -> 0xfaa7 CJK Unified Ideograph
149 0xed8c -> U+752f -> 0xfaa8 CJK Unified Ideograph
150 0xed8d -> U+5bd8 -> 0xfaa9 CJK Unified Ideograph
151 0xed8e -> U+5bec -> 0xfaaa CJK Unified Ideograph
152 0xed8f -> U+5c1e -> 0xfaab CJK Unified Ideograph
153 0xed90 -> U+5ca6 -> 0xfaac CJK Unified Ideograph
154 0xed91 -> U+5cba -> 0xfaad CJK Unified Ideograph
155 0xed92 -> U+5cf5 -> 0xfaae CJK Unified Ideograph
156 0xed93 -> U+5d27 -> 0xfaaf CJK Unified Ideograph
157 0xed94 -> U+5d53 -> 0xfab0 CJK Unified Ideograph
158 0xed95 -> U+fa11 -> 0xfab1 CJK compatibility Ideograph
159 0xed96 -> U+5d42 -> 0xfab2 CJK Unified Ideograph
160 0xed97 -> U+5d6d -> 0xfab3 CJK Unified Ideograph
161 0xed98 -> U+5db8 -> 0xfab4 CJK Unified Ideograph
162 0xed99 -> U+5db9 -> 0xfab5 CJK Unified Ideograph
163 0xed9a -> U+5dd0 -> 0xfab6 CJK Unified Ideograph
164 0xed9b -> U+5f21 -> 0xfab7 CJK Unified Ideograph
165 0xed9c -> U+5f34 -> 0xfab8 CJK Unified Ideograph
166 0xed9d -> U+5f67 -> 0xfab9 CJK Unified Ideograph
167 0xed9e -> U+5fb7 -> 0xfaba CJK Unified Ideograph
168 0xed9f -> U+5fde -> 0xfabb CJK Unified Ideograph
169 0xeda0 -> U+605d -> 0xfabc CJK Unified Ideograph
170 0xeda1 -> U+6085 -> 0xfabd CJK Unified Ideograph
171 0xeda2 -> U+608a -> 0xfabe CJK Unified Ideograph
172 0xeda3 -> U+60de -> 0xfabf CJK Unified Ideograph
173 0xeda4 -> U+60d5 -> 0xfac0 CJK Unified Ideograph
174 0xeda5 -> U+6120 -> 0xfac1 CJK Unified Ideograph
175 0xeda6 -> U+60f2 -> 0xfac2 CJK Unified Ideograph
176 0xeda7 -> U+6111 -> 0xfac3 CJK Unified Ideograph
177 0xeda8 -> U+6137 -> 0xfac4 CJK Unified Ideograph
178 0xeda9 -> U+6130 -> 0xfac5 CJK Unified Ideograph
179 0xedaa -> U+6198 -> 0xfac6 CJK Unified Ideograph
180 0xedab -> U+6213 -> 0xfac7 CJK Unified Ideograph
181 0xedac -> U+62a6 -> 0xfac8 CJK Unified Ideograph
182 0xedad -> U+63f5 -> 0xfac9 CJK Unified Ideograph
183 0xedae -> U+6460 -> 0xfaca CJK Unified Ideograph
184 0xedaf -> U+649d -> 0xfacb CJK Unified Ideograph
185 0xedb0 -> U+64ce -> 0xfacc CJK Unified Ideograph
186 0xedb1 -> U+654e -> 0xfacd CJK Unified Ideograph
187 0xedb2 -> U+6600 -> 0xface CJK Unified Ideograph
188 0xedb3 -> U+6615 -> 0xfacf CJK Unified Ideograph
189 0xedb4 -> U+663b -> 0xfad0 CJK Unified Ideograph
190 0xedb5 -> U+6609 -> 0xfad1 CJK Unified Ideograph
191 0xedb6 -> U+662e -> 0xfad2 CJK Unified Ideograph
192 0xedb7 -> U+661e -> 0xfad3 CJK Unified Ideograph
193 0xedb8 -> U+6624 -> 0xfad4 CJK Unified Ideograph
194 0xedb9 -> U+6665 -> 0xfad5 CJK Unified Ideograph
195 0xedba -> U+6657 -> 0xfad6 CJK Unified Ideograph
196 0xedbb -> U+6659 -> 0xfad7 CJK Unified Ideograph
197 0xedbc -> U+fa12 -> 0xfad8 CJK compatibility Ideograph
198 0xedbd -> U+6673 -> 0xfad9 CJK Unified Ideograph
199 0xedbe -> U+6699 -> 0xfada CJK Unified Ideograph
200 0xedbf -> U+66a0 -> 0xfadb CJK Unified Ideograph
201 0xedc0 -> U+66b2 -> 0xfadc CJK Unified Ideograph
202 0xedc1 -> U+66bf -> 0xfadd CJK Unified Ideograph
203 0xedc2 -> U+66fa -> 0xfade CJK Unified Ideograph
204 0xedc3 -> U+670e -> 0xfadf CJK Unified Ideograph
205 0xedc4 -> U+f929 -> 0xfae0 CJK compatibility Ideograph
206 0xedc5 -> U+6766 -> 0xfae1 CJK Unified Ideograph
207 0xedc6 -> U+67bb -> 0xfae2 CJK Unified Ideograph
208 0xedc7 -> U+6852 -> 0xfae3 CJK Unified Ideograph
209 0xedc8 -> U+67c0 -> 0xfae4 CJK Unified Ideograph
210 0xedc9 -> U+6801 -> 0xfae5 CJK Unified Ideograph
211 0xedca -> U+6844 -> 0xfae6 CJK Unified Ideograph
212 0xedcb -> U+68cf -> 0xfae7 CJK Unified Ideograph
213 0xedcc -> U+fa13 -> 0xfae8 CJK compatibility Ideograph
214 0xedcd -> U+6968 -> 0xfae9 CJK Unified Ideograph
215 0xedce -> U+fa14 -> 0xfaea CJK compatibility Ideograph
216 0xedcf -> U+6998 -> 0xfaeb CJK Unified Ideograph
217 0xedd0 -> U+69e2 -> 0xfaec CJK Unified Ideograph
218 0xedd1 -> U+6a30 -> 0xfaed CJK Unified Ideograph
219 0xedd2 -> U+6a6b -> 0xfaee CJK Unified Ideograph
220 0xedd3 -> U+6a46 -> 0xfaef CJK Unified Ideograph
221 0xedd4 -> U+6a73 -> 0xfaf0 CJK Unified Ideograph
222 0xedd5 -> U+6a7e -> 0xfaf1 CJK Unified Ideograph
223 0xedd6 -> U+6ae2 -> 0xfaf2 CJK Unified Ideograph
224 0xedd7 -> U+6ae4 -> 0xfaf3 CJK Unified Ideograph
225 0xedd8 -> U+6bd6 -> 0xfaf4 CJK Unified Ideograph
226 0xedd9 -> U+6c3f -> 0xfaf5 CJK Unified Ideograph
227 0xedda -> U+6c5c -> 0xfaf6 CJK Unified Ideograph
228 0xeddb -> U+6c86 -> 0xfaf7 CJK Unified Ideograph
229 0xeddc -> U+6c6f -> 0xfaf8 CJK Unified Ideograph
230 0xeddd -> U+6cda -> 0xfaf9 CJK Unified Ideograph
231 0xedde -> U+6d04 -> 0xfafa CJK Unified Ideograph
232 0xeddf -> U+6d87 -> 0xfafb CJK Unified Ideograph
233 0xede0 -> U+6d6f -> 0xfafc CJK Unified Ideograph
234 0xede1 -> U+6d96 -> 0xfb40 CJK Unified Ideograph
235 0xede2 -> U+6dac -> 0xfb41 CJK Unified Ideograph
236 0xede3 -> U+6dcf -> 0xfb42 CJK Unified Ideograph
237 0xede4 -> U+6df8 -> 0xfb43 CJK Unified Ideograph
238 0xede5 -> U+6df2 -> 0xfb44 CJK Unified Ideograph
239 0xede6 -> U+6dfc -> 0xfb45 CJK Unified Ideograph
240 0xede7 -> U+6e39 -> 0xfb46 CJK Unified Ideograph
241 0xede8 -> U+6e5c -> 0xfb47 CJK Unified Ideograph
242 0xede9 -> U+6e27 -> 0xfb48 CJK Unified Ideograph
243 0xedea -> U+6e3c -> 0xfb49 CJK Unified Ideograph
244 0xedeb -> U+6ebf -> 0xfb4a CJK Unified Ideograph
245 0xedec -> U+6f88 -> 0xfb4b CJK Unified Ideograph
246 0xeded -> U+6fb5 -> 0xfb4c CJK Unified Ideograph
247 0xedee -> U+6ff5 -> 0xfb4d CJK Unified Ideograph
248 0xedef -> U+7005 -> 0xfb4e CJK Unified Ideograph
249 0xedf0 -> U+7007 -> 0xfb4f CJK Unified Ideograph
250 0xedf1 -> U+7028 -> 0xfb50 CJK Unified Ideograph
251 0xedf2 -> U+7085 -> 0xfb51 CJK Unified Ideograph
252 0xedf3 -> U+70ab -> 0xfb52 CJK Unified Ideograph
253 0xedf4 -> U+710f -> 0xfb53 CJK Unified Ideograph
254 0xedf5 -> U+7104 -> 0xfb54 CJK Unified Ideograph
255 0xedf6 -> U+715c -> 0xfb55 CJK Unified Ideograph
256 0xedf7 -> U+7146 -> 0xfb56 CJK Unified Ideograph
257 0xedf8 -> U+7147 -> 0xfb57 CJK Unified Ideograph
258 0xedf9 -> U+fa15 -> 0xfb58 CJK compatibility Ideograph
259 0xedfa -> U+71c1 -> 0xfb59 CJK Unified Ideograph
260 0xedfb -> U+71fe -> 0xfb5a CJK Unified Ideograph
261 0xedfc -> U+72b1 -> 0xfb5b CJK Unified Ideograph
262 0xee40 -> U+72be -> 0xfb5c CJK Unified Ideograph
263 0xee41 -> U+7324 -> 0xfb5d CJK Unified Ideograph
264 0xee42 -> U+fa16 -> 0xfb5e CJK compatibility Ideograph
265 0xee43 -> U+7377 -> 0xfb5f CJK Unified Ideograph
266 0xee44 -> U+73bd -> 0xfb60 CJK Unified Ideograph
267 0xee45 -> U+73c9 -> 0xfb61 CJK Unified Ideograph
268 0xee46 -> U+73d6 -> 0xfb62 CJK Unified Ideograph
269 0xee47 -> U+73e3 -> 0xfb63 CJK Unified Ideograph
270 0xee48 -> U+73d2 -> 0xfb64 CJK Unified Ideograph
271 0xee49 -> U+7407 -> 0xfb65 CJK Unified Ideograph
272 0xee4a -> U+73f5 -> 0xfb66 CJK Unified Ideograph
273 0xee4b -> U+7426 -> 0xfb67 CJK Unified Ideograph
274 0xee4c -> U+742a -> 0xfb68 CJK Unified Ideograph
275 0xee4d -> U+7429 -> 0xfb69 CJK Unified Ideograph
276 0xee4e -> U+742e -> 0xfb6a CJK Unified Ideograph
277 0xee4f -> U+7462 -> 0xfb6b CJK Unified Ideograph
278 0xee50 -> U+7489 -> 0xfb6c CJK Unified Ideograph
279 0xee51 -> U+749f -> 0xfb6d CJK Unified Ideograph
280 0xee52 -> U+7501 -> 0xfb6e CJK Unified Ideograph
281 0xee53 -> U+756f -> 0xfb6f CJK Unified Ideograph
282 0xee54 -> U+7682 -> 0xfb70 CJK Unified Ideograph
283 0xee55 -> U+769c -> 0xfb71 CJK Unified Ideograph
284 0xee56 -> U+769e -> 0xfb72 CJK Unified Ideograph
285 0xee57 -> U+769b -> 0xfb73 CJK Unified Ideograph
286 0xee58 -> U+76a6 -> 0xfb74 CJK Unified Ideograph
287 0xee59 -> U+fa17 -> 0xfb75 CJK compatibility Ideograph
288 0xee5a -> U+7746 -> 0xfb76 CJK Unified Ideograph
289 0xee5b -> U+52af -> 0xfb77 CJK Unified Ideograph
290 0xee5c -> U+7821 -> 0xfb78 CJK Unified Ideograph
291 0xee5d -> U+784e -> 0xfb79 CJK Unified Ideograph
292 0xee5e -> U+7864 -> 0xfb7a CJK Unified Ideograph
293 0xee5f -> U+787a -> 0xfb7b CJK Unified Ideograph
294 0xee60 -> U+7930 -> 0xfb7c CJK Unified Ideograph
295 0xee61 -> U+fa18 -> 0xfb7d CJK compatibility Ideograph
296 0xee62 -> U+fa19 -> 0xfb7e CJK compatibility Ideograph
297 0xee63 -> U+fa1a -> 0xfb80 CJK compatibility Ideograph
298 0xee64 -> U+7994 -> 0xfb81 CJK Unified Ideograph
299 0xee65 -> U+fa1b -> 0xfb82 CJK compatibility Ideograph
300 0xee66 -> U+799b -> 0xfb83 CJK Unified Ideograph
301 0xee67 -> U+7ad1 -> 0xfb84 CJK Unified Ideograph
302 0xee68 -> U+7ae7 -> 0xfb85 CJK Unified Ideograph
303 0xee69 -> U+fa1c -> 0xfb86 CJK compatibility Ideograph
304 0xee6a -> U+7aeb -> 0xfb87 CJK Unified Ideograph
305 0xee6b -> U+7b9e -> 0xfb88 CJK Unified Ideograph
306 0xee6c -> U+fa1d -> 0xfb89 CJK compatibility Ideograph
307 0xee6d -> U+7d48 -> 0xfb8a CJK Unified Ideograph
308 0xee6e -> U+7d5c -> 0xfb8b CJK Unified Ideograph
309 0xee6f -> U+7db7 -> 0xfb8c CJK Unified Ideograph
310 0xee70 -> U+7da0 -> 0xfb8d CJK Unified Ideograph
311 0xee71 -> U+7dd6 -> 0xfb8e CJK Unified Ideograph
312 0xee72 -> U+7e52 -> 0xfb8f CJK Unified Ideograph
313 0xee73 -> U+7f47 -> 0xfb90 CJK Unified Ideograph
314 0xee74 -> U+7fa1 -> 0xfb91 CJK Unified Ideograph
315 0xee75 -> U+fa1e -> 0xfb92 CJK compatibility Ideograph
316 0xee76 -> U+8301 -> 0xfb93 CJK Unified Ideograph
317 0xee77 -> U+8362 -> 0xfb94 CJK Unified Ideograph
318 0xee78 -> U+837f -> 0xfb95 CJK Unified Ideograph
319 0xee79 -> U+83c7 -> 0xfb96 CJK Unified Ideograph
320 0xee7a -> U+83f6 -> 0xfb97 CJK Unified Ideograph
321 0xee7b -> U+8448 -> 0xfb98 CJK Unified Ideograph
322 0xee7c -> U+84b4 -> 0xfb99 CJK Unified Ideograph
323 0xee7d -> U+8553 -> 0xfb9a CJK Unified Ideograph
324 0xee7e -> U+8559 -> 0xfb9b CJK Unified Ideograph
325 0xee80 -> U+856b -> 0xfb9c CJK Unified Ideograph
326 0xee81 -> U+fa1f -> 0xfb9d CJK compatibility Ideograph
327 0xee82 -> U+85b0 -> 0xfb9e CJK Unified Ideograph
328 0xee83 -> U+fa20 -> 0xfb9f CJK compatibility Ideograph
329 0xee84 -> U+fa21 -> 0xfba0 CJK compatibility Ideograph
330 0xee85 -> U+8807 -> 0xfba1 CJK Unified Ideograph
331 0xee86 -> U+88f5 -> 0xfba2 CJK Unified Ideograph
332 0xee87 -> U+8a12 -> 0xfba3 CJK Unified Ideograph
333 0xee88 -> U+8a37 -> 0xfba4 CJK Unified Ideograph
334 0xee89 -> U+8a79 -> 0xfba5 CJK Unified Ideograph
335 0xee8a -> U+8aa7 -> 0xfba6 CJK Unified Ideograph
336 0xee8b -> U+8abe -> 0xfba7 CJK Unified Ideograph
337 0xee8c -> U+8adf -> 0xfba8 CJK Unified Ideograph
338 0xee8d -> U+fa22 -> 0xfba9 CJK compatibility Ideograph
339 0xee8e -> U+8af6 -> 0xfbaa CJK Unified Ideograph
340 0xee8f -> U+8b53 -> 0xfbab CJK Unified Ideograph
341 0xee90 -> U+8b7f -> 0xfbac CJK Unified Ideograph
342 0xee91 -> U+8cf0 -> 0xfbad CJK Unified Ideograph
343 0xee92 -> U+8cf4 -> 0xfbae CJK Unified Ideograph
344 0xee93 -> U+8d12 -> 0xfbaf CJK Unified Ideograph
345 0xee94 -> U+8d76 -> 0xfbb0 CJK Unified Ideograph
346 0xee95 -> U+fa23 -> 0xfbb1 CJK compatibility Ideograph
347 0xee96 -> U+8ecf -> 0xfbb2 CJK Unified Ideograph
348 0xee97 -> U+fa24 -> 0xfbb3 CJK compatibility Ideograph
349 0xee98 -> U+fa25 -> 0xfbb4 CJK compatibility Ideograph
350 0xee99 -> U+9067 -> 0xfbb5 CJK Unified Ideograph
351 0xee9a -> U+90de -> 0xfbb6 CJK Unified Ideograph
352 0xee9b -> U+fa26 -> 0xfbb7 CJK compatibility Ideograph
353 0xee9c -> U+9115 -> 0xfbb8 CJK Unified Ideograph
354 0xee9d -> U+9127 -> 0xfbb9 CJK Unified Ideograph
355 0xee9e -> U+91da -> 0xfbba CJK Unified Ideograph
356 0xee9f -> U+91d7 -> 0xfbbb CJK Unified Ideograph
357 0xeea0 -> U+91de -> 0xfbbc CJK Unified Ideograph
358 0xeea1 -> U+91ed -> 0xfbbd CJK Unified Ideograph
359 0xeea2 -> U+91ee -> 0xfbbe CJK Unified Ideograph
360 0xeea3 -> U+91e4 -> 0xfbbf CJK Unified Ideograph
361 0xeea4 -> U+91e5 -> 0xfbc0 CJK Unified Ideograph
362 0xeea5 -> U+9206 -> 0xfbc1 CJK Unified Ideograph
363 0xeea6 -> U+9210 -> 0xfbc2 CJK Unified Ideograph
364 0xeea7 -> U+920a -> 0xfbc3 CJK Unified Ideograph
365 0xeea8 -> U+923a -> 0xfbc4 CJK Unified Ideograph
366 0xeea9 -> U+9240 -> 0xfbc5 CJK Unified Ideograph
367 0xeeaa -> U+923c -> 0xfbc6 CJK Unified Ideograph
368 0xeeab -> U+924e -> 0xfbc7 CJK Unified Ideograph
369 0xeeac -> U+9259 -> 0xfbc8 CJK Unified Ideograph
370 0xeead -> U+9251 -> 0xfbc9 CJK Unified Ideograph
371 0xeeae -> U+9239 -> 0xfbca CJK Unified Ideograph
372 0xeeaf -> U+9267 -> 0xfbcb CJK Unified Ideograph
373 0xeeb0 -> U+92a7 -> 0xfbcc CJK Unified Ideograph
374 0xeeb1 -> U+9277 -> 0xfbcd CJK Unified Ideograph
375 0xeeb2 -> U+9278 -> 0xfbce CJK Unified Ideograph
376 0xeeb3 -> U+92e7 -> 0xfbcf CJK Unified Ideograph
377 0xeeb4 -> U+92d7 -> 0xfbd0 CJK Unified Ideograph
378 0xeeb5 -> U+92d9 -> 0xfbd1 CJK Unified Ideograph
379 0xeeb6 -> U+92d0 -> 0xfbd2 CJK Unified Ideograph
380 0xeeb7 -> U+fa27 -> 0xfbd3 CJK compatibility Ideograph
381 0xeeb8 -> U+92d5 -> 0xfbd4 CJK Unified Ideograph
382 0xeeb9 -> U+92e0 -> 0xfbd5 CJK Unified Ideograph
383 0xeeba -> U+92d3 -> 0xfbd6 CJK Unified Ideograph
384 0xeebb -> U+9325 -> 0xfbd7 CJK Unified Ideograph
385 0xeebc -> U+9321 -> 0xfbd8 CJK Unified Ideograph
386 0xeebd -> U+92fb -> 0xfbd9 CJK Unified Ideograph
387 0xeebe -> U+fa28 -> 0xfbda CJK compatibility Ideograph
388 0xeebf -> U+931e -> 0xfbdb CJK Unified Ideograph
389 0xeec0 -> U+92ff -> 0xfbdc CJK Unified Ideograph
390 0xeec1 -> U+931d -> 0xfbdd CJK Unified Ideograph
391 0xeec2 -> U+9302 -> 0xfbde CJK Unified Ideograph
392 0xeec3 -> U+9370 -> 0xfbdf CJK Unified Ideograph
393 0xeec4 -> U+9357 -> 0xfbe0 CJK Unified Ideograph
394 0xeec5 -> U+93a4 -> 0xfbe1 CJK Unified Ideograph
395 0xeec6 -> U+93c6 -> 0xfbe2 CJK Unified Ideograph
396 0xeec7 -> U+93de -> 0xfbe3 CJK Unified Ideograph
397 0xeec8 -> U+93f8 -> 0xfbe4 CJK Unified Ideograph
398 0xeec9 -> U+9431 -> 0xfbe5 CJK Unified Ideograph
399 0xeeca -> U+9445 -> 0xfbe6 CJK Unified Ideograph
400 0xeecb -> U+9448 -> 0xfbe7 CJK Unified Ideograph
401 0xeecc -> U+9592 -> 0xfbe8 CJK Unified Ideograph
402 0xeecd -> U+f9dc -> 0xfbe9 CJK compatibility Ideograph
403 0xeece -> U+fa29 -> 0xfbea CJK compatibility Ideograph
404 0xeecf -> U+969d -> 0xfbeb CJK Unified Ideograph
405 0xeed0 -> U+96af -> 0xfbec CJK Unified Ideograph
406 0xeed1 -> U+9733 -> 0xfbed CJK Unified Ideograph
407 0xeed2 -> U+973b -> 0xfbee CJK Unified Ideograph
408 0xeed3 -> U+9743 -> 0xfbef CJK Unified Ideograph
409 0xeed4 -> U+974d -> 0xfbf0 CJK Unified Ideograph
410 0xeed5 -> U+974f -> 0xfbf1 CJK Unified Ideograph
411 0xeed6 -> U+9751 -> 0xfbf2 CJK Unified Ideograph
412 0xeed7 -> U+9755 -> 0xfbf3 CJK Unified Ideograph
413 0xeed8 -> U+9857 -> 0xfbf4 CJK Unified Ideograph
414 0xeed9 -> U+9865 -> 0xfbf5 CJK Unified Ideograph
415 0xeeda -> U+fa2a -> 0xfbf6 CJK compatibility Ideograph
416 0xeedb -> U+fa2b -> 0xfbf7 CJK compatibility Ideograph
417 0xeedc -> U+9927 -> 0xfbf8 CJK Unified Ideograph
418 0xeedd -> U+fa2c -> 0xfbf9 CJK compatibility Ideograph
419 0xeede -> U+999e -> 0xfbfa CJK Unified Ideograph
420 0xeedf -> U+9a4e -> 0xfbfb CJK Unified Ideograph
421 0xeee0 -> U+9ad9 -> 0xfbfc CJK Unified Ideograph
422 0xeee1 -> U+9adc -> 0xfc40 CJK Unified Ideograph
423 0xeee2 -> U+9b75 -> 0xfc41 CJK Unified Ideograph
424 0xeee3 -> U+9b72 -> 0xfc42 CJK Unified Ideograph
425 0xeee4 -> U+9b8f -> 0xfc43 CJK Unified Ideograph
426 0xeee5 -> U+9bb1 -> 0xfc44 CJK Unified Ideograph
427 0xeee6 -> U+9bbb -> 0xfc45 CJK Unified Ideograph
428 0xeee7 -> U+9c00 -> 0xfc46 CJK Unified Ideograph
429 0xeee8 -> U+9d70 -> 0xfc47 CJK Unified Ideograph
430 0xeee9 -> U+9d6b -> 0xfc48 CJK Unified Ideograph
431 0xeeea -> U+fa2d -> 0xfc49 CJK compatibility Ideograph
432 0xeeeb -> U+9e19 -> 0xfc4a CJK Unified Ideograph
433 0xeeec -> U+9ed1 -> 0xfc4b CJK Unified Ideograph
434 0xeeef -> U+2170 -> 0xfa40 Small Roman Numeral One
435 0xeef0 -> U+2171 -> 0xfa41 Small Roman Numeral Two
436 0xeef1 -> U+2172 -> 0xfa42 Small Roman Numeral Three
437 0xeef2 -> U+2173 -> 0xfa43 Small Roman Numeral Four
438 0xeef3 -> U+2174 -> 0xfa44 Small Roman Numeral Five
439 0xeef4 -> U+2175 -> 0xfa45 Small Roman Numeral Six
440 0xeef5 -> U+2176 -> 0xfa46 Small Roman Numeral Seven
441 0xeef6 -> U+2177 -> 0xfa47 Small Roman Numeral Eight
442 0xeef7 -> U+2178 -> 0xfa48 Small Roman Numeral Nine
443 0xeef8 -> U+2179 -> 0xfa49 Small Roman Numeral Ten
444 0xeef9 -> U+ffe2 -> 0x81ca Fullwidth Not Sign
445 0xeefa -> U+ffe4 -> 0xfa55 Fullwidth Broken Bar
446 0xeefb -> U+ff07 -> 0xfa56 Fullwidth Apostrophe
447 0xeefc -> U+ff02 -> 0xfa57 Fullwidth Quotation Mark
448 0xfa4a -> U+2160 -> 0x8754 Roman Numeral One
449 0xfa4b -> U+2161 -> 0x8755 Roman Numeral Two
450 0xfa4c -> U+2162 -> 0x8756 Roman Numeral Three
451 0xfa4d -> U+2163 -> 0x8757 Roman Numeral Four
452 0xfa4e -> U+2164 -> 0x8758 Roman Numeral Five
453 0xfa4f -> U+2165 -> 0x8759 Roman Numeral Six
454 0xfa50 -> U+2166 -> 0x875a Roman Numeral Seven
455 0xfa51 -> U+2167 -> 0x875b Roman Numeral Eight
456 0xfa52 -> U+2168 -> 0x875c Roman Numeral Nine
457 0xfa53 -> U+2169 -> 0x875d Roman Numeral Ten
458 0xfa54 -> U+ffe2 -> 0x81ca Fullwidth Not Sign
459 0xfa58 -> U+3231 -> 0x878a Parenthesized Ideograph Stock
460 0xfa59 -> U+2116 -> 0x8782 Numero Sign
461 0xfa5a -> U+2121 -> 0x8784 Telephone Sign
462 0xfa5b -> U+2235 -> 0x81e6 Because"))
463 (with-input-from-string (s kb170559)
464 (loop for line = (read-line s nil) until (null line)
465 do (let ((ucs (parse-integer (subseq line 14 18) :radix 16))
466 (cp932 (parse-integer (subseq line 26 30) :radix 16)))
467 (setf (gethash ucs *ucs-to-cp932-hash*) cp932)))))
468
469 (defun eucjp-to-ucs (code)
470 (values (gethash code *eucjp-to-ucs-hash*)))
471
472 (defun ucs-to-eucjp (code)
473 (values (gethash code *ucs-to-eucjp-hash*)))
474
475 (defun cp932-to-ucs (code)
476 (values (gethash code *cp932-to-ucs-hash*)))
477
478 (defun ucs-to-cp932 (code)
479 (values (gethash code *ucs-to-cp932-hash*)))
480
481 ;;;; EUC-JP
482
483 (define-character-encoding :eucjp
484 "An 8-bit, variable-length character encoding in which
485 character code points in the range #x00-#x7f can be encoded in a
486 single octet; characters with larger code values can be encoded
487 in 2 to 3 bytes."
488 :max-units-per-char 3
489 :literal-char-code-limit #x80)
490
491
492 (define-octet-counter :eucjp (getter type)
493 `(named-lambda eucjp-octet-counter (seq start end max)
494 (declare (type ,type seq) (fixnum start end max))
495 (loop with noctets fixnum = 0
496 for i fixnum from start below end
497 for code of-type code-point = (,getter seq i)
498 do (let* ((c (ucs-to-eucjp code))
499 (new (+ (cond ((< #xffff c) 3)
500 ((< #xff c) 2)
501 (t 1))
502 noctets)))
503 (if (and (plusp max) (> new max))
504 (loop-finish)
505 (setq noctets new)))
506 finally (return (values noctets i)))))
507
508 (define-code-point-counter :eucjp (getter type)
509 `(named-lambda eucjp-code-point-counter (seq start end max)
510 (declare (type ,type seq) (fixnum start end max))
511 (loop with nchars fixnum = 0
512 with i fixnum = start
513 while (< i end) do
514 (let* ((octet (,getter seq i))
515 (next-i (+ i (cond ((= #x8f octet) 3)
516 ((or (< #xa0 octet #xff)
517 (= #x8e octet)) 2)
518 (t 1)))))
519 (declare (type ub8 octet) (fixnum next-i))
520 (cond ((> next-i end)
521 ;; Should we add restarts to this error, we'll have
522 ;; to figure out a way to communicate with the
523 ;; decoder since we probably want to do something
524 ;; about it right here when we have a chance to
525 ;; change the count or something. (Like an
526 ;; alternative replacement character or perhaps the
527 ;; existence of this error so that the decoder
528 ;; doesn't have to check for it on every iteration
529 ;; like we do.)
530 ;;
531 ;; FIXME: The data for this error is not right.
532 (decoding-error (vector octet) :eucjp seq i
533 nil 'end-of-input-in-character)
534 (return (values (1+ nchars) end)))
535 (t
536 (setq nchars (1+ nchars)
537 i next-i)
538 (when (and (plusp max) (= nchars max))
539 (return (values nchars i))))))
540 finally (progn (assert (= i end))
541 (return (values nchars i))))))
542
543 (define-encoder :eucjp (getter src-type setter dest-type)
544 `(named-lambda eucjp-encoder (src start end dest d-start)
545 (declare (type ,src-type src)
546 (type ,dest-type dest)
547 (fixnum start end d-start))
548 (loop with di fixnum = d-start
549 for i fixnum from start below end
550 for code of-type code-point = (,getter src i)
551 for eucjp of-type code-point
552 = (ucs-to-eucjp code) do
553 (macrolet ((set-octet (offset value)
554 `(,',setter ,value dest (the fixnum (+ di ,offset)))))
555 (cond
556 ;; 1 octet
557 ((< eucjp #x100)
558 (set-octet 0 eucjp)
559 (incf di))
560 ;; 2 octets
561 ((< eucjp #x10000)
562 (set-octet 0 (f-logand #xff (f-ash eucjp -8)))
563 (set-octet 1 (logand eucjp #xff))
564 (incf di 2))
565 ;; 3 octets
566 (t
567 (set-octet 0 (f-logand #xff (f-ash eucjp -16)))
568 (set-octet 1 (f-logand #xff (f-ash eucjp -8)))
569 (set-octet 2 (logand eucjp #xff))
570 (incf di 3))
571 ))
572 finally (return (the fixnum (- di d-start))))))
573
574
575 (define-decoder :eucjp (getter src-type setter dest-type)
576 `(named-lambda eucjp-decoder (src start end dest d-start)
577 (declare (type ,src-type src)
578 (type ,dest-type dest)
579 (fixnum start end d-start))
580 (let ((u2 0))
581 (declare (type ub8 u2))
582 (loop for di fixnum from d-start
583 for i fixnum from start below end
584 for u1 of-type ub8 = (,getter src i) do
585 ;; Note: CONSUME-OCTET doesn't check if I is being
586 ;; incremented past END. We're assuming that END has
587 ;; been calculated with the CODE-POINT-POINTER above that
588 ;; checks this.
589 (macrolet
590 ((consume-octet ()
591 `(let ((next-i (incf i)))
592 (if (= next-i end)
593 ;; FIXME: data for this error is incomplete.
594 ;; and signalling this error twice
595 (return-from setter-block
596 (decoding-error nil :eucjp src i +repl+
597 'end-of-input-in-character))
598 (,',getter src next-i))))
599 (handle-error (n &optional (c 'character-decoding-error))
600 `(decoding-error
601 (vector ,@(subseq '(u1 u2) 0 n))
602 :eucjp src (1+ (- i ,n)) +repl+ ',c))
603 (handle-error-if-icb (var n)
604 `(when (not (< #x7f ,var #xc0))
605 (decf i)
606 (return-from setter-block
607 (handle-error ,n invalid-utf8-continuation-byte)))))
608 (,setter
609 (block setter-block
610 (cond
611 ;; 3 octets
612 ((= u1 #x8f)
613 (setq u2 (consume-octet))
614 (eucjp-to-ucs (logior #x8f0000
615 (f-ash u2 8)
616 (consume-octet))))
617 ;; 2 octets
618 ((or (= u1 #x8e)
619 (< #xa0 u1 #xff))
620 (eucjp-to-ucs (logior (f-ash u1 8)
621 (consume-octet))))
622 ;; 1 octet
623 (t
624 (eucjp-to-ucs u1))))
625 dest di))
626 finally (return (the fixnum (- di d-start)))))))
627
628 ;;;; CP932
629
630 (define-character-encoding :cp932
631 "An 8-bit, variable-length character encoding in which
632 character code points in the range #x00-#x7f can be encoded in a
633 single octet; characters with larger code values can be encoded
634 in 2 bytes."
635 :max-units-per-char 2
636 :literal-char-code-limit #x80)
637
638
639 (define-octet-counter :cp932 (getter type)
640 `(named-lambda cp932-octet-counter (seq start end max)
641 (declare (type ,type seq) (fixnum start end max))
642 (loop with noctets fixnum = 0
643 for i fixnum from start below end
644 for code of-type code-point = (,getter seq i)
645 do (let* ((c (ucs-to-cp932 code))
646 (new (+ (cond ((< #xff c) 2)
647 (t 1))
648 noctets)))
649 (if (and (plusp max) (> new max))
650 (loop-finish)
651 (setq noctets new)))
652 finally (return (values noctets i)))))
653
654 (define-code-point-counter :cp932 (getter type)
655 `(named-lambda cp932-code-point-counter (seq start end max)
656 (declare (type ,type seq) (fixnum start end max))
657 (loop with nchars fixnum = 0
658 with i fixnum = start
659 while (< i end) do
660 (let* ((octet (,getter seq i))
661 (next-i (+ i (cond ((or (<= #x81 octet #x9f)
662 (<= #xe0 octet #xfc))
663 2)
664 (t 1)))))
665 (declare (type ub8 octet) (fixnum next-i))
666 (cond ((> next-i end)
667 ;; Should we add restarts to this error, we'll have
668 ;; to figure out a way to communicate with the
669 ;; decoder since we probably want to do something
670 ;; about it right here when we have a chance to
671 ;; change the count or something. (Like an
672 ;; alternative replacement character or perhaps the
673 ;; existence of this error so that the decoder
674 ;; doesn't have to check for it on every iteration
675 ;; like we do.)
676 ;;
677 ;; FIXME: The data for this error is not right.
678 (decoding-error (vector octet) :cp932 seq i
679 nil 'end-of-input-in-character)
680 (return (values (1+ nchars) end)))
681 (t
682 (setq nchars (1+ nchars)
683 i next-i)
684 (when (and (plusp max) (= nchars max))
685 (return (values nchars i))))))
686 finally (progn (assert (= i end))
687 (return (values nchars i))))))
688
689 (define-encoder :cp932 (getter src-type setter dest-type)
690 `(named-lambda cp932-encoder (src start end dest d-start)
691 (declare (type ,src-type src)
692 (type ,dest-type dest)
693 (fixnum start end d-start))
694 (loop with di fixnum = d-start
695 for i fixnum from start below end
696 for code of-type code-point = (,getter src i)
697 for cp932 of-type code-point
698 = (ucs-to-cp932 code) do
699 (macrolet ((set-octet (offset value)
700 `(,',setter ,value dest (the fixnum (+ di ,offset)))))
701 (cond
702 ;; 1 octet
703 ((< cp932 #x100)
704 (set-octet 0 cp932)
705 (incf di))
706 ;; 2 octets
707 ((< cp932 #x10000)
708 (set-octet 0 (f-logand #xff (f-ash cp932 -8)))
709 (set-octet 1 (logand cp932 #xff))
710 (incf di 2))
711 ;; 3 octets
712 (t
713 (set-octet 0 (f-logand #xff (f-ash cp932 -16)))
714 (set-octet 1 (f-logand #xff (f-ash cp932 -8)))
715 (set-octet 2 (logand cp932 #xff))
716 (incf di 3))
717 ))
718 finally (return (the fixnum (- di d-start))))))
719
720
721 (define-decoder :cp932 (getter src-type setter dest-type)
722 `(named-lambda cp932-decoder (src start end dest d-start)
723 (declare (type ,src-type src)
724 (type ,dest-type dest)
725 (fixnum start end d-start))
726 (let ((u2 0))
727 (declare (type ub8 u2))
728 (loop for di fixnum from d-start
729 for i fixnum from start below end
730 for u1 of-type ub8 = (,getter src i) do
731 ;; Note: CONSUME-OCTET doesn't check if I is being
732 ;; incremented past END. We're assuming that END has
733 ;; been calculated with the CODE-POINT-POINTER above that
734 ;; checks this.
735 (macrolet
736 ((consume-octet ()
737 `(let ((next-i (incf i)))
738 (if (= next-i end)
739 ;; FIXME: data for this error is incomplete.
740 ;; and signalling this error twice
741 (return-from setter-block
742 (decoding-error nil :cp932 src i +repl+
743 'end-of-input-in-character))
744 (,',getter src next-i))))
745 (handle-error (n &optional (c 'character-decoding-error))
746 `(decoding-error
747 (vector ,@(subseq '(u1 u2) 0 n))
748 :cp932 src (1+ (- i ,n)) +repl+ ',c))
749 (handle-error-if-icb (var n)
750 `(when (not (< #x7f ,var #xc0))
751 (decf i)
752 (return-from setter-block
753 (handle-error ,n invalid-utf8-continuation-byte)))))
754 (,setter
755 (block setter-block
756 (cond
757 ;; 2 octets
758 ((or (<= #x81 u1 #x9f)
759 (<= #xe0 u1 #xfc))
760 (setq u2 (consume-octet))
761 (cp932-to-ucs (logior (f-ash u1 8)
762 u2)))
763 ;; 1 octet
764 (t
765 (cp932-to-ucs u1))))
766 dest di))
767 finally (return (the fixnum (- di d-start)))))))