config.ur_hi - transliterate_data - Data for Urdu<->Hindi transliteration
HTML git clone git://lumidify.org/transliterate_data.git (fast, but not encrypted)
HTML git clone https://lumidify.org/transliterate_data.git (encrypted, but very slow)
HTML git clone git://4kcetb7mo7hj6grozzybxtotsub5bempzo4lirzc3437amof2c2impyd.onion/transliterate_data.git (over tor)
DIR Log
DIR Files
DIR Refs
DIR README
---
config.ur_hi (6316B)
---
1 # Configuration for Urdu->Hindi
2
3 split "[-.\s\\۔،؟!—‘’“”:؛()[\]{}%―=]+"
4 beforeword "[-.\s\\۔،؟!—‘’“”:؛()[\]{}%―=]"
5 afterword "[-.\s\\۔،؟!—‘’“”:؛()[\]{}%―=]"
6
7 ignore "data/ignore.txt"
8 table misc_beginword.ur_hi "data/misc_beginword.ur_hi.txt" nodisplay
9 table misc_endword "data/misc_endword.txt" nodisplay
10 table special.ur_hi "data/special.ur_hi.txt" nodisplay
11 table exceptions_beginword_endword.ur_hi "data/exceptions_beginword_endword.ur_hi.txt"
12 table pairs_middle_e_o "data/pairs_middle_e_o.txt" nodisplay
13
14 # Verbs
15
16 table verbs_irregular "data/verbs/irregular.txt"
17 table verbs_regular_consonant_ending "data/verbs/regular_consonant_ending.txt"
18 table verbs_regular_consonant_ending_forms "data/verbs/regular_consonant_ending_forms.txt" nodisplay
19 table verbs_regular_ending_in_a_o "data/verbs/regular_ending_in_a_o.txt"
20 table verbs_regular_ending_in_a_o_forms "data/verbs/regular_ending_in_a_o_forms.txt" nodisplay
21
22 # Nouns/Adjectives
23
24 table na_imascfemshort "data/nouns_adjectives/imascfemshort.txt"
25 table na_adjectiveregular_a_i "data/nouns_adjectives/adjectiveregular_a_i.txt"
26 table na_irregular "data/nouns_adjectives/irregular.txt"
27 table na_ahmasc "data/nouns_adjectives/ahmasc.txt"
28 table na_yahmasc "data/nouns_adjectives/yahmasc.txt"
29 table na_aishortmasc "data/nouns_adjectives/aishortmasc.txt"
30 table na_amasc "data/nouns_adjectives/amasc.txt"
31 table na_an "data/nouns_adjectives/an.txt"
32 table na_cfem "data/nouns_adjectives/cfem.txt"
33 table na_cmasc "data/nouns_adjectives/cmasc.txt"
34 table na_ifem "data/nouns_adjectives/ifem.txt"
35 table na_imasc "data/nouns_adjectives/imasc.txt"
36 table na_o_a_staysfem "data/nouns_adjectives/o_a_staysfem.txt"
37 table na_u_staysfem "data/nouns_adjectives/u_staysfem.txt"
38 table na_o_a_staysmasc "data/nouns_adjectives/o_a_staysmasc.txt"
39 table na_u_staysmasc "data/nouns_adjectives/u_staysmasc.txt"
40 table na_ui_oi_ai_mascfem "data/nouns_adjectives/ui_oi_ai_mascfem.txt"
41
42 table na_imascfemshort_forms "data/nouns_adjectives/imascfemshort_forms.txt" nodisplay
43 table na_adjectiveregular_a_i_forms "data/nouns_adjectives/adjectiveregular_a_i_forms.txt" nodisplay
44 table na_ahmasc_forms "data/nouns_adjectives/ahmasc_forms.txt" nodisplay
45 table na_yahmasc_forms "data/nouns_adjectives/yahmasc_forms.txt" nodisplay
46 table na_aishortmasc_forms "data/nouns_adjectives/aishortmasc_forms.txt" nodisplay
47 table na_amasc_forms "data/nouns_adjectives/amasc_forms.txt" nodisplay
48 table na_an_forms "data/nouns_adjectives/an_forms.txt" nodisplay
49 table na_cfem_forms "data/nouns_adjectives/cfem_forms.txt" nodisplay
50 table na_cmasc_forms "data/nouns_adjectives/cmasc_forms.txt" nodisplay
51 table na_ifem_forms "data/nouns_adjectives/ifem_forms.txt" nodisplay
52 table na_imasc_forms "data/nouns_adjectives/imasc_forms.txt" nodisplay
53 table na_o_a_staysfem_forms "data/nouns_adjectives/o_a_staysfem_forms.txt" nodisplay
54 table na_u_staysfem_forms "data/nouns_adjectives/u_staysfem_forms.txt" nodisplay
55 table na_o_a_staysmasc_forms "data/nouns_adjectives/o_a_staysmasc_forms.txt" nodisplay
56 table na_u_staysmasc_forms "data/nouns_adjectives/u_staysmasc_forms.txt" nodisplay
57 table na_ui_oi_ai_mascfem_forms "data/nouns_adjectives/ui_oi_ai_mascfem_forms.txt" nodisplay
58
59 # Punctuation
60
61 table punctuation "data/punctuation.txt" nodisplay
62
63 # Regular verb expansions
64 expand verbs_regular_consonant_ending verbs_regular_consonant_ending_forms
65 expand verbs_regular_ending_in_a_o verbs_regular_ending_in_a_o_forms
66
67 # Regular noun/adjective expansions
68
69 expand na_imascfemshort na_imascfemshort_forms noroot
70 expand na_adjectiveregular_a_i na_adjectiveregular_a_i_forms noroot
71 expand na_ahmasc na_ahmasc_forms noroot
72 expand na_yahmasc na_yahmasc_forms noroot
73 expand na_aishortmasc na_aishortmasc_forms noroot
74 expand na_amasc na_amasc_forms noroot
75 expand na_an na_an_forms noroot
76 expand na_cfem na_cfem_forms
77 expand na_cmasc na_cmasc_forms
78 expand na_ifem na_ifem_forms noroot
79 expand na_imasc na_imasc_forms noroot
80 expand na_o_a_staysfem na_o_a_staysfem_forms
81 expand na_u_staysfem na_u_staysfem_forms noroot
82 expand na_o_a_staysmasc na_o_a_staysmasc_forms
83 expand na_u_staysmasc na_u_staysmasc_forms noroot
84 expand na_ui_oi_ai_mascfem na_ui_oi_ai_mascfem_forms noroot
85
86 # Conversion rules
87
88 match "نشو و نما" "नशो-नुमा"
89 match "مطیع و " "मतीओ-"
90 match "صحیح و " "सहीओ-"
91 match "وی سی آر" "वीसीआर"
92 match "محوِ " "महवे-" #exception to ए rule
93 match "ابھر کر" "उभरकर"
94 match "اُبھر کر" "उभरकर"
95 match "بھر کر" "भरकर"
96 match "بھر پور" "भरपूर"
97 match "بھر پُور" "भरपूर"
98 match "چوں و چرا" "चूँओ-चरा"
99 matchignore "[a-zA-Z]+" beginword endword
100 match "(?<=[یٰیاو]) و " "ओ-"
101 match "(?<=[عوی])ِ " "ए-"
102 match "ِ والا" "े-वाला"
103 match "ِ والو" "े-वालो"
104 match "ِ والے" "े-वाले"
105 match "ہی والا" "ही वाला"
106 match "ہی والے" "ही वाले"
107 match "ہی والوں" "ही वालों"
108 match "ہی والی" "ही वाली"
109 match "ِ " "े-"
110 match "(?<=[ہی])ٔ " "ए-"
111 match "(?<=[0123456789])واں" "वाँ"
112 match "(?<=[0123456789])ویں" "वें$वीं"
113 match "(?<=[0123456789])ء" " ई."
114 match "(?<![0123456789]):" "~:"
115
116 group beginword
117 replace misc_beginword.ur_hi
118 endgroup
119
120 group endword
121 replace misc_endword
122 endgroup
123
124 group
125 replace special.ur_hi
126 endgroup
127
128 group beginword endword
129 replace pairs_middle_e_o
130 replace na_imascfemshort
131 replace na_adjectiveregular_a_i
132 replace na_irregular
133 replace na_ahmasc
134 replace na_yahmasc
135 replace na_aishortmasc
136 replace na_amasc
137 replace na_an
138 replace na_cfem
139 replace na_cmasc
140 replace na_ifem
141 replace na_imasc
142 replace na_o_a_staysfem
143 replace na_u_staysfem
144 replace na_o_a_staysmasc
145 replace na_u_staysmasc
146 replace na_ui_oi_ai_mascfem
147
148 replace verbs_irregular
149 replace verbs_regular_consonant_ending
150 replace verbs_regular_ending_in_a_o
151
152 replace exceptions_beginword_endword.ur_hi override
153 endgroup
154
155 # Because numbers can appear before Bible book names, this needs to come after the tables above.
156 matchignore "[0123456789]+" beginword endword
157
158 group
159 replace punctuation
160 endgroup
161
162 retrywithout "_diacritics" "ُ" "ِ" "ّ" "َ" "ٰ"
163 retrywithout "spac_e" " "
164 retrywithout "nothing"
165 comment "#"