config.hi_ur - transliterate_data - Data for Urdu<->Hindi transliteration
HTML git clone git://lumidify.org/transliterate_data.git (fast, but not encrypted)
HTML git clone https://lumidify.org/transliterate_data.git (encrypted, but very slow)
HTML git clone git://4kcetb7mo7hj6grozzybxtotsub5bempzo4lirzc3437amof2c2impyd.onion/transliterate_data.git (over tor)
DIR Log
DIR Files
DIR Refs
DIR README
---
config.hi_ur (6948B)
---
1 # Configuration for Hindi->Urdu
2
3 split "[-.?,;।\s\\۔،؟―!—‘’“”:؛()[\]{}%―]+"
4 beforeword "[-.?,;।\s\\۔،؟!—‘’“”:؛()[\]{}%―]"
5 afterword "[-.?,;।\s\\۔،؟!—‘’“”:؛()[\]{}%―]"
6
7 ignore "data/ignore.txt"
8 table misc_endword "data/misc_endword.txt" nodisplay revert
9 table special.hi_ur "data/special.hi_ur.txt" nodisplay revert
10 table exceptions_beginword.hi_ur "data/exceptions_beginword.hi_ur.txt" revert
11 table exceptions_beginword_endword.hi_ur "data/exceptions_beginword_endword.hi_ur.txt" revert
12 table pairs_middle_e_o "data/pairs_middle_e_o.txt" nodisplay revert
13
14 # Verbs
15
16 table verbs_irregular "data/verbs/irregular.txt" revert
17 table verbs_regular_consonant_ending "data/verbs/regular_consonant_ending.txt" revert
18 table verbs_regular_consonant_ending_forms "data/verbs/regular_consonant_ending_forms.txt" nodisplay revert
19 table verbs_regular_ending_in_a_o "data/verbs/regular_ending_in_a_o.txt" revert
20 table verbs_regular_ending_in_a_o_forms "data/verbs/regular_ending_in_a_o_forms.txt" nodisplay revert
21
22 # Nouns/Adjectives
23
24 table na_imascfemshort "data/nouns_adjectives/imascfemshort.txt" revert
25 table na_adjectiveregular_a_i "data/nouns_adjectives/adjectiveregular_a_i.txt" revert
26 table na_irregular "data/nouns_adjectives/irregular.txt" revert
27 table na_ahmasc "data/nouns_adjectives/ahmasc.txt" revert
28 table na_aishortmasc "data/nouns_adjectives/aishortmasc.txt" revert
29 table na_amasc "data/nouns_adjectives/amasc.txt" revert
30 table na_an "data/nouns_adjectives/an.txt" revert
31 table na_cfem "data/nouns_adjectives/cfem.txt" revert
32 table na_cmasc "data/nouns_adjectives/cmasc.txt" revert
33 table na_ifem "data/nouns_adjectives/ifem.txt" revert
34 table na_imasc "data/nouns_adjectives/imasc.txt" revert
35 table na_o_a_staysfem "data/nouns_adjectives/o_a_staysfem.txt" revert
36 table na_u_staysfem "data/nouns_adjectives/u_staysfem.txt" revert
37 table na_o_a_staysmasc "data/nouns_adjectives/o_a_staysmasc.txt" revert
38 table na_u_staysmasc "data/nouns_adjectives/u_staysmasc.txt" revert
39 table na_ui_oi_ai_mascfem "data/nouns_adjectives/ui_oi_ai_mascfem.txt" revert
40
41 table na_imascfemshort_forms "data/nouns_adjectives/imascfemshort_forms.txt" nodisplay revert
42 table na_adjectiveregular_a_i_forms "data/nouns_adjectives/adjectiveregular_a_i_forms.txt" nodisplay revert
43 table na_ahmasc_forms "data/nouns_adjectives/ahmasc_forms.txt" nodisplay revert
44 table na_aishortmasc_forms "data/nouns_adjectives/aishortmasc_forms.txt" nodisplay revert
45 table na_amasc_forms "data/nouns_adjectives/amasc_forms.txt" nodisplay revert
46 table na_an_forms "data/nouns_adjectives/an_forms.txt" nodisplay revert
47 table na_cfem_forms "data/nouns_adjectives/cfem_forms.txt" nodisplay revert
48 table na_cmasc_forms "data/nouns_adjectives/cmasc_forms.txt" nodisplay revert
49 table na_ifem_forms "data/nouns_adjectives/ifem_forms.txt" nodisplay revert
50 table na_imasc_forms "data/nouns_adjectives/imasc_forms.txt" nodisplay revert
51 table na_o_a_staysfem_forms "data/nouns_adjectives/o_a_staysfem_forms.txt" nodisplay revert
52 table na_u_staysfem_forms "data/nouns_adjectives/u_staysfem_forms.txt" nodisplay revert
53 table na_o_a_staysmasc_forms "data/nouns_adjectives/o_a_staysmasc_forms.txt" nodisplay revert
54 table na_u_staysmasc_forms "data/nouns_adjectives/u_staysmasc_forms.txt" nodisplay revert
55 table na_ui_oi_ai_mascfem_forms "data/nouns_adjectives/ui_oi_ai_mascfem_forms.txt" nodisplay revert
56
57 # Punctuation
58
59 table punctuation "data/punctuation.txt" nodisplay revert
60
61 # Regular verb expansions
62 expand verbs_regular_consonant_ending verbs_regular_consonant_ending_forms
63 expand verbs_regular_ending_in_a_o verbs_regular_ending_in_a_o_forms
64
65 # Regular noun/adjective expansions
66
67 expand na_imascfemshort na_imascfemshort_forms noroot
68 expand na_adjectiveregular_a_i na_adjectiveregular_a_i_forms noroot
69 expand na_ahmasc na_ahmasc_forms noroot
70 expand na_aishortmasc na_aishortmasc_forms noroot
71 expand na_amasc na_amasc_forms noroot
72 expand na_an na_an_forms noroot
73 expand na_cfem na_cfem_forms
74 expand na_cmasc na_cmasc_forms
75 expand na_ifem na_ifem_forms noroot
76 expand na_imasc na_imasc_forms noroot
77 expand na_o_a_staysfem na_o_a_staysfem_forms
78 expand na_u_staysfem na_u_staysfem_forms noroot
79 expand na_o_a_staysmasc na_o_a_staysmasc_forms
80 expand na_u_staysmasc na_u_staysmasc_forms noroot
81 expand na_ui_oi_ai_mascfem na_ui_oi_ai_mascfem_forms noroot
82
83 # Conversion rules
84
85 matchignore "[a-zA-Z=]+" beginword endword
86
87 group beginword
88 replace exceptions_beginword.hi_ur
89 endgroup
90
91 match "(?<=[ाीू])ओ-" " و " # the letters ी ा ू
92 match "(?<=ा)ए-" "ٔ "
93 match "(?<=[ीूुअ])ए-" "ِ " # the letters ी ू ु अ
94 match "(?<=[0123456789])वाँ" "واں" endword
95 match "(?<=[0123456789])वें" "ویں" endword
96 match "(?<=[0123456789])वीं" "ویں" endword
97 match "(?<=[0123456789]) ई." "ء" endword
98 match "(?<![0123456789]) :" ":" endword
99
100 # The Persian genitive े- conflicts with word pairs containing regular inflections and a dash.
101 group beginword endword
102 replace pairs_middle_e_o
103 endgroup
104
105 group
106 replace special.hi_ur
107 endgroup
108
109 match "बा-" "با " beginword
110 match "ता-" "تا " beginword
111
112 group endword
113 replace misc_endword
114 endgroup
115
116 group beginword endword
117 replace na_imascfemshort
118 replace na_adjectiveregular_a_i
119 replace na_irregular
120 replace na_ahmasc
121 replace na_aishortmasc
122 replace na_amasc
123 replace na_an
124 replace na_cfem
125 replace na_cmasc
126 replace na_ifem
127 replace na_imasc
128 replace na_o_a_staysfem
129 replace na_u_staysfem
130 replace na_o_a_staysmasc
131 replace na_u_staysmasc
132 replace na_ui_oi_ai_mascfem
133
134 replace verbs_irregular
135 replace verbs_regular_consonant_ending
136 replace verbs_regular_ending_in_a_o
137 replace exceptions_beginword_endword.hi_ur override #override multiple choices for common words
138 endgroup
139
140 # The tables above contain words that begin with the same letters as the prefixes below, but in which those letters are not actually a prefix. Therefore those words are replaced first.
141 match "बे" "بے" beginword
142 match "ग़ैर" "غیر" beginword
143
144 # Because of numbers before Bible books, this needs to come after the tables above.
145 matchignore "[0123456789]+" beginword endword
146
147 # After the prefixes "बे" and "ग़ैर" have been replaced, a second pass over the same tables is needed to replace the rest of each word.
148 group beginword endword
149 replace na_imascfemshort
150 replace na_adjectiveregular_a_i
151 replace na_irregular
152 replace na_ahmasc
153 replace na_aishortmasc
154 replace na_amasc
155 replace na_an
156 replace na_cfem
157 replace na_cmasc
158 replace na_ifem
159 replace na_imasc
160 replace na_o_a_staysfem
161 replace na_u_staysfem
162 replace na_o_a_staysmasc
163 replace na_u_staysmasc
164 replace na_ui_oi_ai_mascfem
165
166 replace verbs_irregular
167 replace verbs_regular_consonant_ending
168 replace verbs_regular_ending_in_a_o
169 replace exceptions_beginword_endword.hi_ur override #override multiple choices for common words
170 endgroup
171
172 group
173 replace punctuation
174 endgroup
175
176 targetdiacritics "ُ" "ِ" "ّ" "َ" "ٰ"
177
178 retrywithout "_diacritics" "ُ" "ِ" "ّ" "َ" "ٰ"
179 retrywithout "spac_e" " "
180 retrywithout "nothing"
181 comment "#"