tconfig - transliterate_data - Data for Urdu->Hindi transliteration
HTML git clone git://lumidify.org/transliterate_data.git
DIR Log
DIR Files
DIR Refs
DIR README
---
tconfig (4785B)
---
1 # Configuration for Urdu->Hindi transliteration
2
3 split "[-\s\\۔،؟!—‘’“”:؛()[\]{}%]+"
4 beforeword "[-\s\\۔،؟!—‘’“”:؛()[\]{}%]"
5 afterword "[-\s\\۔،؟!—‘’“”:؛()[\]{}%]"
6
7 ignore "data/ignore.txt"
8 table misc_beginword "data/misc_beginword.txt" nodisplay
9 table misc_endword "data/misc_endword.txt" nodisplay
10 table special "data/special.txt" nodisplay
11
12
13 # Verb tables
14
15 table verbs_irregular "data/verbs/irregular.txt"
16 table verbs_regular_consonant_ending "data/verbs/regular_consonant_ending.txt"
17 table verbs_regular_consonant_ending_forms "data/verbs/regular_consonant_ending_forms.txt" nodisplay
18 table verbs_regular_ending_in_a_o "data/verbs/regular_ending_in_a_o.txt"
19 table verbs_regular_ending_in_a_o_forms "data/verbs/regular_ending_in_a_o_forms.txt" nodisplay
20
21 # Noun/adjective tables
22
23 table na_ifemshort "data/nouns_adjectives/ifemshort.txt"
24 table na_adjectiveregular_a_i "data/nouns_adjectives/adjectiveregular_a_i.txt"
25 table na_irregular "data/nouns_adjectives/irregular.txt"
26 table na_ahmasc "data/nouns_adjectives/ahmasc.txt"
27 table na_aimasc "data/nouns_adjectives/aimasc.txt"
28 table na_amasc "data/nouns_adjectives/amasc.txt"
29 table na_an "data/nouns_adjectives/an.txt"
30 table na_cfem "data/nouns_adjectives/cfem.txt"
31 table na_cmasc "data/nouns_adjectives/cmasc.txt"
32 table na_ifem "data/nouns_adjectives/ifem.txt"
33 table na_imasc "data/nouns_adjectives/imasc.txt"
34 table na_o_a_staysfem "data/nouns_adjectives/o_a_staysfem.txt"
35 table na_u_staysfem "data/nouns_adjectives/u_staysfem.txt"
36 table na_o_a_staysmasc "data/nouns_adjectives/o_a_staysmasc.txt"
37 table na_u_staysmasc "data/nouns_adjectives/u_staysmasc.txt"
38 table na_ui_oi_ai_mascfem "data/nouns_adjectives/ui_oi_ai_mascfem.txt"
39
40 table na_ifemshort_forms "data/nouns_adjectives/ifemshort_forms.txt" nodisplay
41 table na_adjectiveregular_a_i_forms "data/nouns_adjectives/adjectiveregular_a_i_forms.txt" nodisplay
42 table na_ahmasc_forms "data/nouns_adjectives/ahmasc_forms.txt" nodisplay
43 table na_aimasc_forms "data/nouns_adjectives/aimasc_forms.txt" nodisplay
44 table na_amasc_forms "data/nouns_adjectives/amasc_forms.txt" nodisplay
45 table na_an_forms "data/nouns_adjectives/an_forms.txt" nodisplay
46 table na_cfem_forms "data/nouns_adjectives/cfem_forms.txt" nodisplay
47 table na_cmasc_forms "data/nouns_adjectives/cmasc_forms.txt" nodisplay
48 table na_ifem_forms "data/nouns_adjectives/ifem_forms.txt" nodisplay
49 table na_imasc_forms "data/nouns_adjectives/imasc_forms.txt" nodisplay
50 table na_o_a_staysfem_forms "data/nouns_adjectives/o_a_staysfem_forms.txt" nodisplay
51 table na_u_staysfem_forms "data/nouns_adjectives/u_staysfem_forms.txt" nodisplay
52 table na_o_a_staysmasc_forms "data/nouns_adjectives/o_a_staysmasc_forms.txt" nodisplay
53 table na_u_staysmasc_forms "data/nouns_adjectives/u_staysmasc_forms.txt" nodisplay
54 table na_ui_oi_ai_mascfem_forms "data/nouns_adjectives/ui_oi_ai_mascfem_forms.txt" nodisplay
55
56 # Punctuation
57
58 table punctuation "data/punctuation.txt" nodisplay
59
60 # Regular verb expansions
61 expand verbs_regular_consonant_ending verbs_regular_consonant_ending_forms
62 expand verbs_regular_ending_in_a_o verbs_regular_ending_in_a_o_forms
63
64 # Regular noun/adjective expansions
65
66 expand na_ifemshort na_ifemshort_forms noroot
67 expand na_adjectiveregular_a_i na_adjectiveregular_a_i_forms noroot
68 expand na_ahmasc na_ahmasc_forms noroot
69 expand na_aimasc na_aimasc_forms noroot
70 expand na_amasc na_amasc_forms noroot
71 expand na_an na_an_forms noroot
72 expand na_cfem na_cfem_forms
73 expand na_cmasc na_cmasc_forms
74 expand na_ifem na_ifem_forms noroot
75 expand na_imasc na_imasc_forms noroot
76 expand na_o_a_staysfem na_o_a_staysfem_forms
77 expand na_u_staysfem na_u_staysfem_forms noroot
78 expand na_o_a_staysmasc na_o_a_staysmasc_forms
79 expand na_u_staysmasc na_u_staysmasc_forms noroot
80 expand na_ui_oi_ai_mascfem na_ui_oi_ai_mascfem_forms noroot
81
82 # Conversion rules
83
84 match "(?<=ی) و " "ओ-"
85 match "(?<=[ہی])ٔ" "ए-" endword
86 match "(?<=[ہی])ِ" "ए-" endword
87 match "(?<=ا) و " "ओ-"
88 match "(?<=[0123456789])واں" "वाँ"
89 match "(?<=[0123456789])ویں" "वें$वीं"
90 match "(?<=[0123456789])ء" " ई."
91 match "(?<![0123456789]):" " :"
92
93 matchignore "\d+" beginword endword
94
95 group beginword
96 replace misc_beginword
97 endgroup
98
99 group endword
100 replace misc_endword
101 endgroup
102
103 group
104 replace special
105 replace punctuation
106 endgroup
107
108 group beginword endword
109 replace na_ifemshort
110 replace na_adjectiveregular_a_i
111 replace na_irregular
112 replace na_ahmasc
113 replace na_aimasc
114 replace na_amasc
115 replace na_an
116 replace na_cfem
117 replace na_cmasc
118 replace na_ifem
119 replace na_imasc
120 replace na_o_a_staysfem
121 replace na_u_staysfem
122 replace na_o_a_staysmasc
123 replace na_u_staysmasc
124 replace na_ui_oi_ai_mascfem
125
126 replace verbs_irregular
127 replace verbs_regular_consonant_ending
128 replace verbs_regular_ending_in_a_o
129 endgroup
130
131 diacritics "ُ" "ِ" "ّ" "َ"