# © 2016 and later: Unicode, Inc. and others.
# License & terms of use: http://www.unicode.org/copyright.html
# Generated using tools/cldr/cldr-to-icu/build-icu-data.xml
#
# File: sat_Olck_sat_FONIPA.txt
# Generated from CLDR
#
# NOTE(review): this file is marked as generated from CLDR; any rule change
# made here presumably has to be mirrored in the CLDR source transform.

# Santali (Ol Chiki) → Santali (International Phonetic Alphabet)

# Output
# ------
# m mː n nː ɳ ɳː ɲ ɲː ŋ ŋː
# p pʰ pʼ b bʰ t tʰ tʼ d dʰ ʈ ʈʰ ɖ ɖʰ c cʰ cʼ k kʰ kʼ ɡ ʔ
# s sː h
# d\u0361ʒ
# ɽ r
# l lː
# w wː w\u0303 w\u0303ː
#
# i iː ĩ ĩː u uː ũ ũː
# e eː ẽ ẽː ə əː ə\u0303 ə\u0303ː o oː õ õː
# ɛ ɛː ɛ\u0303 ɛ\u0303ː ɔ ɔː ɔ\u0303 ɔ\u0303ː
# a aː ã ãː

# References
# ----------
# [1] Michael Everson: Final proposal to encode the Ol Chiki script
#     in the UCS. ISO/IEC JTC1/SC2/WG2 Working Group Document N2984R,
#     September 21, 2005. http://std.dkuug.dk/jtc1/sc2/wg2/docs/n2984.pdf
#
# [2] George L. Campbell: Compendium of the World's Languages.
#     Volume 2: Ladakhi to Zuni. ISBN 0-415-20297-3. Taylor & Francis, 2000.
#     Pages 1454 to 1458.

# Notes
# -----
# According to [1] (page 3), ᱽ can only follow the four ejective
# consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/; these become
# ᱵᱽ /b/, ᱫᱽ /d/, ᱡᱽ /d\u0361ʒ/, and ᱜᱽ /ɡ/. In online texts, however,
# we have occasionally encountered ᱽ following non-ejective plosives,
# for example after ᱯ /p/. These might possibly be typos. Our rules
# try to be resilient and handle ᱯᱽ as /b/.
#
# According to [1] (page 2), U+1C7C PHAARKAA follows the four “glottal”
# consonants ᱵ /pʼ/, ᱡ /cʼ/, ᱫ /tʼ/, and ᱜ /kʼ/ (these are actually
# ejective, not glottal). In online texts, however, we have frequently
# encountered ᱼ following non-ejective consonants.

# Context used by the ejective rules below: matches when the character
# before the cursor is a letter or a mark, i.e. the consonant is not
# word-initial.
$inword = [[:L:][:M:]];

# Pass 1.
# Some online texts use a decomposed form of U+1C7A MU-GAAHLAA TTUDDAG.
ᱹᱸ → ᱺ ;
ᱸᱹ → ᱺ ;
::null();

# Pass 2.
# To simplify the rules below, enforce a uniform ordering of marks:
# the vowel-quality/nasalization marks (ᱹ ᱸ ᱺ) always come before
# relaa (ᱻ) and phaarkaa (ᱼ).
ᱻᱹ → ᱹᱻ ;
ᱻᱸ → ᱸᱻ ;
ᱻᱺ → ᱺᱻ ;
ᱼᱹ → ᱹᱼ ;
ᱼᱸ → ᱸᱼ ;
ᱼᱺ → ᱺᱼ ;
::null();

# Pass 3.
# Some online texts use U+1C7C PHAARKAA instead of U+1C7B RELAA for indicating
# long phonemes, presumably because the graphemes look similar in some fonts.
# Since phaarkaa is used for voicing ejectives and plosives (which cannot
# be lengthened), we rewrite phaarkaa to relaa.
[ᱚᱟᱤᱩᱮᱳᱶᱢᱝᱞᱱ] [ᱹᱸᱺ]* {ᱼ} → ᱻ ;
::null();

# Pass 4: grapheme-to-phoneme mapping. Within each letter, rules with
# more marks are listed first so that they win over the bare letter.

ᱚᱹᱻ → ɔː ;
ᱚᱹ → ɔ ;
ᱚᱸᱻ → ɔ\u0303ː ;
ᱚᱸ → ɔ\u0303 ;
ᱚᱺᱻ → ɔ\u0303ː ;
ᱚᱺ → ɔ\u0303 ;
ᱚᱻ → ɔː ;
ᱚ → ɔ ;

# ᱛ is a plain (non-ejective) plosive, so like ᱠ, ᱪ, ᱯ and ᱴ it must not
# take the $inword voicing rule: that rule is reserved for the four
# ejectives ᱜ, ᱡ, ᱫ and ᱵ. (Voicing ᱛ word-internally would turn
# ᱥᱟᱱᱛᱟᱲᱤ into "sandaɽi".)
ᱛᱼ → t ;
ᱛᱷ → tʰ ;
ᱛᱽ → d ;
ᱛ → t ;

ᱜᱼ → kʼ ;
ᱜᱷ → kʰ ;
ᱜᱽ → ɡ ;
$inword {ᱜ} → ɡ ;
ᱜ → kʼ ;

ᱝᱻ → ŋː ;
ᱝ → ŋ ;

ᱞᱻ → lː ;
ᱞ → l ;

ᱟᱹᱻ → əː ;
ᱟᱹ → ə ;
ᱟᱸᱻ → ãː ;
ᱟᱸ → ã ;
ᱟᱺᱻ → ə\u0303ː ;
ᱟᱺ → ə\u0303 ;
ᱟᱻ → aː ;
ᱟ → a ;

ᱠᱼ → k ;
ᱠᱷ → kʰ ;
ᱠᱽ → ɡ ;
ᱠ → k ;

ᱡᱼ → cʼ ;
ᱡᱷ → cʰ ;
ᱡᱽ → d\u0361ʒ ;
$inword {ᱡ} → d\u0361ʒ ;
ᱡ → cʼ ;

ᱢᱻ → mː ;
ᱢ → m ;

# According to [1], ᱣ is sometimes /v/ and sometimes /w/.
# TODO: Find out if there is a rule for this.
# The long forms are needed so that a following relaa is consumed instead
# of being left untransliterated in the output; /wː/ and /w\u0303ː/ are both
# part of the documented output inventory.
ᱣᱸᱻ → w\u0303ː ;
ᱣᱸ → w\u0303 ;
ᱣᱻ → wː ;
ᱣ → w ;

ᱤᱹᱻ → iː ;
ᱤᱹ → i ;
ᱤᱸᱻ → ĩː ;
ᱤᱸ → ĩ ;
ᱤᱺᱻ → ĩː ;
ᱤᱺ → ĩ ;
ᱤᱻ → iː ;
ᱤ → i ;

ᱥᱻ → sː ;
ᱥ → s ;

# According to [1], ᱦ is sometimes /h/ and sometimes /ʔ/.
# TODO: Find out if there is a rule for this.
# (/ʔ/ is listed under "Output" above, but no rule currently emits it.)
ᱦ → h ;

ᱧᱻ → ɲː ;
ᱧ → ɲ ;

# Length is not distinguished for /r/: relaa is consumed and dropped.
ᱨᱻ → r ;
ᱨ → r ;

ᱩᱹᱻ → uː ;
ᱩᱹ → u ;
ᱩᱸᱻ → ũː ;
ᱩᱸ → ũ ;
ᱩᱺᱻ → ũː ;
ᱩᱺ → ũ ;
ᱩᱻ → uː ;
ᱩ → u ;

ᱪᱼ → c ;
ᱪᱷ → cʰ ;
ᱪᱽ → d\u0361ʒ ;
ᱪ → c ;

# Aspirated ᱫ yields /dʰ/, parallel to ᱵᱷ → bʰ; /tʰ/ is already
# produced by ᱛᱷ, and /dʰ/ is part of the documented output inventory.
ᱫᱼ → tʼ ;
ᱫᱷ → dʰ ;
ᱫᱽ → d ;
$inword {ᱫ} → d ;
ᱫ → tʼ ;

ᱬᱻ → ɳː ;
ᱬ → ɳ ;

# TODO: ᱵᱷᱭᱨᱚᱵ → bʰhrɔb seems unlikely; would be good to verify.
# NOTE(review): ᱭ and ᱦ currently both map to /h/ — confirm whether ᱭ
# should instead be a distinct phoneme.
ᱭ → h ;

ᱮᱹᱻ → ɛː ;
ᱮᱹ → ɛ ;
ᱮᱺᱻ → ɛ\u0303ː ;
ᱮᱺ → ɛ\u0303 ;
ᱮᱸᱻ → ẽː ;
ᱮᱸ → ẽ ;
ᱮᱻ → eː ;
ᱮ → e ;

ᱯᱼ → p ;
ᱯᱷ → pʰ ;
ᱯᱽ → b ;
ᱯ → p ;

ᱰᱷ → ɖʰ ;
ᱰ → ɖ ;

ᱱᱻ → nː ;
ᱱ → n ;

# Length is not distinguished for /ɽ/: relaa is consumed and dropped.
ᱲᱻ → ɽ ;
ᱲ → ɽ ;

ᱳᱸᱻ → õː ;
ᱳᱸ → õ ;
ᱳᱻ → oː ;
ᱳ → o ;

ᱴᱼ → ʈ ;
ᱴᱷ → ʈʰ ;
ᱴᱽ → ɖ ;
ᱴ → ʈ ;

ᱵᱼ → pʼ ;
ᱵᱷ → bʰ ;
ᱵᱽ → b ;
$inword {ᱵ} → b ;
ᱵ → pʼ ;

ᱶᱻ → w\u0303ː ;
ᱶ → w\u0303 ;