Get language name and ISO code based on native language name
For one of my PhoneGap projects, I needed a function that converts a local (native) language name into its international (English) name, and another one that returns the correct two-letter language code (ISO 639-1), again based on the native language name. This was required because PhoneGap applications return the local name of the currently selected language (so, you’ll get polski
instead of Polish
, Tiếng Việt
instead of Vietnamese
and so on).
Here are these functions, for future reference and for others who could possibly find them useful.
Getting the ISO 639-1 code for most languages is as easy as cutting their local name down to its first two characters and lowercasing them:
code = lang.toLocaleLowerCase().substring(0, 2);
But for some languages you’ll need some sort of dictionary that “translates” the wrong code (obtained from the transform above) into the correct one. That’s how the nativeLanguageNameToISOCode
function was born:
/**
 * Converts a native (local) language name into a language code.
 *
 * For most languages the ISO 639-1 code is simply the first two characters
 * of the lowercased native name. The dictionary below overrides that guess
 * for the languages where it would be wrong (for example "polski" -> "pl",
 * not "po") or where the native name is not written in Latin script.
 *
 * The lookup is case-insensitive. Names that are not in the dictionary fall
 * back to the first two characters of the lowercased name.
 *
 * JavaScript escapes: http://www.rishida.net/tools/conversion/
 * More languages (ISO 639-1 codes): http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
 *
 * @param {string} lang Native language name, e.g. "Polski".
 * @returns {string} Language code for the given name.
 */
function nativeLanguageNameToISOCode(lang)
{
    var
        // toLowerCase() instead of toLocaleLowerCase(): the result must not
        // depend on the device locale (a Turkish locale lowercases "I" to a
        // dotless "ı", which would break both the lookup and the fallback).
        llang = lang.toLowerCase(),
        dict = {
            "bahasa indonesia": "id", //Indonesian
            "indonesian": "id", //Indonesian
            "bahasa melayu": "ms", //Malay
            "bosnian": "bs", //Bosnian
            // NOTE: "fil" is a three-letter code; ISO 639-1 has no entry for
            // Filipino. Kept as-is for backward compatibility.
            "filipino": "fil", //Filipino
            "galego": "gl", //Galician
            "\u00EDslenska": "is", //Icelandic
            "javanese": "jv", //Javanese
            "latvie\u0161u": "lv", //Latvian
            "lietuvi\u0173": "lt", //Lithuanian
            "magyar": "hu", //Hungarian
            "nederlands": "nl", //Dutch
            "norsk bokm\u00E5l": "nb", //Norwegian Bokmal
            "polski": "pl", //Polish
            "portugu\u00EAs": "pt", //Portuguese
            "shqipe": "sq", //Albanian
            "sloven\u010Dina": "sk", //Slovak
            "suomi": "fi", //Finnish
            "tagalog": "tl", //Tagalog
            "t\u00FCrk\u00E7e": "tr", //Turkish
            "ti\u1EBFng vi\u1EC7t": "vi", //Vietnamese
            "\u010De\u0161tina": "cs", //Czech
            "\u09AC\u09BE\u0982\u09B2\u09BE": "bn", //Bengali
            "\u0BA4\u0BAE\u0BBF\u0BB4\u0BCD": "ta", //Tamil
            "\u0C95\u0CA8\u0CCD\u0CA8\u0CA1": "kn", //Kannada
            "\u0C24\u0C46\u0C32\u0C41\u0C17\u0C41": "te", //Telugu
            "\u0D2E\u0D32\u0D2F\u0D3E\u0D23\u0D4D\u0D2E": "ml", //Malayalam
            "\u049B\u0430\u0437\u0430\u049B \u0442\u0456\u043B\u0456": "kk", //Kazakh
            "\u043C\u0430\u043A\u0435\u0434\u043E\u043D\u0441\u043A\u0438": "mk", //Macedonian
            "\u03B5\u03BB\u03BB\u03B7\u03BD\u03B9\u03BA\u03AC": "el", //Modern Greek
            "\u0431\u044A\u043B\u0433\u0430\u0440\u0441\u043A\u0438": "bg", //Bulgarian
            "\u0440\u0443\u0441\u0441\u043A\u0438\u0439": "ru", //Russian
            "\u0441\u0440\u043F\u0441\u043A\u0438": "sr", //Serbian
            "\u092E\u0930\u093E\u0920\u0940": "mr", //Marathi
            "\u0443\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0430": "uk", //Ukrainian
            "\u05E2\u05D1\u05E8\u05D9\u05EA": "he", //Modern Hebrew
            "\u0627\u0644\u0639\u0631\u0628\u064A\u0629": "ar", //Arabic
            "\u0641\u0627\u0631\u0633\u06CC": "fa", //Persian
            "\u0E44\u0E17\u0E22": "th", //Thai
            "\u4E2D\u6587": "zh", //Chinese
            "\u65E5\u672C\u8A9E": "ja", //Japanese
            "\uD55C\uAD6D\uC5B4": "ko" //Korean
        };

    // Own-property check so that names such as "constructor" do not pick up
    // members inherited from Object.prototype.
    if(Object.prototype.hasOwnProperty.call(dict, llang))
    {
        return dict[llang];
    }

    // Default guess: the first two characters of the lowercased name.
    return llang.substring(0, 2);
}
As you may conclude from the comment, this list is not complete. I wrote “translations” only for the languages found in the Android 4.3 system. If you want to add new languages, the ISO 639-1 codes list at Wikipedia and a JavaScript escapes tool might be useful for you.
The function used for getting the international (English) language name out of the local one is nearly identical. The two could be written together as one, but I was too lazy! :]
/**
 * Converts a native (local) language name into its English name.
 *
 * The lookup is case-insensitive. Only the languages listed in the
 * dictionary below are translated; any other name is returned exactly as
 * it was passed in.
 *
 * JavaScript escapes: http://www.rishida.net/tools/conversion/
 * More languages: http://en.wikipedia.org/wiki/List_of_ISO_639-1_codes
 *
 * @param {string} lang Native language name, e.g. "Polski".
 * @returns {string} English language name, or `lang` unchanged when the
 *                   name is not in the dictionary.
 */
function nativeLanguageNameToEnglishName(lang)
{
    var
        // toLowerCase() instead of toLocaleLowerCase(): the lookup key must
        // not depend on the device locale (a Turkish locale lowercases "I"
        // to a dotless "ı", so "Indonesian" would no longer match).
        llang = lang.toLowerCase(),
        dict = {
            "bahasa indonesia": "Indonesian",
            "indonesian": "Indonesian",
            "bahasa melayu": "Malay",
            "bosnian": "Bosnian",
            "filipino": "Filipino",
            "galego": "Galician",
            "\u00EDslenska": "Icelandic",
            "javanese": "Javanese",
            "latvie\u0161u": "Latvian",
            "lietuvi\u0173": "Lithuanian",
            "magyar": "Hungarian",
            "nederlands": "Dutch",
            "norsk bokm\u00E5l": "Norwegian Bokm\u00E5l",
            "polski": "Polish",
            "portugu\u00EAs": "Portuguese",
            "shqipe": "Albanian",
            "sloven\u010Dina": "Slovak",
            "suomi": "Finnish",
            "tagalog": "Tagalog",
            "t\u00FCrk\u00E7e": "Turkish",
            "ti\u1EBFng vi\u1EC7t": "Vietnamese",
            "\u010De\u0161tina": "Czech",
            "\u09AC\u09BE\u0982\u09B2\u09BE": "Bengali",
            "\u0BA4\u0BAE\u0BBF\u0BB4\u0BCD": "Tamil",
            "\u0C95\u0CA8\u0CCD\u0CA8\u0CA1": "Kannada",
            "\u0C24\u0C46\u0C32\u0C41\u0C17\u0C41": "Telugu",
            "\u0D2E\u0D32\u0D2F\u0D3E\u0D23\u0D4D\u0D2E": "Malayalam",
            "\u049B\u0430\u0437\u0430\u049B \u0442\u0456\u043B\u0456": "Kazakh",
            "\u043C\u0430\u043A\u0435\u0434\u043E\u043D\u0441\u043A\u0438": "Macedonian",
            "\u03B5\u03BB\u03BB\u03B7\u03BD\u03B9\u03BA\u03AC": "Modern Greek",
            "\u0431\u044A\u043B\u0433\u0430\u0440\u0441\u043A\u0438": "Bulgarian",
            "\u0440\u0443\u0441\u0441\u043A\u0438\u0439": "Russian",
            "\u0441\u0440\u043F\u0441\u043A\u0438": "Serbian",
            "\u092E\u0930\u093E\u0920\u0940": "Marathi",
            "\u0443\u043A\u0440\u0430\u0457\u043D\u0441\u044C\u043A\u0430": "Ukrainian",
            "\u05E2\u05D1\u05E8\u05D9\u05EA": "Modern Hebrew",
            "\u0627\u0644\u0639\u0631\u0628\u064A\u0629": "Arabic",
            "\u0641\u0627\u0631\u0633\u06CC": "Persian",
            "\u0E44\u0E17\u0E22": "Thai",
            "\u4E2D\u6587": "Chinese",
            "\u65E5\u672C\u8A9E": "Japanese",
            "\uD55C\uAD6D\uC5B4": "Korean"
        };

    // Own-property check so that names such as "constructor" do not pick up
    // members inherited from Object.prototype.
    if(Object.prototype.hasOwnProperty.call(dict, llang))
    {
        return dict[llang];
    }

    // Unknown name: hand the input back untouched (original casing kept).
    return lang;
}
Keep in mind that the main reason for writing these two functions was to provide a language code fixup for those languages whose first two letters are not equal to the ISO language code (for example: polski
= po
!= pl
).
Due to time limits, nativeLanguageNameToEnglishName
function was written based on nativeLanguageNameToISOCode
and it is not complete. It provides English (international) language names only for those few languages whose names don’t start with the two letters of their language code.
If you need to extend this function to support all other languages, a good source of data and a JS escaping tool will be useful for you — see the links above.