Raised This Month: $ Target: $400

Unicode escape sequences to utf-8 strings converter

Post New Thread Reply   
Thread Tools Display Modes
Author Message
Senior Member
Join Date: Nov 2016
Old 08-25-2020 , 18:29   Unicode escape sequences to utf-8 strings converter
Reply With Quote #1

Sharing my Unicode escape sequence converter. I was in need of a SourcePawn (sp) implementation, so here it is:

// NOTE(review): INVALID_CODEPOINT is used below but its definition is not
// visible in this listing. U+FFFD (the Unicode replacement character) is
// assumed here - confirm against the original definition.
#if !defined INVALID_CODEPOINT
#define INVALID_CODEPOINT 0xFFFD
#endif

/**
 * Converts "\uXXXX" escape sequences found in a string into UTF-8 bytes.
 *
 * A backslash followed by any character other than 'u' emits that character
 * literally and drops the backslash. A "\u" that is not followed by four hex
 * digits is copied through unchanged. A UTF-16 surrogate pair written as two
 * consecutive escapes ("\uD83D\uDE00") is combined into one code point; an
 * unpaired surrogate is replaced with INVALID_CODEPOINT.
 *
 * @param text		Input string containing the escape sequences.
 * @param buff		Output buffer receiving the converted string.
 * @param size		Size of the output buffer, including the terminator.
 */
void EscapeUnicodeSequences(const char[] text, char[] buff, int size)
{
	if(size <= 0)
	{
		return;
	}

	int leading, trailing, codepoint;
	bool valid_unicode;
	int j = 0;

	// Stop one cell short of the end so the terminator always fits.
	for(int i = 0; text[i] != '\0' && j < size - 1; i++)
	{
		// Handles escaped characters and saves them literally
		if(text[i] == '\\' && text[i + 1] != 'u')
		{
			if(text[i + 1] == '\0')
			{
				// Lone backslash at the very end: keep it and stop, so the
				// loop never steps past the input terminator.
				buff[j++] = '\\';
				break;
			}

			buff[j++] = text[i + 1];
			i++;
			continue;
		}

		leading = GetUnicodeCodepoint(text[i], valid_unicode);
		if(!valid_unicode)
		{
			// Not a well-formed "\uXXXX" sequence, copy the character as is
			buff[j++] = text[i];
			continue;
		}

		// Skip over "\uXXX"; the loop's own i++ consumes the last hex digit.
		i += 5;

		if(leading >= 0xD800 && leading <= 0xDBFF)
		{
			// High surrogate: only valid when a low surrogate follows directly.
			trailing = GetUnicodeCodepoint(text[i + 1], valid_unicode);
			if(valid_unicode && trailing >= 0xDC00 && trailing <= 0xDFFF)
			{
				i += 6;
				// http://unicode.org/faq/utf_bom.html
				codepoint = (leading << 10) + trailing + (0x10000 - (0xD800 << 10) - 0xDC00);
			}
			else
			{
				codepoint = INVALID_CODEPOINT;
			}
		}
		else if(leading >= 0xDC00 && leading <= 0xDFFF)
		{
			// Low surrogate without a preceding high surrogate.
			codepoint = INVALID_CODEPOINT;
		}
		else
		{
			codepoint = leading;
		}

		if(codepoint < 0 || codepoint > 0x10FFFF)
		{
			codepoint = INVALID_CODEPOINT;
		}

		// From here on codepoint is within 0..0x10FFFF. Each branch bails out
		// when the encoded bytes plus the terminator would not fit.
		if(codepoint <= 0x007F)
		{
			if(j + 1 >= size)
			{
				break;
			}

			buff[j++] = codepoint;
		}
		else if(codepoint <= 0x07FF)
		{
			if(j + 2 >= size)
			{
				break;
			}

			buff[j++] = 0xC0 | (codepoint >> 6);
			buff[j++] = 0x80 | (codepoint & 0x3F);
		}
		else if(codepoint <= 0xFFFF)
		{
			if(j + 3 >= size)
			{
				break;
			}

			buff[j++] = 0xE0 | (codepoint >> 12);
			buff[j++] = 0x80 | ((codepoint >> 6) & 0x3F);
			buff[j++] = 0x80 | (codepoint & 0x3F);
		}
		else
		{
			if(j + 4 >= size)
			{
				break;
			}

			buff[j++] = 0xF0 | (codepoint >> 18);
			buff[j++] = 0x80 | ((codepoint >> 12) & 0x3F);
			buff[j++] = 0x80 | ((codepoint >> 6) & 0x3F);
			buff[j++] = 0x80 | (codepoint & 0x3F);
		}
	}

	buff[j] = '\0';
}

/**
 * Reads a "\uXXXX" escape sequence located at the start of a string.
 *
 * @param buff		String expected to begin with "\u" and four hex digits.
 * @param parsed	Set to true when a complete sequence was read, false otherwise.
 * @return			The value produced by ParseUnicodeHex for the four
 *					characters after "\u", or 0 when buff does not start with "\u".
 */
int GetUnicodeCodepoint(const char[] buff, bool &parsed)
{
	parsed = false;

	// The || short-circuits, so buff[1] is only inspected after buff[0]
	// is known to be a backslash (and therefore not the terminator).
	if(buff[0] != '\\' || buff[1] != 'u')
	{
		return 0;
	}

	return ParseUnicodeHex(buff[2], parsed);
}

/**
 * Parses exactly four hexadecimal digits from the start of a string.
 *
 * Upper- and lower-case digits are accepted. Characters are examined front
 * to back and parsing stops at the first one that is not a hex digit
 * (the terminator included), so nothing past the end of a short string is read.
 *
 * @param buff		String whose first four characters should be hex digits.
 * @param parsed	Set to true when all four digits were valid, false otherwise.
 * @return			The parsed value (0..0xFFFF), or 0 on failure.
 */
int ParseUnicodeHex(const char[] buff, bool &parsed)
{
	parsed = false;

	int result = 0;
	for(int i = 0; i < 4; i++)
	{
		int digit;
		if(buff[i] >= '0' && buff[i] <= '9')
		{
			digit = buff[i] - '0';
		}
		else if(buff[i] >= 'a' && buff[i] <= 'f')
		{
			digit = buff[i] - 'a' + 10;
		}
		else if(buff[i] >= 'A' && buff[i] <= 'F')
		{
			digit = buff[i] - 'A' + 10;
		}
		else
		{
			// Not a hex digit; this also covers an early string terminator.
			return 0;
		}

		// Most significant digit comes first, so shift in four bits at a time.
		result = (result << 4) | digit;
	}

	parsed = true;
	return result;
}
Basic usage example:
// Output buffer for the converted string.
char buff[32];
// Convert the escape sequence for U+2B50 and store the result in buff.
EscapeUnicodeSequences("\u2B50", buff, sizeof(buff));
PrintToChatAll(buff); //Will print ⭐ in game chat

Last edited by butare; 08-25-2020 at 18:38.
butare is offline

Posting Rules
You may not post new threads
You may not post replies
You may not post attachments
You may not edit your posts

BB code is On
Smilies are On
[IMG] code is On
HTML code is Off

Forum Jump

All times are GMT -4. The time now is 11:35.

Powered by vBulletin®
Copyright ©2000 - 2024, vBulletin Solutions, Inc.
Theme made by Freecode