ahamed101 40 Junior Poster

Hi,
For the last week I have been trying to do conversions between UTF-8, multibyte, wide-character and ASCII strings. I finally managed to code a working program and thought it might help someone, so I am posting it here. If there are any errors, please point them out, and if you have any doubts, please ask.

#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include "locale.h"
#include "string.h"

/* Maximum number of wide characters (excluding the terminator) held in awArr */
#define WCMAXCHARS 19
/* Capacity of the wide-character buffer, terminator included */
#define WCMAXLEN (WCMAXCHARS + 1)
/* Capacity in bytes of the multibyte buffer, terminator included */
#define MBMAXLEN 10

/* Turn a macro's value into a string literal (used for scanf field widths) */
#define STRINGIFY_(x) #x
#define STRINGIFY(x) STRINGIFY_(x)

/*
 * Read and drop characters from stdin up to and including the next newline
 * (or until end of input), so leftovers never leak into the next prompt.
 */
static void discard_line(void)
{
	int iChar;

	do {
		iChar = getchar();
	} while (iChar != '\n' && iChar != EOF);
}

/*
 * Encode one Unicode code point as UTF-8.
 *
 *   cp  - code point to encode
 *   out - receives 1 to 4 bytes (not NUL terminated)
 *
 * Returns the number of bytes stored, or 0 when cp is not a valid scalar
 * value (a UTF-16 surrogate or anything above U+10FFFF).
 */
static size_t encode_utf8(unsigned long cp, unsigned char out[4])
{
	if (cp < 0x80UL) {
		/* 0xxxxxxx */
		out[0] = (unsigned char)cp;
		return 1;
	}
	if (cp < 0x800UL) {
		/* 110xxxxx 10xxxxxx : lead byte first, continuation byte second */
		out[0] = (unsigned char)(0xC0UL | (cp >> 6));
		out[1] = (unsigned char)(0x80UL | (cp & 0x3FUL));
		return 2;
	}
	if (cp < 0x10000UL) {
		if (cp >= 0xD800UL && cp <= 0xDFFFUL) {
			return 0;
		}
		/* 1110xxxx 10xxxxxx 10xxxxxx */
		out[0] = (unsigned char)(0xE0UL | (cp >> 12));
		out[1] = (unsigned char)(0x80UL | ((cp >> 6) & 0x3FUL));
		out[2] = (unsigned char)(0x80UL | (cp & 0x3FUL));
		return 3;
	}
	if (cp <= 0x10FFFFUL) {
		/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
		out[0] = (unsigned char)(0xF0UL | (cp >> 18));
		out[1] = (unsigned char)(0x80UL | ((cp >> 12) & 0x3FUL));
		out[2] = (unsigned char)(0x80UL | ((cp >> 6) & 0x3FUL));
		out[3] = (unsigned char)(0x80UL | (cp & 0x3FUL));
		return 4;
	}
	return 0;
}

/*
 * Write the UTF-8 encoding of cp to stream; a '?' is written instead when
 * cp cannot be encoded. Write errors are left in the stream's error flag.
 */
static void put_utf8(unsigned long cp, FILE *stream)
{
	unsigned char aucBytes[4];
	size_t uCount = encode_utf8(cp, aucBytes);

	if (uCount == 0) {
		fputc('?', stream);
		return;
	}
	fwrite(aucBytes, 1, uCount, stream);
}

/*
 * Interactive demo of conversions between multibyte, wide-character and
 * UTF-8 text. Shows a menu until the user enters 0 (or input ends):
 *   1 - convert a fixed multibyte string to wide characters
 *   2 - read a word, show it as wide characters, append it to wide.txt
 *   3 - read a line of single-byte text, emit it as UTF-8 on screen and
 *       append it to utf.txt
 *   4 - read wide.txt and print each line as UTF-8
 *   5 - read utf.txt as wide characters and print it
 * Always returns 0.
 */
int main(void) {
	/* Choice of the user */
	int iChoice = 0;
	/* Number of items converted by scanf */
	int iItems = 0;
	/* Saved stream error flag */
	int iFailed = 0;
	/* Length of the string being processed */
	size_t uLength = 0;
	/* Number of wide characters produced by a conversion */
	size_t uConverted = 0;
	/* Loop Index */
	size_t uIndex = 0;
	/* MB and WC string */
	char acString[MBMAXLEN] = "";
	wchar_t awArr[WCMAXLEN];
	/* File Pointer */
	FILE *pFile = NULL;

	do {
		/* Display the menu */
		system("cls");
		printf("\n 1. Japanese");
		printf("\n 2. Ascii to Wide Character");
		printf("\n 3. Ascii to UTF-8");
		printf("\n 4. Wide Character to UTF-8");
		printf("\n 5. UTF-8 to Wide Character");
		printf("\n Enter  your choice [0 to quit]: ");

		/* Get the input */
		iItems = scanf("%d", &iChoice);
		if (iItems == EOF) {
			/* No more input: leave instead of looping on a stale choice */
			iChoice = 0;
			break;
		}
		if (iItems != 1) {
			/* Not a number: fall through to the default case */
			iChoice = -1;
		}
		/* Drop the rest of the line so the cases below start on fresh input */
		discard_line();

		switch (iChoice) {
		case 1:
		{
			/* Multibyte to Wide Character */
			/* Copy the japanese character */
			strcpy(acString, "\x0e\x41\x71\x0f");
			/* Set the locale and check the return value for error */
			if (setlocale(LC_ALL, "ja_JP.utf-8") == NULL) {
				printf("\n Locale failed");
			}
			uLength = strlen(acString);
			/* Convert to wide char, leaving room for the terminator */
			uConverted = mbstowcs(awArr, acString, WCMAXLEN - 1);
			if (uConverted == (size_t)-1) {
				/* awArr holds nothing usable after an invalid sequence */
				printf("\n Conversion failed");
				break;
			}
			awArr[uConverted] = L'\0';
			printf("\n Wide character string: %ls", awArr);
			printf("\n Length : %d", (int)uLength);
		}
			break;
		case 2:
		{
			/* ASCII to Wide Character */
			printf("\n Enter a String [size <= 5 chars] : ");
			/* stdin is already byte oriented (scanf above), so use the
			   narrow scanf with %ls; the width keeps awArr from overflowing */
			iItems = scanf("%" STRINGIFY(WCMAXCHARS) "ls", awArr);
			discard_line();
			if (iItems != 1) {
				printf("\n Error in reading...");
				break;
			}
			printf("\n Wide Character String : %ls",awArr);
			/* Write to file */
			pFile = fopen("wide.txt", "a+");
			if (pFile != NULL) {
				/* Wide character file operation */
				iFailed = (fwprintf(pFile, L"%ls\n", awArr) < 0);
				if (fclose(pFile) != 0 || iFailed) {
					printf("\n Error in writing...");
				}
			}
			else {
				printf("\n Error in opening...");
			}
		}
			break;
		case 3:
		{
			/* ASCII to UTF8 */
			printf("\n Enter a String [size <= 5 chars] : ");
			if (fgets(acString, sizeof(acString), stdin) == NULL) {
				printf("\n Error in reading...");
				break;
			}
			if (strchr(acString, '\n') == NULL) {
				/* Line was longer than the buffer: drop the remainder */
				discard_line();
			}
			uLength = strlen(acString);
			pFile = fopen("utf.txt", "a+");
			if (pFile == NULL) {
				printf("\n Error in opening...");
			}
			else {
				printf("\n UTF8 String : ");
				for (uIndex = 0; uIndex < uLength; uIndex++) {
					/* Go through unsigned char so bytes >= 128 are not seen
					   as negative; each byte is treated as the code point
					   of the same value (1 byte below 128, else 2 bytes) */
					unsigned long ulCode = (unsigned char)acString[uIndex];

					put_utf8(ulCode, pFile);
					put_utf8(ulCode, stdout);
				}
				iFailed = ferror(pFile);
				if (fclose(pFile) != 0 || iFailed) {
					printf("\n Error in writing...");
				}
			}
		}
			break;
		case 4:
		/*  Wide Character to UTF8 */
		{
			/* Select the locale before the stream starts decoding */
			if (setlocale(LC_ALL, "en_US.utf-8") == NULL) {
				printf("\n Locale failed");
			}
			/* Read back the file written by choice 2 */
			pFile = fopen("wide.txt", "r");
			if (pFile != NULL) {
				/* Wide character file operation */
				while (fgetws(awArr, WCMAXLEN, pFile) != NULL) {
					printf("\n Wide character string: %ls", awArr);
					/* Encode every wide character except the line's
					   trailing newline */
					uLength = wcslen(awArr);
					if (uLength > 0 && awArr[uLength - 1] == L'\n') {
						uLength--;
					}
					printf("\n UTF8 String : ");
					/* Convert to UTF */
					for (uIndex = 0; uIndex < uLength; uIndex++) {
						put_utf8((unsigned long)awArr[uIndex], stdout);
					}
				}
				if (ferror(pFile)) {
					printf("\n Error in reading...");
				}
				fclose(pFile);
			}
			else{
				printf("\n Error in opening...");
			}
		}
			break;
		case 5:
		/* UTF8 to Wide Character */
		{
			/* fgetws decodes with the current locale, so request a UTF-8
			   one here instead of relying on an earlier menu choice */
			if (setlocale(LC_ALL, "en_US.utf-8") == NULL) {
				printf("\n Locale failed");
			}
			pFile = fopen("utf.txt", "r");
			if (pFile != NULL) {
				/* Reading directly as wide character */
				while (fgetws(awArr, WCMAXLEN, pFile) != NULL) {
					printf("\n Wide Character : %ls",awArr);
				}
				if (ferror(pFile)) {
					printf("\n Error in reading...");
				}
				fclose(pFile);
			}
			else{
				printf("\n Error in opening...");
			}
		}
			break;
		default:
			;
			break;
		}
		printf("\n\n Press any key to continue...");
		/* Wait for Enter and swallow anything typed before it */
		discard_line();
	} while (iChoice != 0);
	return 0;
}
Be a part of the DaniWeb community

We're a friendly, industry-focused community of developers, IT pros, digital marketers, and technology enthusiasts meeting, networking, learning, and sharing knowledge.