For those of you who are seeking the answer to this question:
you need to do some manipulation with Unicode characters.
Every Persian character that you normally know (for example س ش ت ظ) actually has up to 4 different forms, and each form has its own Unicode code point:
- initial form
- medial form
- final form
- isolated form
Take the word سا as an example.
In this case, the initial س has a Unicode character that is different from the س in راس, which comes at the end of the word.
For a better understanding, look at the picture below:
![enter image description here]()
How can you get the Unicode character of each form?
Just go to this website, https://www.compart.com/en/unicode/, and search for your character.
Please note that Arabic and Persian use almost the same script, which is why the picture I uploaded says "Arabic letter Seen isolated form" for the س character.
Here is a class that you can use to look up the 4 different forms of a Persian character:
/**
 * Looks up the Unicode presentation forms (initial, medial, final and isolated)
 * of a single Persian letter.
 *
 * <p>Usage: call {@link #setCharc(char)} with the base letter, then read the
 * individual forms through the getters. Each getter returns a one-character
 * string holding the presentation-form code point.
 *
 * <p>Letters that never connect to the following letter (alef, dal, thal, reh,
 * zain, jeh, waw and alef with madda) have no initial or medial form; for those
 * positions the getters return the string {@code "\0"} (a single NUL character).
 * If the character passed to {@code setCharc} is not one of the supported
 * letters, or {@code setCharc} has not been called yet, every getter returns
 * {@code null}.
 *
 * <p>The letters in the lookup table are written as Unicode escapes so that the
 * table does not depend on the encoding the compiler assumes for this file.
 */
public class PersianCharachtersUnicode {

    /** Value reported for a positional form that the current letter does not have. */
    private static final String NO_FORM = "\0";

    /** The character most recently passed to {@link #setCharc(char)}. */
    char c;
    private String InitialFom_Unicode;
    private String MedialForm_Unicode;
    private String FinalForm_Unicode;
    private String IsolatedForm_Unicode;

    /**
     * Selects the letter whose presentation forms the getters will report.
     *
     * @param c the base Persian letter (for example U+0633 SEEN)
     */
    public void setCharc(char c) {
        this.c = c;
        calculate();
    }

    /** Fills the four form fields for the current value of {@link #c}. */
    private void calculate() {
        // Reset first: without this, an unsupported character would keep
        // reporting the forms of whichever letter was set before it.
        setForms(null, null, null, null);

        switch (c) {
            case '\u0622': // ALEF WITH MADDA ABOVE
                setForms(NO_FORM, NO_FORM, "\uFE82", "\uFE81");
                break;
            case '\u0627': // ALEF
                setForms(NO_FORM, NO_FORM, "\uFE8E", "\uFE8D");
                break;
            case '\u0628': // BEH
                setForms("\uFE91", "\uFE92", "\uFE90", "\uFE8F");
                break;
            case '\u067E': // PEH
                setForms("\uFB58", "\uFB59", "\uFB57", "\uFB56");
                break;
            case '\u062A': // TEH
                setForms("\uFE97", "\uFE98", "\uFE96", "\uFE95");
                break;
            case '\u062B': // THEH
                setForms("\uFE9B", "\uFE9C", "\uFE9A", "\uFE99");
                break;
            case '\u062C': // JEEM
                setForms("\uFE9F", "\uFEA0", "\uFE9E", "\uFE9D");
                break;
            case '\u0686': // TCHEH
                // Final form is U+FB7B; U+FE9B belongs to THEH (initial form).
                setForms("\uFB7C", "\uFB7D", "\uFB7B", "\uFB7A");
                break;
            case '\u062D': // HAH
                setForms("\uFEA3", "\uFEA4", "\uFEA2", "\uFEA1");
                break;
            case '\u062E': // KHAH
                setForms("\uFEA7", "\uFEA8", "\uFEA6", "\uFEA5");
                break;
            case '\u062F': // DAL
                setForms(NO_FORM, NO_FORM, "\uFEAA", "\uFEA9");
                break;
            case '\u0630': // THAL
                setForms(NO_FORM, NO_FORM, "\uFEAC", "\uFEAB");
                break;
            case '\u0631': // REH
                setForms(NO_FORM, NO_FORM, "\uFEAE", "\uFEAD");
                break;
            case '\u0632': // ZAIN
                setForms(NO_FORM, NO_FORM, "\uFEB0", "\uFEAF");
                break;
            case '\u0698': // JEH
                setForms(NO_FORM, NO_FORM, "\uFB8B", "\uFB8A");
                break;
            case '\u0633': // SEEN
                setForms("\uFEB3", "\uFEB4", "\uFEB2", "\uFEB1");
                break;
            case '\u0634': // SHEEN
                setForms("\uFEB7", "\uFEB8", "\uFEB6", "\uFEB5");
                break;
            case '\u0635': // SAD
                setForms("\uFEBB", "\uFEBC", "\uFEBA", "\uFEB9");
                break;
            case '\u0636': // DAD
                setForms("\uFEBF", "\uFEC0", "\uFEBE", "\uFEBD");
                break;
            case '\u0637': // TAH
                setForms("\uFEC3", "\uFEC4", "\uFEC2", "\uFEC1");
                break;
            case '\u0638': // ZAH
                setForms("\uFEC7", "\uFEC8", "\uFEC6", "\uFEC5");
                break;
            case '\u0639': // AIN
                setForms("\uFECB", "\uFECC", "\uFECA", "\uFEC9");
                break;
            case '\u063A': // GHAIN
                setForms("\uFECF", "\uFED0", "\uFECE", "\uFECD");
                break;
            case '\u0641': // FEH
                setForms("\uFED3", "\uFED4", "\uFED2", "\uFED1");
                break;
            case '\u0642': // QAF
                setForms("\uFED7", "\uFED8", "\uFED6", "\uFED5");
                break;
            case '\u06A9': // KEHEH (Persian kaf)
                setForms("\uFB90", "\uFB91", "\uFB8F", "\uFB8E");
                break;
            case '\u06AF': // GAF
                setForms("\uFB94", "\uFB95", "\uFB93", "\uFB92");
                break;
            case '\u0644': // LAM
                setForms("\uFEDF", "\uFEE0", "\uFEDE", "\uFEDD");
                break;
            case '\u0645': // MEEM
                setForms("\uFEE3", "\uFEE4", "\uFEE2", "\uFEE1");
                break;
            case '\u0646': // NOON
                setForms("\uFEE7", "\uFEE8", "\uFEE6", "\uFEE5");
                break;
            case '\u0648': // WAW
                setForms(NO_FORM, NO_FORM, "\uFEEE", "\uFEED");
                break;
            case '\u0647': // HEH
                setForms("\uFEEB", "\uFEEC", "\uFEEA", "\uFEE9");
                break;
            case '\u06CC': // FARSI YEH
                setForms("\uFBFE", "\uFBFF", "\uFBFD", "\uFBFC");
                break;
            default:
                // Unsupported character: the fields stay null from the reset above.
                break;
        }
    }

    /** Stores the four presentation forms of the current letter in one step. */
    private void setForms(String initialForm, String medialForm, String finalForm, String isolatedForm) {
        InitialFom_Unicode = initialForm;
        MedialForm_Unicode = medialForm;
        FinalForm_Unicode = finalForm;
        IsolatedForm_Unicode = isolatedForm;
    }

    /**
     * @return the initial form of the current letter, {@code "\0"} if the letter
     *         has no initial form, or {@code null} if no supported letter is set
     */
    public String getInitialFom_Unicode() {
        return InitialFom_Unicode;
    }

    /**
     * @return the final form of the current letter, or {@code null} if no
     *         supported letter is set
     */
    public String getFinalForm_Unicode() {
        return FinalForm_Unicode;
    }

    /**
     * @return the isolated form of the current letter, or {@code null} if no
     *         supported letter is set
     */
    public String getIsolatedForm_Unicode() {
        return IsolatedForm_Unicode;
    }

    /**
     * @return the medial form of the current letter, {@code "\0"} if the letter
     *         has no medial form, or {@code null} if no supported letter is set
     */
    public String getMedialForm_Unicode() {
        return MedialForm_Unicode;
    }
}