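/*
 * 個人計算機(PC)の文書を計算機網(internet)形式の文書への変換道具
 *   (Tool for converting personal-computer (PC) documents into
 *    Internet-format documents.)
 * PC符号対広域通信符号変換具
 *   (PC code to wide-area communication code converter.)
 */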
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/* Program identification strings. */
#define COPYRIGHT     "(C)Rick Wakatori(若鳥陸夫),1988"
#define VERSION       "[Version 4.82 (1995-01-09)]"
/* Single-byte character codes, named after the ASCII / ISO 646 characters
   whose values they carry. */
#define NUL       0x00
#define SOH       0x01
#define ETX       0x03
#define EOT       0x04
#define ENQ       0x05
#define ACK       0x06
#define BS        0x08
#define HT        0x09
#define LF        0x0A
#define CR        0x0D
#define SO        0x0E
#define SI        0x0F
#define DLE       0x10
#define DC1       0x11
#define DC3       0x13
#define SYN       0x16
#define SUB       0x1A
#define ESC       0x1B
#define FS        0x1C
#define SP        0x20
/* NOTE(review): in ASCII 0x2C is ',' and 0x38 is '8'; '+' is 0x2B and '.'
   is 0x2E.  Confirm the next two values before relying on their names. */
#define PLUS      0x2C
#define PERIOD    0x38
#define DEL       0x7F
#define EOL        NUL
/* Sizes.  StrMax is the dimension of the unsigned line arrays passed between
   the conversion functions (element 0 of such an array holds the element
   count); LTAB is the tab-stop width used by insertSP. */
#define LINELENGTH  80
#define StrMax    4000
#define BUFSIZE  32000
#define LTAB         8
/* Boolean values. */
#define FALSE        0
#define TRUE         1
/* Values of the `method` argument of sendC. */
#define SENDC0       0
#define SENDG0       1
#define TESTKBD      1
/* Code-system selectors; SHIFTJIS is the initial value of fileCode. */
#define SHIFTJIS     0
#define G1KANJI      1
/* Escape sequences, written as octal escapes; every one starts with ESC
   (\033).
   NOTE(review): ASCII_TO_G0 ends in 0x40 ('@') whereas sendAlphDesignator
   emits ESC, G0, ASCII with ASCII defined as 0x42 -- confirm which final
   byte is intended.
   NOTE(review): JISKANA_TO_G2 contains the 0x24 byte that the other
   sequences here use only for the kanji sets, while JISKANA_TO_G1 does
   not -- confirm. */
#define ANNOUNCE_G0_G1              "\033\040\104"
#define ANNOUNCE_SS2                "\033\040\132"
#define ISO646_TO_C0                "\033\041\100"
#define OLDJISKANJI_TO_G0           "\033\044\100"
#define JISKANJI_TO_G0              "\033\044\102"
#define NJISKANJI_TO_G0             "\033\044\050\102"
#define JISKANJI_TO_G1              "\033\044\051\102"
#define JISKANA_TO_G2               "\033\044\052\111"
#define ASCII_TO_G0                 "\033\050\100"
#define SWEDISHNAME_TO_G0           "\033\050\110"
#define JISROMAN_TO_G0              "\033\050\112"
#define JISKANA_TO_G1               "\033\051\111"
#define SS2                         "\033\116"
/* Individual bytes of the escape sequences above: intermediate bytes ... */
#define ANNOUNCE                      0x20
#define C0                            0x21
#define G0                            0x28
#define G1                            0x29
#define G2                            0x2A
#define TwoByteG0                     0x24
/* ... and final bytes (ASCII and JISkanji share the value 0x42). */
#define OLDJISkanji                   0x40
#define ASCII                         0x42
#define JISkanji                      0x42
#define SwedishName                   0x48
#define JISkana                       0x49
#define JISroman                      0x4A
/* Byte/code classification predicates; all range tests go through the
   isRange() function, so the arguments are compared as unsigned values. */
#define isKatakana(c)                 (isRange(c,0xA1,0xDF))
#define isG1kanjiByte1(c)             (isRange(c,0xA1,0xF4))
#define isG1kanjiByte2(c)             (isRange(c,0xA1,0xFF))
#define isSjkanjiSecondOrder(c)       (isRange(c,0x4F,0xF4))
#define isG1kanjiSecondOrder(c)       (isRange(c,0xCF,0xF4))
/* Shift-JIS lead byte: 0x81..0x9F or 0xE0..0xEA. */
#define isSjByte1(c)                  ((isRange(c,0x0081,0x009F) ||\
					isRange(c,0x00E0,0x00EA)) ? 1 : 0)
/* Shift-JIS trail byte: 0x40..0xFE. */
#define isSjByte2(c)                  (isRange(c,0x40,0xFE) ? 1 : 0)
/* True for the two-byte code 0x8140.  NOTE(review): the argument is not
   parenthesised in the expansion, so pass only simple expressions. */
#define isSjSpace(c)                  ((c==0x8140) ? 1 : 0)
#define isOdd(c)                      ((c) % 2 != 0)
/*
 * Output-side conversion state.
 *   kanji    : set TRUE by sendKanjiDesignator and FALSE by
 *              sendAlphDesignator, i.e. whether the kanji set is currently
 *              selected on output.
 *   k        : set to 1 after a kanji is written and 0 after a single byte.
 *   sf1, sf2 : cleared to NUL by sendC(SENDG0, ...).
 *   soExist, jis1, jis2 : not referenced in the visible part of this file.
 */
struct status {
	 int    soExist, kanji, k, jis1, jis2, sf1, sf2;
       };
struct status sd;                 /* the single global output state      */
char es4[6] = {0,0,0,0,0,0};      /* not referenced in the visible code  */
int  announcess2   = FALSE;       /* not referenced in the visible code  */
int  fileCode      = SHIFTJIS;    /* input code system selector          */
int  g1Kanji       = FALSE;       /* TRUE: switch sets with SO/SI instead
                                     of escape sequences (see the two
                                     send...Designator functions)        */
int  g2kana        = FALSE;       /* not referenced in the visible code  */
int  jisKanji      = TRUE;        /* FALSE makes send() pass every code
                                     straight to sendC(SENDG0, ...)      */
unsigned tmp;                                  /* temporary storage */
FILE *fi;                         /* source file opened by main          */

/*
 * help - print the usage / feature summary on standard output.
 * Always returns 1.
 */
int help()
{
  /* The banner, one entry per output line, emitted verbatim. */
  static const char *const banner[] = {
    " SJtoJIS, Version 1.1, Copyright 1994 Rick Wakatori.         \n",
    " Usage (指令文) : SJtoJIS  sourceFile [ > objectFile ]       \n",
    "     Where the symbols are specified as follows(意味):       \n",
    "     SJtoJIS    : The command header(指令語),                \n",
    "     sourceFile : Source file in Shiftjis code(原始ファイル),\n",
    "     objectFile : Object file in JIS(G0) code(目的ファイル). \n",
    " Functions (機能) : 網間通信のためのファイル変換             \n",
    "     (a)Shiftjis code          --> JIS(G0)Kanji(符号変換),   \n",
    "     (b)Non-standard codes     --> Dummy character(外字削除),\n",
    "     (c)2 byte Alphbet/Numeric --> 1 byte ASCII,             \n",
    "     (d)1 byte Katakana        --> 2 byte Katakana,          \n",
    "     (e)Tab code               --> Maximum 8 space codes,    \n",
    "     (f)Kanji space            --> 2 spaces in ASCII.        \n",
    " ----------------------------------------------------------- \n"
  };
  unsigned row;

  for (row = 0; row < sizeof banner / sizeof banner[0]; row++)
    fputs(banner[row], stdout);
  return 1;
} /* help */

/*
 * isRange - inclusive range test on unsigned values.
 * Returns 1 when y <= x <= z, otherwise 0.
 */
int isRange(unsigned x,unsigned y,unsigned z)
{
   if (x < y)
      return 0;                 /* below the lower bound */
   return (x <= z) ? 1 : 0;
}

/*
 * writeChar - write one byte to standard output.
 *
 * c is converted to unsigned char on output; the value DEL is silently
 * dropped.  Returns 1 (the return type was previously implicit int with no
 * value returned).
 */
int writeChar(int c)
{
   if (c != DEL)
      putchar(c);
   return 1;
}

/*
 * sendAlphDesignator - switch the output back to the single-byte set.
 *
 * When g1Kanji is FALSE the three bytes ESC, G0, ASCII are written;
 * otherwise a single SI is written.  sd.kanji is cleared either way.
 * Returns 1.
 */
int sendAlphDesignator(void)
{
    if (g1Kanji) {
	writeChar(SI);
    }
    else {
	writeChar(ESC); writeChar(G0); writeChar(ASCII);
    }
    sd.kanji = FALSE;
    return 1;
} /* sendAlphDesignator */

/*
 * sendKanjiDesignator - switch the output to the two-byte kanji set.
 *
 * When g1Kanji is FALSE the three bytes ESC, TwoByteG0, JISkanji are
 * written; otherwise a single SO is written.  Sets sd.kanji to TRUE and
 * sd.k to 1.  Returns 1.
 */
int sendKanjiDesignator(void)
{
   if (g1Kanji) {
	writeChar(SO);
   }
   else {
	writeChar(ESC); writeChar(TwoByteG0); writeChar(JISkanji);
   }
   sd.kanji = TRUE;
   sd.k = 1;
   return 1;
} /* sendKanjiDesignator */

/*
 * sendC - write one single-byte code and update the output state.
 *
 * method : SENDC0 resets sd.k only; SENDG0 additionally clears sd.sf1 and
 *          sd.sf2.  Any other value leaves the state untouched.
 * c      : the code to write (passed to writeChar).
 * Returns 1.
 */
int sendC(int method,int c)
{
    writeChar(c);
    switch (method) {
      case SENDC0   : sd.k = 0; break;
      case SENDG0   : sd.sf1 = sd.sf2 = NUL; sd.k = 0; break;
      default       : break;
    } /* switch method*/
    return 1;
} /* sendC */

/*
 * sendAnnounceG0 - emit SI followed by the announcement / designation
 * sequences for the "kanji in G0" mode, then clear g1Kanji.
 * Returns 1.
 */
int sendAnnounceG0(void)
{
    /* sendStr is defined further down the file; declare it here so the
       calls below are not implicit declarations. */
    int sendStr(char *s);

    sendC(SENDC0, SI);
    sendStr(ANNOUNCE_G0_G1);             /* ESC 2/0 4/4      */
    sendStr(ISO646_TO_C0);               /* ESC 2/1 4/0      */
    sendStr(JISROMAN_TO_G0);             /* ESC 2/8 4/10     */
    sendStr(JISKANA_TO_G1);              /* ESC 2/9 4/9      */
    g1Kanji = FALSE;
    return 1;
} /* sendAnnounceG0 */

/*
 * sendAnnounce_G1 - emit SI followed by the announcement / designation
 * sequences for the "kanji in G1, kana in G2" mode.  Returns 1.
 *
 * NOTE(review): unlike sendAnnounceG0, which clears g1Kanji, this function
 * does not touch g1Kanji -- confirm whether it should set it to TRUE.
 */
int sendAnnounce_G1(void)
{
    /* sendStr is defined further down the file; declare it here so the
       calls below are not implicit declarations. */
    int sendStr(char *s);

    sendC(SENDC0, SI);
    sendStr(ANNOUNCE_G0_G1);            /* ESC 2/0 4/4      */
    sendStr(ANNOUNCE_SS2);              /* ESC 2/0 5/10     */
    sendStr(ISO646_TO_C0);              /* ESC 2/1 4/0      */
    sendStr(JISROMAN_TO_G0);            /* ESC 2/8 4/10     */
    sendStr(JISKANJI_TO_G1);            /* ESC 2/4 2/9 4/2  */
    sendStr(JISKANA_TO_G2);             /* ESC 2/4 2/10 4/9 */
    return 1;
} /* sendAnnounce_G1 */

/*
 * sendKanji - write a two-byte code as two 7-bit bytes.
 *
 * The high byte and the low byte of c are each masked to 7 bits (which
 * strips the 0x8080 marking) and written in that order; sd.k is set to 1.
 * Returns 1.
 */
int sendKanji(int c)
{
   writeChar((c & 0x7F00) >> 8);
   writeChar(c & 0x007F);
   sd.k = 1;
   return 1;
} /* sendKanji */

/*
 * send - write one internal code, switching character sets as required.
 *
 *  - jisKanji FALSE: the code is written as-is via sendC(SENDG0, ...).
 *  - high byte in 0xA1..0xF4: treated as kanji; the kanji designator is
 *    sent first if sd.kanji is not already set.
 *  - control or white-space byte: the alphabetic designator is sent first
 *    if sd.kanji is set, then the byte goes out via sendC(SENDC0, ...).
 *  - anything else: same, but via sendC(SENDG0, ...).
 *
 * Returns 1.
 */
int send(int c)
{
    /* <ctype.h> classifiers are only defined for EOF and values
       representable as unsigned char, so test the range first. */
    int isByte = (c >= 0 && c <= 0xFF);

    if (! jisKanji)
	 sendC(SENDG0, c);
    else if (isG1kanjiByte1((c & 0xFF00)>>8)){
	 if (! sd.kanji) sendKanjiDesignator();
	 sendKanji(c);
    }
    else if (isByte && (iscntrl(c) || isspace(c))){
	 if (sd.kanji) sendAlphDesignator(); /* for RFC 1468 only */
	 sendC(SENDC0, c);
    }
    else {
	 if (sd.kanji) sendAlphDesignator();
	 sendC(SENDG0, c);
    }
    return 1;
} /* send */

/*
 * sendStr - pass every byte of the NUL-terminated string s to send().
 *
 * Each byte is converted through unsigned char so that bytes >= 0x80 are
 * not sign-extended on platforms where plain char is signed.
 * Returns 1, or 0 when s is a null pointer (nothing is sent).
 */
int sendStr(char *s)
{
  char *p;

  if (s == NULL)
    return 0;
  for (p = s; *p != NUL; p++)
    send((unsigned char)*p);
  return 1;
} /* sendStr */

/*
 * error - display an error status on standard output.
 *
 * err : 0 prints s as a warning, 1 prints s as an error; any other value
 *       prints nothing.
 * s   : message text (a null pointer is printed as an empty message).
 * Returns 0 for err == 1 and 1 otherwise, so callers can assign the result
 * to a "success" flag.
 *
 * The warning format previously contained a stray "%d" with no matching
 * argument, which is undefined behaviour; it has been removed.
 */
int error(int err, char *s)
{
  int r = 1;

  if (s == NULL)
    s = "";
  switch (err) {
  case 0 : printf("[WARNING: %s]\n", s);
	   break;
  case 1 : printf("[ERROR: %s]\n", s);
	   r = 0;
	   break;
  default: break;
  }
  return r;
} /* error */

/*
 * s2j - convert a two-byte Shift-JIS code into the corresponding JIS code
 * with bit 7 of both bytes set (0x8080 OR-ed in).
 *
 * s : Shift-JIS code, lead byte in bits 8..15, trail byte in bits 0..7.
 * j : receives the converted code.
 * Always returns 1.
 */
int s2j(unsigned s, unsigned *j)
{
  const unsigned lead  = s & 0xFF00;
  const unsigned trail = s & 0x00FF;
  /* Lead bytes up to 0x9F and those above use different offsets. */
  const unsigned bias  = (lead <= 0x9F00) ? 0x7000 : 0xB000;
  unsigned code = (lead - bias) + (lead - bias);

  if (trail >= 0x009F) {
    /* Upper half of the trail range: second row of the pair. */
    code += trail - 0x007E;
  }
  else {
    /* Lower half: first row of the pair; 0x7F is skipped by Shift-JIS. */
    if (trail >= 0x0040 && trail <= 0x007E)
      code += trail - 0x001F;
    else if (trail >= 0x0080)
      code += trail - 0x0020;
    code -= 0x0100;
  }
  *j = code | 0x8080;
  return 1;
} /* s2j */

/*
 * insertSP - expand a tab into SP codes at position j of s.
 *
 * i : unused (kept for interface compatibility).
 * j : index in s at which the first space is stored.
 * s : line array; s[0] is updated to the index of the last space written.
 *
 * When j sits on a tab stop (j % LTAB == 1) and LTAB more elements fit, a
 * full LTAB spaces are stored.  Otherwise spaces are stored until the next
 * tab stop is reached, while j stays within s[0] + LTAB.
 * Returns the index following the last space.
 *
 * The fill loop is additionally bounded by StrMax so that it can never
 * store past the end of s.
 */
int insertSP(int i, int j, unsigned s[StrMax])/* tab code into spaces */
{
    int k;

    (void)i;
    if (((j % LTAB) == 1) && ((j + LTAB) < StrMax)){
	for (k = 0; k < LTAB; k++){
	  s[j++] = SP;
	}
    }
    else {
	/* (unsigned)j keeps the original unsigned comparison with s[0]. */
	while (((j % LTAB) != 1) && (j < StrMax) &&
	       ((unsigned)j <= s[0] + LTAB)){
	    s[j++] = SP;
	}
    }
    s[0] = j - 1;
    return j;
} /*insertSP*/

/*
 * readLine - read one text line from fi into the code array s.
 *
 * Visible behaviour: s[0] is cleared; unless feof(fi) is already set, one
 * line is fetched into the local byte buffer b with fgets() and its length
 * is taken with strlen().  On the non-overflow path s[0] receives j-1.
 * Returns r.
 *
 * NOTE(review): the `while` line below is corrupted in this copy of the
 * file (unbalanced parentheses, and the following `else` has no matching
 * `if`).  The loop body that presumably filled s from b, and the condition
 * guarding the "String overflow." report, appear to have been lost.  The
 * function cannot compile as it stands; restore it from a pristine copy.
 * NOTE(review): r is returned uninitialised when feof(fi) is already true,
 * (char)fgets(...) narrows a pointer to char so r may be 0 even on success,
 * and strlen(b) is evaluated even when fgets() fails.
 */
int readLine(FILE *fi, unsigned s[StrMax])       /* read a line into s */
{
  int i=0, IOresult, j=1, k, r;
  unsigned c1, c2, c, jis[1], tmp;
  char b[StrMax];                  /* raw bytes of the current line */

  s[0] = 0;
  if (!feof(fi)){
      r=(char)fgets(b,StrMax,fi); k=strlen(b); s[0]=0;/* read a line   */
      /* NOTE(review): corrupted line -- see the header comment. */
      while ((i= StrMax) r=error(1,"String overflow.");
      else s[0]=j-1;                               /* put the length */
  }
  return r;
} /*readLine*/

/*
 * alph2ASCII - replace two-byte alphanumerics by their one-byte ASCII form.
 *
 * s : source line (s[0] = element count, codes in s[1..s[0]]).
 * t : destination line, same layout; t[0] receives the element count.
 *
 * Codes 0xA3B0..0xA3FA whose low byte is a digit (0xB0..0xB9), a capital
 * letter (0xC1..0xDA) or a small letter (0xE1..0xFA) become low byte - 0x80.
 * Every other code is copied unchanged -- including the codes inside
 * 0xA3B0..0xA3FA that are not alphanumeric, which previously left the
 * destination element uninitialised.
 * Always returns 1.
 */
int alph2ASCII(unsigned s[StrMax],unsigned t[StrMax])  /* 2 byte AN to ASCII */
{
    unsigned i;
    unsigned low;

    t[0] = 0;
    for (i = 1; i <= s[0]; i++) {
	t[i] = s[i];                               /* default: plain copy */
	if (isRange(s[i],0xA3B0,0xA3FA)) {
		low = s[i] & 0x00FF;
		if (isRange(low,0xB0,0xB9) ||      /* 0..9         */
		    isRange(low,0xC1,0xDA) ||      /* Capital A..Z */
		    isRange(low,0xE1,0xFA))        /* Small a..z   */
			t[i] = low - 0x80;
	}
    }
    t[0] = i - 1;                      /* put the length             */
    return 1;
} /* alph2ASCII */

/*
 * kana2Kanji - replace one-byte katakana (0xA1..0xDF) by two-byte kana.
 *
 * s : source line (s[0] = element count, codes in s[1..s[0]]).
 * t : destination line, same layout; t[0] receives the element count.
 *
 * A following one-byte voiced mark (0xDE) or semi-voiced mark (0xDF) is
 * merged into the preceding kana where a combined code exists, so t may be
 * shorter than s.  Codes outside 0xA1..0xDF are copied unchanged.  Every
 * resulting element >= 0x2121 gets 0x8080 OR-ed in.
 * Always returns 1.
 *
 * Fixes over the previous version: the look-ahead default is 0 instead of
 * the pointer constant NULL; a voiced / semi-voiced mark that cannot be
 * merged is now emitted as 0x212B / 0x212C instead of leaving the
 * destination element uninitialised; U + voiced mark becomes VU (0x2574).
 */
int kana2Kanji(unsigned s[StrMax],unsigned t[StrMax]) /* to 2 byte kana*/
{   unsigned i=1, j=1;
    unsigned k, next;

    t[0] = 0;
    while ( i <= s[0]) {
	if ((s[i] <= 0xA0) || (0xE0 <= s[i])){/* copy a non kana code */
		t[j] = s[i];
	}
	else {  k = s[i];
		/* look one element ahead for a combining mark */
		if ((i < s[0]) && (s[i + 1] <= 0x00FF))
		    next = s[i + 1];
		else next = 0;
		if (k==0xA1)      t[j] = 0x2123;        /* ku_ten    */
		else if (k==0xA2) t[j] = 0x2156;        /* kagi_ue   */
		else if (k==0xA3) t[j] = 0x2157;        /* kagi_shita*/
		else if (k==0xA4) t[j] = 0x2122;        /* tou_ten   */
		else if (k==0xA5) t[j] = 0x2126;        /* naka_ten  */
		else if (k==0xA6) t[j] = 0x2572;        /* wo        */
		else if (isRange(k,0xA7,0xAB))          /* small_a..o*/
				  t[j] = 0x2521 + 2 * (k - 0xA7);
		else if (isRange(k,0xAC,0xAE))       /* small ya..yo */
				  t[j] = 0x2563 + 2 * (k - 0xAC);
		else if (k==0xAF) t[j] = 0x2543;        /* small_tsu */
		else if (k==0xB0) t[j] = 0x213C;        /* chouon    */
		else if (isRange(k,0xB1,0xB5)) {        /* a..o      */
				  t[j] = 0x2522 + 2 * (k - 0xB1);
				  if ((k==0xB3) && (next==0xDE)){ /* vu */
					t[j] = 0x2574; i++;
				  }
		}
		else if (isRange(k,0xB6,0xC1)) {        /* ka..chi   */
				  t[j] = 0x252B + 2 * (k - 0xB6);
				  if (next==0xDE){      /* ga..di    */
					t[j]++; i++;
				  }
		}
		else if (isRange(k,0xC2,0xC4)) {        /* tsu..to   */
				 t[j] = 0x2544 + 2 * (k - 0xC2);
				 if (next==0xDE){       /* zu..do    */
					t[j]++; i++;
				 }
		}
		else if (isRange(k,0xC5,0xC9))          /* na..no    */
				 t[j] = 0x254A + k - 0xC5;
		else if (isRange(k,0xCA,0xCE)){         /* ha..ho    */
				 t[j] = 0x254F + 3 * (k - 0xCA);
				 if (next == 0xDE){     /* ba..bo    */
				     t[j]++; i++;
				 }
				 else if (next == 0xDF){/* pa..po    */
				     t[j]+=2; i++;
				 }
		}
		else if (isRange(k,0xCF,0xD3))
				 t[j]=0x255E + k - 0xCF;/* ma..mo    */
		else if (isRange(k,0xD4,0xD6))
				 t[j]=0x2564 + 2*(k - 0xD4);/* ya..yo*/
		else if (isRange(k,0xD7,0xDB))
				 t[j]=0x2569 + k - 0xD7;/* ra..ro    */
		else if (k==0xDC)      t[j] = 0x256F;   /* wa        */
		else if (k==0xDD)      t[j] = 0x2573;   /* n'        */
		else if (k==0xDE)      t[j] = 0x212B;   /* dakuten   */
		else                   t[j] = 0x212C;   /* handakuten (0xDF) */
	     }
	     if (0x2121 <= t[j])       /* if kanji code then         */
		  t[j] |= 0x8080;      /* invoke the code into right */
	i++; j++;
	}
    t[0] = j-1;                        /* put the length             */
    return 1;
} /* kana2Kanji */

/*
 * deleteNon - replace non-standard two-byte codes by a dummy character.
 *
 * s : source line (s[0] = element count, codes in s[1..s[0]]).
 * t : destination line, same layout; t[0] receives the element count.
 *
 * Elements <= 0x00FF are copied.  For the others the code with the 0x8080
 * bits stripped is looked up in a table of unassigned ranges; a hit is
 * replaced by 0x222E | 0x8080, a miss is copied.
 * Always returns 1.
 */
int deleteNon(unsigned s[StrMax], unsigned t[StrMax])/*delete non standard*/
{
  /* Inclusive ranges of non JIS kanji codes (7-bit form).  The two wide
     entries cover whole rows 0x29..0x2F and 0x75..0x7E; because the low
     byte of a stripped code never exceeds 0x7F they match exactly those
     rows. */
  static const unsigned hole[][2] = {
    {0x222F,0x2239}, {0x2242,0x2249}, {0x2251,0x225B},
    {0x226B,0x2271}, {0x227A,0x227D},
    {0x2321,0x232F}, {0x233A,0x2340}, {0x235B,0x2360}, {0x237B,0x237E},
    {0x2474,0x247E},
    {0x2577,0x257E},
    {0x2639,0x2640}, {0x2659,0x267E},
    {0x2742,0x2750}, {0x2772,0x277E},
    {0x2841,0x287E},
    {0x2900,0x2F7F},
    {0x4F54,0x4F7E},
    {0x7427,0x747E},
    {0x7500,0x7E7F}
  };
  const unsigned holes = sizeof hole / sizeof hole[0];
  unsigned pos, n, code;
  int found;

  for (pos = 1; pos <= s[0]; pos++) {
    if (s[pos] <= 0x00FF) {                 /* copy ASCII            */
      t[pos] = s[pos];
      continue;
    }
    code = s[pos] & 0x7F7F;                 /* strip 0x8080 off      */
    found = 0;
    for (n = 0; n < holes && !found; n++)
      found = isRange(code, hole[n][0], hole[n][1]);
    /* keep standard codes, substitute the alternative otherwise */
    t[pos] = found ? (0x222E | 0x8080) : s[pos];
  }
  t[0] = pos - 1;                           /* put the length        */
  return 1;
} /* deleteNon */

/*
 * convert - run the three line filters in sequence.
 *
 * s : source line (s[0] = element count).
 * t : receives the converted line.
 *
 * The intermediate buffers are both dimensioned with StrMax (one of them
 * previously used the literal 4000) so that they stay in step with the
 * parameter arrays if StrMax is ever changed.
 * Always returns 1.
 */
int convert(unsigned s[StrMax], unsigned t[StrMax])   /* converts into t */
{
  unsigned u[StrMax], v[StrMax];

  deleteNon(s, u);  /* change non standard code into dummy character. */
  alph2ASCII(u, v); /* 2 byte alphabet and numeric to ASCII           */
  kana2Kanji(v, t); /* 1 byte kana char to 2 bytes kana character.    */
  return 1;
} /* convert */

/*
 * writeResult - output a converted line.
 *
 * Every element t[1..t[0]] except those equal to SUB is handed to send().
 * Always returns 1.
 */
int writeResult(unsigned t[StrMax])
{
  int pos = 1;

  while (pos <= t[0]) {
    if (t[pos] != SUB)
      send(t[pos]);
    pos++;
  }
  return 1;
} /* writeResult */

/*
 * main - usage : sj2jis source [ > object ]
 *
 * With no argument the help text is printed.  With more than one argument a
 * warning is printed and the first argument is still used.  The source file
 * is then read line by line; each line is converted and written to standard
 * output.  Always returns 0.
 *
 * The "could not open" message is now built in a local buffer; it was
 * previously assembled with strcat() on a string literal, which is
 * undefined behaviour.  The read loop also stops on a stream error, where
 * feof() alone would never become true.
 */
int main(int argc,char *argv[])      /* usage : sj2jis source object */
{
  unsigned s[StrMax];              /* s[0] : number of source elements */
  unsigned t[StrMax];              /* t[0] : number of object elements */
  char msg[256];                   /* holds the open-failure message   */

  switch (argc) {
    case 0  : case 1  : help(); break;            /* call help message */
    case 2  : break;                              /* success testing   */
    default : error(0,"A souce file shall be specified.");
  }
  if (argc >1) {
	if ((fi = fopen(argv[1],"r"))==NULL){   /* open source file        */
	  /* %.200s bounds the name so the message always fits in msg. */
	  sprintf(msg, "Source %.200s could not open.", argv[1]);
	  error(1, msg);
	}
    else {
	  while (!feof(fi) && !ferror(fi)){
            if (readLine(fi,s)!=0){             /* read a line into s      */
	       convert(s,t);                    /* adjust interchange code */
	       writeResult(t);                  /* write the result        */
	    }
	  }
	  fclose(fi);                           /* close source file       */
    }
  }
  return 0;
} /* main */