/* 共通なSGML文書の生成の模型プログラム (sample program for generating a common SGML document) */

/*****
*  MAKE_SGM.C - Sample program for making a SGML file for JEIDA-IC.
*
*        Version 1, Release 1 (1994.08.03T00.45.00UTC)
*
*  Copyright (c) 1994 Rick Wakatori(Nihon Unisys,Ltd). All rights reserved.
*
*  NOTE: This program is aimed to provide an initial simple tool
*        for CALS users which adds tags in any non-sgml-document for
*        making the common document for JEIDA-Information Center.
*        The source file code-sets is assumed to be "Shiftjis" or
*        a combination of ASCII and JIS(G1) which is nearly equal
*        to EUC(Extended Unix Code). The source code set is assumed
*        to be EUC unless Shiftjis is selected with the -s option.
*        The object file code is the combination of ASCII for left
*        side and JIS(G1) Kanji for right side of the code table.
*        Each code is held in an unsigned integer of two or more
*        bytes, a type called "wChar" in the program.
*        Codes outside the JIS standard are substituted by a dummy
*        character to keep the document openly interchangeable.
*        Alphabet and Numeric characters in JIS kanji code are converted
*        by the program to ASCII. 
*        It intentionally contains bugs.
*   ============= Comment in Japanese ================
*   著作権者  : 若鳥陸夫(日本ユニシス株,ただし,改良部分を除く。)
*   配布形式  : 配布は,次の(1)~(3)の条件を満たすこと。
*                     (1)無料配布とする。
*                     (2)すべての原始軟件を添付する。
*                     (3)一次著作権表示を表示する。
*   外部仕様概要: 1.文書記述言語SGML(JIS X 4151)によるタグを追加する。
*                   2.適用する文書型定義(DTD)は,日本電子工業振興協会
*                     CALS情報センターの一般文書であって,公開識別子
*                     "JEIDA c_document"とする。 
*                   3.原始文書の符号は,無指定の場合EUC,特に指定した
*                     場合だけシフトジスとする。
*                   4.目的ファイル(交換用)の符号系は,JIS(G1)漢字符号
*                     に統一する。これは,英数字及び漢字に関する限り,
*                     EUCとほぼ等しい。
*                   5.原始文書中の漢字符号の文字の扱いは,次の(a)~(g)
*                     のとおりとする。
*                     (a)標準外の文字(外字)は,埋め文字“〓”に置換
*                        する。
*                     (b)漢字間隔は,ASCIIの間隔2文字に置換する。
*                     (c)1バイト片仮名は,2バイト片仮名に置換する。
*                     (d)漢字符号中の英数字は,ASCIIの英数字に置換する。 
*                     (e)タブ符号は,8個の間隔に置換する。
*                     (f)CR符号は,削除しLF符号だけとする。
*                     (g)SUB符号(EOFに流用のことがある。)は,削除する。
*   免責       : 著者は,このプログラムの如何なる誤り及び使用によっ
*                   て利用者が被る如何なる損害も責任を持たない。
*   参考    : この軟件は,Unisys 6000シリーズのANSI Cによって,
*                   開発し,簡単な文例での試験だけを終了している。
*   ================ unquote in Japanese ====================  
******/

#include <stdio.h>
#include <string.h>
#include <ctype.h>

#define MAXBUF     1024          /* capacity of every line buffer (elements) */
#define wChar      unsigned int  /* one character: a single byte value or a
                                    two-byte code held as (hi << 8) | lo    */

/* First byte of a Shift-JIS two-byte code: 0x81-0x9F or 0xE0-0xEF.
   0xA1-0xDF are one-byte katakana and must NOT match, otherwise they
   would swallow the following byte instead of reaching kana2Kanji.    */
#define isSJkanji1(x) (((((x) >= 0x81) && ((x) <= 0x9F)) || \
                        (((x) >= 0xE0) && ((x) <= 0xEF))) ? 1 : 0)
/* First byte of a JIS(G1) two-byte code (right half of the code table). */
#define isJISkanji1(x) ((((x) >= 0xA1) && ((x) <= 0xF4)) ? 1 : 0)
/* Second byte of a Shift-JIS two-byte code: 0x40-0xFC except 0x7F.      */
#define isSJkanji2(x) (((((x) >= 0x40) && ((x) <= 0xFC)) && \
                        ((x) != 0x7F)) ? 1 : 0)
/* Second byte of a JIS(G1) two-byte code.                               */
#define isJISkanji2(x) ((((x) >= 0xA1) && ((x) <= 0xFE)) ? 1 : 0)
/* Two-byte "kanji space" in the source code set selected by `sj`.
   The argument and the whole expansion are parenthesized so the macro
   can be combined safely with && and ||.                                */
#define isKanjiSpace(x) ((((sj) && ((x) == 0x8140)) || \
                          ((!sj) && ((x) == 0xA1A1))) ? 1 : 0)

int debug          = 0;          /* Flag for debugging mode             */
int sj             = 0;          /* Flag for Shiftjis code              */
char  bf[MAXBUF];                /* Single byte buffer for reading file */
char  bf1[MAXBUF];               /* Single byte buffer for arguments    */
FILE *fi;                        /* Source document file                */
FILE *fo;                        /* Object document file                */

/*
 * wStrLen - number of elements in a[] before the terminating 0.
 *
 * The index is checked before the element is read, so a buffer with no
 * terminator is never read past a[MAXBUF-1]; MAXBUF is returned then.
 */
int wStrLen(wChar a[MAXBUF])
{ int i = 0;

  while ((i < MAXBUF) && (a[i] != 0)) i++;
  return i;
} /* wStrLen */

/*
 * printWchar - write one wChar to fo as one or two bytes.
 *
 * Two-byte codes stored by this program carry a JIS(G1) first byte (the
 * Shift-JIS input path converts through s2j), so the two-byte test uses
 * isJISkanji1 whatever the value of `sj`.  Anything else is written as
 * its low byte only.
 */
void printWchar(FILE *fo, wChar w)
{
  int hi = (int)((w >> 8) & 0x00FF);
  int lo = (int)(w & 0x00FF);

  if (isJISkanji1(hi))
       fputc(hi, fo);
  fputc(lo, fo);
} /* printWchar */

/*
 * wPrintLine - write the 0-terminated wChar string s to fo.
 *
 * Each element is written with printWchar.  No line terminator is added.
 * Returns the number of elements written.
 */
int wPrintLine(FILE *fo, wChar s[MAXBUF])
{  int i;
   int limit = wStrLen(s);

   for (i = 0; i < limit; i++)
     printWchar(fo, s[i]);
   return limit;
} /* wPrintLine */

/*
 * help_msg - print the command-line synopsis on standard output.
 *
 * The synopsis names the two file operands that main() requires after
 * the options.  Always returns 1.
 */
int help_msg(void)
{
  printf(" Usage : make_sgm {[-d] | [-s]} source_file object_file\n");
  printf(" where -d : Trace the program as option\n");
  printf("       -s : Source document file code is Shiftjis as option\n");
  return 1;
} /* help_msg */

/*
 * insertSP - expand one tab code into spaces up to the next tab stop.
 *
 * wp : 1-based position of the tab in the source line (the caller's
 *      read index after it has consumed the tab).
 * j  : in/out, index of the next free element of t; advanced by one for
 *      every space stored.
 * t  : destination buffer.
 *
 * Tab stops are every 8 positions, so between 1 and 8 spaces are stored
 * (8 when the tab itself sits on a stop, i.e. wp % 8 == 1).  Storing
 * stops early when only the last element of t is left, which is kept
 * free for the terminator.
 * Returns wp - 1, the 0-based position of the tab.
 */
int insertSP(int wp, int *j, wChar t[MAXBUF]) /*tab code into spaces */
{
    int col = wp;

    do {
	if (*j >= MAXBUF - 1)
		break;
	t[(*j)++] = 0x20;      /* store at *j, then advance the index */
	col++;
    } while ((col % 8) != 1);
    return wp - 1;
} /*insertSP*/

/*
 * s2j - convert one Shift-JIS two-byte code into a JIS(G1) code.
 *
 * s : Shift-JIS code, first byte in bits 8-15, second byte in bits 0-7.
 * j : receives the converted code with 0x8080 set.
 * Always returns 1.
 */
int s2j(wChar s, wChar *j)/* convert shiftjis into JIS(G1) code */
{
  const wChar lead  = s & 0xFF00;
  const wChar trail = s & 0x00FF;
  const wChar bias  = (lead <= 0x9F00) ? 0x7000 : 0xB000;
  wChar code = 2 * (lead - bias);     /* every lead byte spans two rows */

  if (trail >= 0x009F) {
       code += trail - 0x007E;        /* second (even) row of the pair  */
  }
  else {
       if ((trail >= 0x0040) && (trail <= 0x007E))
	    code += trail - 0x001F;
       else if (trail >= 0x0080)
	    code += trail - 0x0020;
       code -= 0x0100;                /* first (odd) row of the pair    */
  }
  *j = code | 0x8080;
  return 1;
} /* s2j */

/* isRange - 1 when y <= x <= z, otherwise 0. */
int isRange(wChar x,wChar y,wChar z)
{
   if (x < y)
       return 0;
   return x <= z;
}

/*
 * getWstr - read one line from fi and store it in s as wChar codes.
 *
 * Per source byte:
 *   - a tab code is expanded through insertSP;
 *   - any other control code below 0x20 except LF is dropped (only that
 *     byte, the following byte is still processed);
 *   - a kanji first byte (Shift-JIS when `sj` is set, JIS(G1) otherwise)
 *     is combined with the next byte: a kanji space becomes two ASCII
 *     spaces, an invalid or missing second byte becomes the substitute
 *     code 0xA2AE, a Shift-JIS code is converted by s2j, and a JIS(G1)
 *     code is stored as is;
 *   - every other byte is stored unchanged.
 *
 * s is always left 0-terminated.  If the expanded line does not fit,
 * "FATAL, String overflow." is printed and the rest of the line that was
 * read is discarded.
 *
 * Returns 0 when a line was read, non-zero when nothing could be read
 * (end of file or read error); s is then the empty string.
 */
int getWstr(FILE *fi,wChar s[MAXBUF])      /* read a line into s */
{
  int i = 0, j = 0, k;
  int overflow = 0;
  wChar c1, c2, tmp;
  char b[MAXBUF];

  s[0] = 0;
  if (fgets(b, MAXBUF, fi) == NULL)        /* test the read itself, */
      return 1;                            /* not feof() beforehand */
  k = (int)strlen(b);

  while (i < k) {
      /* One source byte stores at most 8 elements (a tab), so this
         check keeps room for them and for the terminator.           */
      if (j + 8 >= MAXBUF) {
	    overflow = 1;
	    break;
      }
      c1 = b[i++] & 0x00FF;                  /* strip off     */
      if (c1 == 0x09)
	    insertSP(i, &j, s);              /* tab to spaces */
      else if (isRange(c1,0x0000,0x001F) &&  /* control codes */
	       (c1 != 0x0A))
	    ;                                /* dropped       */
      else if (((sj) && (isSJkanji1(c1))) || /* first half of kanji code */
	       ((!sj) && (isJISkanji1(c1)))) {
	    if (i >= k) {                    /* no second byte left */
		s[j++] = 0xA2AE;
		break;
	    }
	    c2 = b[i++] & 0x00FF;
	    tmp = ((c1 << 8) & 0xFF00) | c2;
	    if ((isKanjiSpace(tmp))) {
		s[j++] = 0x20; s[j++] = 0x20; /* 2 spaces */
	    }
	    else if (((sj) && (!isSJkanji2(c2))) ||
		     ((!sj) && (!isJISkanji2(c2))))
		s[j++] = 0xA2AE;             /* alt character */
	    else if (sj) {
		s2j(tmp, &s[j]);             /* convert a code */
		j++;
	    }
	    else
		s[j++] = tmp;
      }
      else s[j++] = c1;                      /* store the code */
  }
  if (overflow) puts("FATAL, String overflow.");
  s[j] = 0;                                  /* terminate the string */
  return 0;
} /* getWstr */

/*
 * kanji2ASCII - copy s to t, replacing two-byte digits and Latin letters
 * by their one-byte ASCII equivalents.
 *
 * After 0x8080 is masked off, codes 0x2330-0x2339 (digits),
 * 0x2341-0x235A (capitals) and 0x2361-0x237A (small letters) carry the
 * ASCII value in their low byte; every other element is copied as is.
 * At most MAXBUF-1 elements are converted so that the terminating 0
 * always fits in t.  Always returns 1.
 */
int kanji2ASCII(wChar s[MAXBUF], wChar t[MAXBUF]) /*to 1 byte number*/
{ int i, limit;
  wChar k;

  limit = wStrLen(s);
  if (limit > MAXBUF - 1)
	limit = MAXBUF - 1;
  for (i = 0; i < limit; i++) {
	k = s[i] & 0x7F7F;                      /* strip 0x8080 off  */
	if ((0x2330 <= k && k <= 0x2339) ||     /* numbers           */
	    (0x2341 <= k && k <= 0x235A) ||     /* capital alphabets */
	    (0x2361 <= k && k <= 0x237A))       /* small alphabets   */
	    t[i] = k & 0x00FF;
	else
	    t[i] = s[i];                        /* copy as is        */
  }
  t[i] = 0;
  return 1;
} /* kanji2ASCII */

/*
 * kana2Kanji - copy s to t, replacing one-byte katakana (0xA1-0xDF) by
 * the corresponding two-byte code.
 *
 * A voiced (0xDE) or semi-voiced (0xDF) sound mark that follows a kana
 * able to carry it is merged into the single voiced / semi-voiced code,
 * so t may be shorter than s.  A mark that cannot be merged is converted
 * to the stand-alone mark (0x212B / 0x212C).  Elements outside
 * 0xA1-0xDF are copied unchanged.  Every element of t that is 0x2121 or
 * above gets 0x8080 set.  At most MAXBUF-1 elements are read so the
 * terminating 0 always fits.  Always returns 1.
 */
int kana2Kanji(wChar s[MAXBUF],wChar t[MAXBUF])  /* to 2 byte kana */
{   int i=0, j=0, limit;
    wChar k, next;

    limit = wStrLen(s);
    if (limit > MAXBUF - 1)
	limit = MAXBUF - 1;
    while (i < limit) {
	k = s[i];
	if ((k <= 0xA0) || (0xE0 <= k)) {       /* copy a non kana code */
		t[j] = k;
	}
	else {
		/* look ahead for a sound mark, never past the string */
		next = (i + 1 < limit) ? s[i + 1] : 0;
		if (k==0xA1)      t[j] = 0x2123;        /* ku_ten    */
		else if (k==0xA2) t[j] = 0x2156;        /* kagi_ue   */
		else if (k==0xA3) t[j] = 0x2157;        /* kagi_shita*/
		else if (k==0xA4) t[j] = 0x2122;        /* tou_ten   */
		else if (k==0xA5) t[j] = 0x2126;        /* naka_ten  */
		else if (k==0xA6) t[j] = 0x2572;        /* wo        */
		else if (isRange(k,0xA7,0xAB))          /* small_a..o*/
			t[j] = 0x2521 + 2 * (k-0xA7);
		else if (isRange(k,0xAC,0xAE))          /* small ya..yo */
			t[j] = 0x2563 + 2 * (k-0xAC);
		else if (k==0xAF) t[j] = 0x2543;        /* small_tsu */
		else if (k==0xB0) t[j] = 0x213C;        /* chouon    */
		else if (isRange(k,0xB1,0xB5)) {        /* a..o      */
			t[j] = 0x2522 + 2 * (k-0xB1);
			if ((k==0xB3) && (next==0xDE)) {/* vu        */
				t[j] = 0x2574; i++;
			}
		}
		else if (isRange(k,0xB6,0xC1)) {        /* ka..chi   */
			t[j] = 0x252B + 2 * (k-0xB6);
			if (next==0xDE) {               /* ga..di    */
				t[j]++; i++;
			}
		}
		else if (isRange(k,0xC2,0xC4)) {        /* tsu..to   */
			t[j] = 0x2544 + 2 * (k-0xC2);
			if (next==0xDE) {               /* zu..do    */
				t[j]++; i++;
			}
		}
		else if (isRange(k,0xC5,0xC9))          /* na..no    */
			t[j] = 0x254A + k - 0xC5;
		else if (isRange(k,0xCA,0xCE)) {        /* ha..ho    */
			t[j] = 0x254F + 3 * (k - 0xCA);
			if (next == 0xDE) {             /* ba..bo    */
				t[j]++; i++;
			}
			else if (next == 0xDF) {        /* pa..po    */
				t[j]+=2; i++;
			}
		}
		else if (isRange(k,0xCF,0xD3))          /* ma..mo    */
			t[j] = 0x255E + k - 0xCF;
		else if (isRange(k,0xD4,0xD6))          /* ya..yo    */
			t[j] = 0x2564 + 2 * (k-0xD4);
		else if (isRange(k,0xD7,0xDB))          /* ra..ro    */
			t[j] = 0x2569 + k - 0xD7;
		else if (k==0xDC) t[j] = 0x256F;        /* wa        */
		else if (k==0xDD) t[j] = 0x2573;        /* n'        */
		else if (k==0xDE) t[j] = 0x212B;        /* lone dakuten    */
		else              t[j] = 0x212C;        /* lone handakuten */
	}
	if (0x2121 <= t[j])       /* if kanji code then         */
		t[j] |= 0x8080;   /* invoke the code into right */
	i++; j++;
    }
    t[j] = 0;
    return 1;
} /* kana2Kanji */

/*
 * deleteNon - copy s to t, replacing two-byte codes that fall in the
 * listed non-standard ranges by the substitute code (0x222E | 0x8080).
 *
 * Elements up to 0x00FF are copied unchanged.  For the others the code
 * is compared, with 0x8080 masked off, against the range table below.
 * At most MAXBUF-1 elements are processed so that the terminating 0
 * always fits in t.  Always returns 1.
 */
int deleteNon(wChar s[MAXBUF],wChar t[MAXBUF])/*delete non standard*/
{
  /* Inclusive {first,last} ranges of non JIS kanji code.  The low byte
     of a masked code never exceeds 0x7F, so a span written 0xRR00-0xSSFF
     covers exactly the whole rows RR..SS.                               */
  static const wChar gaps[][2] = {
	{0x222F,0x2239}, {0x2242,0x2249}, {0x2251,0x225B},
	{0x226B,0x2271}, {0x227A,0x227D},
	{0x2321,0x232F}, {0x233A,0x2340}, {0x235B,0x2360},
	{0x237B,0x237E},
	{0x2474,0x247E},
	{0x2577,0x257E},
	{0x2639,0x2640}, {0x2659,0x267E},
	{0x2742,0x2750}, {0x2772,0x277E},
	{0x2841,0x287E},
	{0x2900,0x2FFF},                 /* rows 0x29..0x2F */
	{0x4F54,0x4F7E},
	{0x7427,0x747E},
	{0x7500,0x7EFF}                  /* rows 0x75..0x7E */
  };
  const int ngaps = (int)(sizeof gaps / sizeof gaps[0]);
  int i, g, limit, standard;
  wChar k;

  limit = wStrLen(s);
  if (limit > MAXBUF - 1)
	limit = MAXBUF - 1;
  for (i = 0; i < limit; i++) {
	if (s[i] <= 0x00FF) {                  /* copy ASCII        */
	    t[i] = s[i];
	    continue;
	}
	k = s[i] & 0x7F7F;                     /* strip 0x 8080 off */
	standard = 1;
	for (g = 0; g < ngaps; g++) {
	    if ((gaps[g][0] <= k) && (k <= gaps[g][1])) {
		standard = 0;
		break;
	    }
	}
	if (standard) t[i] = s[i];       /*if standard then copy char*/
	else t[i] = (0x222E | 0x8080);   /* else replace by alternative. */
  }
  t[i] = 0;
  return 1;
} /* deleteNon */

/*
 * convert - run the three conversion passes on s and leave the result
 * in t: deleteNon, then kana2Kanji, then kanji2ASCII.
 * Always returns 1.
 */
int convert(wChar s[MAXBUF], wChar t[MAXBUF]) /* convert s into t */
{
  wChar standardized[MAXBUF];   /* output of the first pass  */
  wChar widened[MAXBUF];        /* output of the second pass */

  /* Pass 1: non standard codes become the dummy character. */
  deleteNon(s, standardized);
  /* Pass 2: 1 byte kana characters become 2 byte kana.      */
  kana2Kanji(standardized, widened);
  /* Pass 3: 2 byte alphanumerics become 1 byte ASCII.       */
  kanji2ASCII(widened, t);
  return 1;
} /* convert */

/*
 * readProfileField - print prompt, read one line from standard input and
 * store its converted form in field.
 *
 * The line buffer is emptied first, so the field becomes an empty string
 * when getWstr stores nothing (for example at end of input).
 */
static void readProfileField(const char *prompt, wChar *field)
{ wChar line[MAXBUF];

  line[0] = 0;
  fputs(prompt, stdout);
  getWstr(stdin, line);
  convert(line, field);
} /* readProfileField */

/*
 * get_profile - ask for the reference data of the document on standard
 * input / output.
 *
 * Each argument must point to a buffer of MAXBUF elements; it receives
 * one converted, 0-terminated line.  The fields are requested in the
 * order date, author, subject, abstract, ending, attachment.
 * Always returns 1.
 */
int get_profile(wChar *date,wChar *author,wChar *subject,
                wChar *abstract,wChar *ending,wChar *attachment)
{
  printf("Enter the following reference data in the form specified.\n");
  printf("      EOF[Return] for the end of a data.\n");
  readProfileField(
      "      Date issued [in the form dd.mm.yyyy ] : \n", date);
  readProfileField(
      "      Author(s) [256 characters in Japanese (and English)] : \n",
      author);
  readProfileField(
      "      Subject   [256 characters in Japanese (and English)] : \n",
      subject);
  readProfileField("      Abstract  [1024 characters] : \n", abstract);
  readProfileField("      Ending if any   [1024 characters] : \n", ending);
  readProfileField("      Attachments if any [1024 characters] : \n",
                   attachment);
  return 1;
} /* get_profile */

/*
 * put_DTD - write the object document to the global file `fo`.
 *
 * As far as the visible statements go, the function writes fixed strings
 * with fputs and the profile fields with wPrintLine in the order
 * subject, abstract, author, date; it then reads lines from the global
 * source file `fi` with getWstr, and finally writes ending and
 * attachment, each followed by more fixed strings.
 *
 * NOTE(review): every fixed string below except "]>\n" is a bare
 * newline, the `for` statement inside the while loop is syntactically
 * incomplete, and the while loop has no closing brace of its own.  It
 * looks as if the SGML markup text (and part of the loop body) was lost
 * from this copy of the source -- TODO restore from a pristine copy
 * before building.
 * NOTE(review): the function is declared int but no return statement is
 * visible.
 */
int put_DTD(wChar *date,wChar *author,wChar *subject,
            wChar *abstract,wChar *ending,wChar *attachment)
{ int i;
  wChar s[MAXBUF], t[MAXBUF];
  
  /* Fixed text written ahead of the subject field. */
  fputs("\n",fo);
  fputs("\n",fo);
  fputs("\n",fo);
  fputs("]>\n",fo);
  fputs("\n",fo);
  fputs("\n",fo); 
  fputs("\n",fo);
  fputs("\n",fo);
  fputs("\n",fo);
  wPrintLine(fo,subject);
  fputs("\n",fo);
  fputs("\n",fo);
  wPrintLine(fo,abstract);
  fputs("\n",fo);
  fputs("\n",fo);
  fputs("\n",fo);
  wPrintLine(fo,author);
  fputs("\n",fo);
  fputs("\n",fo);
  wPrintLine(fo,date);
  fputs("\n",fo);
  fputs("\n",fo);

  /* Loop over the source lines while getWstr returns zero.
     NOTE(review): the statement on the next source line breaks off after
     "i" and runs into the tail of a string literal; the original loop
     body cannot be determined from what is visible here. */
  while (getWstr(fi,s)==NULL) {
    for (i=0;i\n",fo);
  fputs("\n",fo);
  wPrintLine(fo,ending);
  fputs("\n",fo);
  fputs("\n",fo);
  wPrintLine(fo,attachment);
  fputs("\n",fo);
  fputs("\n",fo);
} /* put_DTD */

void main(int argc, char *argv[])
{
  wChar subject[MAXBUF], date[MAXBUF],author[MAXBUF],abstract[MAXBUF],
        ending[MAXBUF],attachment[MAXBUF];
  
  switch (argc) {
  case 0 :
  case 1 :
  case 2 : help_msg(); exit();
  case 3 : strcpy(bf,argv[1]); strcpy(bf1,argv[2]); break;
  case 4 : if (strcmp(argv[1],"-d")==0){
		debug = 1;
		strcpy(bf,argv[2]); strcpy(bf1,argv[3]);
	   }
	   else if (strcmp(argv[1],"-s")==0){
		sj = 1;
		strcpy(bf,argv[2]); strcpy(bf1,argv[3]);
	   }
	   else help_msg(); break;
  case 5 : if ((strcmp(argv[1],"-d")==0)||(strcmp(argv[2],"-d")==0))
                debug = 1;
	   if ((strcmp(argv[1],"-s")==0)||(strcmp(argv[2],"-s")==0))
		sj = 1;
	   strcpy(bf,argv[3]); strcpy(bf1,argv[4]);
  }

  fi = fopen( bf, "r" ); fo = fopen(bf1, "w");
  if (!fi) {
    printf( "Can't open input file %s!\n",bf);
    exit();
  }
  else {
     get_profile(subject,date,author,abstract,ending,attachment);
     put_DTD(subject,date,author,abstract,ending,attachment);
  }
  fclose(fi); fclose(fo);
} /* make_sgm */