/* 共通なSGML文書の生成の模型プログラム (Sample program for generating common SGML documents) */
/*****
* MAKE_SGM.C - Sample program for making a SGML file for JEIDA-IC.
*
* Version 1, Release 1 (1994.08.03T00.45.00UTC)
*
* Copyright (c) 1994 Rick Wakatori(Nihon Unisys,Ltd). All rights reserved.
*
* NOTE: This program is aimed to provide an initial simple tool
* for CALS users which adds tags in any non-sgml-document for
* making the common document for JEIDA-Information Center.
* The source file code set is assumed to be "Shiftjis" or
* a combination of ASCII and JIS(G1), which is nearly equal
* to EUC (Extended Unix Code). EUC is assumed unless the -s
* option selects Shiftjis.
* The object file code is the combination of ASCII for the left
* side and JIS(G1) Kanji for the right side of the code table.
* Each code is stored in one value of the unsigned integer
* type called "wChar" in the program.
* Codes outside the JIS set are substituted by a dummy character
* to keep the document openly interchangeable.
* Alphabet and Numeric characters in JIS kanji code are converted
* by the program to ASCII.
* It intentionally contains bugs.
* ============= Comment in Japanese ================
* 著作権者 : 若鳥陸夫(日本ユニシス株,ただし,改良部分を除く。)
* 配布形式 : 配布は,次の(1)~(3)の条件を満たすこと。
* (1)無料配布とする。
* (2)すべての原始軟件を添付する。
* (3)一次著作権表示を表示する。
* 外部仕様概要: 1.文書記述言語SGML(JIS X 4151)によるタグを追加する。
* 2.適用する文書型定義(DTD)は,日本電子工業振興協会
* CALS情報センターの一般文書であって,公開識別子
* "JEIDA c_document"とする。
* 3.原始文書の符号は,無指定の場合EUC,特に指定した
* 場合だけシフトジスとする。
* 4.目的ファイル(交換用)の符号系は,JIS(G1)漢字符号
* に統一する。これは,英数字及び漢字に関する限り,
* EUCとほぼ等しい。
* 5.原始文書中の漢字符号の文字の扱いは,次の(a)~(g)
* のとおりとする。
* (a)標準外の文字(外字)は,埋め文字“〓”に置換
* する。
* (b)漢字間隔は,ASCIIの間隔2文字に置換する。
* (c)1バイト片仮名は,2バイト片仮名に置換する。
* (d)漢字符号中の英数字は,ASCIIの英数字に置換する。
* (e)タブ符号は,8個の間隔に置換する。
* (f)CR符号は,削除しLF符号だけとする。
* (g)SUB符号(EOFに流用のことがある。)は,削除する。
* 免責 : 著者は,このプログラムの如何なる誤り及び使用によっ
* て利用者が被る如何なる損害も責任を持たない。
* 参考 : この軟件は,Unisys 6000シリーズのANSI Cによって,
* 開発し,簡単な文例での試験だけを終了している。
* ================ unquote in Japanese ====================
******/
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#define MAXBUF 1024            /* capacity of every line buffer, in elements */

/* One character code: a single byte value, or a two-byte kanji code with
 * the first byte in bits 8..15 and the second byte in bits 0..7. */
typedef unsigned int wChar;

/* First byte of a Shiftjis kanji code.
 * NOTE(review): the accepted range 0x81..0xEA also covers 0xA0..0xDF, which
 * includes the single-byte katakana codes. Narrowing it changes every user
 * of this macro, so check all callers before tightening the range. */
#define isSJkanji1(x)  ((((x) >= 0x81) && ((x) <= 0xEA)) ? 1 : 0)

/* First byte of a JIS(G1) kanji code (high bit set). */
#define isJISkanji1(x) ((((x) >= 0xA1) && ((x) <= 0xF4)) ? 1 : 0)

/* Second byte of a Shiftjis kanji code: 0x40..0xFC except 0x7F. */
#define isSJkanji2(x)  ((((x) >= 0x40) && ((x) <= 0xFC) && ((x) != 0x7F)) ? 1 : 0)

/* Second byte of a JIS(G1) kanji code (high bit set). */
#define isJISkanji2(x) ((((x) >= 0xA1) && ((x) <= 0xFE)) ? 1 : 0)

/* Kanji space in the current source code set (selected by the global sj). */
#define isKanjiSpace(x) ((((sj) && ((x) == 0x8140)) || \
                          ((!sj) && ((x) == 0xA1A1))) ? 1 : 0)

int debug = 0;     /* Flag for debugging mode */
int sj = 0;        /* Flag for Shiftjis code */
char bf[MAXBUF];   /* Single byte buffer; main() keeps the source file name here */
char bf1[MAXBUF];  /* Single byte buffer; main() keeps the object file name here */
FILE *fi;          /* Source document file */
FILE *fo;          /* Object document file */
/*
 * wStrLen - count the codes stored in a before the terminating 0.
 *   a : array of at most MAXBUF codes, normally 0-terminated.
 * Returns the number of codes before the terminator, or MAXBUF when no
 * terminator is found inside the array.
 */
int wStrLen(wChar a[MAXBUF])
{
    int i = 0;

    /* Check the bound before the element so a[MAXBUF] is never read. */
    while ((i < MAXBUF) && (a[i] != 0))
        i++;
    return i;
} /* wStrLen */
/*
 * printWchar - write one code to fo.
 *   fo : output stream.
 *   w  : code to write.
 * A code whose upper byte is a JIS(G1) first byte is written as two bytes,
 * upper byte first. Any other code is written as its low byte only.
 */
void printWchar(FILE *fo, wChar w)
{
    int hi = (int)((w & 0xFF00) >> 8);
    int lo = (int)(w & 0x00FF);

    /* getWstr() passes Shiftjis input through s2j(), so the codes that
     * reach this point are in JIS(G1) form whatever the value of sj is.
     * The test therefore always uses the JIS first-byte range. */
    if (isJISkanji1(hi)) {
        fputc(hi, fo);
        fputc(lo, fo);
    }
    else {
        fputc(lo, fo);
    }
} /* printWchar */
/*
 * wPrintLine - write every code of the 0-terminated string s to fo.
 *   fo : output stream.
 *   s  : codes to write; output stops at the terminator.
 * Returns 1, like the other helpers in this file.
 */
int wPrintLine(FILE *fo, wChar s[MAXBUF])
{
    int i, limit;

    limit = wStrLen(s);
    for (i = 0; i < limit; i++)
        printWchar(fo, s[i]);
    /* NOTE(review): writing an empty string has no effect; the literal may
     * have lost its content - confirm against the original source. */
    fputs("",fo);
    return 1;
} /* wPrintLine */
/*
 * help_msg - print the command line usage on standard output.
 * The usage line names the two file arguments that main() requires after
 * the optional -d and -s flags.
 * Returns 1.
 */
int help_msg(void)
{
    printf(" Usage : make_sgm [-d] [-s] source_file object_file\n");
    printf(" where -d : Trace the program as option\n");
    printf(" -s : Source document file code is Shiftjis as option\n");
    return 1;
} /* help_msg */
/*
 * insertSP - expand one tab code into spaces.
 *   wp : 1-based column at which the tab occurs.
 *   j  : in/out, index of the next free element of t; advanced past the
 *        spaces written.
 *   t  : destination buffer of MAXBUF codes.
 * Spaces are written until the column reaches the next tab stop, i.e. the
 * next column whose value modulo 8 is 1. A tab that already sits on such a
 * column yields 8 spaces. Writing stops early when only the last element
 * of t is left, so the caller can still store the terminator.
 * Returns wp - 1.
 */
int insertSP(int wp, int *j, wChar t[MAXBUF]) /*tab code into spaces */
{
    int col = wp;

    do {
        if (*j >= MAXBUF - 1)   /* keep the last slot for the terminator */
            break;
        t[(*j)++] = 0x20;       /* advance the index, not the pointer */
        col++;
    } while ((col % 8) != 1);
    return wp - 1;
} /*insertSP*/
/*
 * s2j - convert one Shiftjis kanji code into a JIS(G1) code.
 *   s : Shiftjis code, first byte in bits 8..15, second byte in bits 0..7.
 *   j : receives the converted code with 0x8080 set.
 * Returns 1.
 */
int s2j(wChar s, wChar *j)/* convert shiftjis into JIS(G1) code */
{
    wChar lead  = s & 0xFF00;
    wChar trail = s & 0x00FF;
    wChar bias  = (lead <= 0x9F00) ? 0x7000 : 0xB000;
    wChar code  = 2 * (lead - bias);    /* each first byte spans two rows */

    if (trail >= 0x009F) {
        /* upper half of the second-byte range: even row */
        code += trail - 0x007E;
    }
    else {
        /* lower half: odd row, with the gap at 0x7F skipped */
        if ((trail >= 0x0040) && (trail <= 0x007E))
            code += trail - 0x001F;
        else if (trail >= 0x0080)
            code += trail - 0x0020;
        code -= 0x0100;
    }
    *j = code | 0x8080;
    return 1;
} /* s2j */
/*
 * isRange - test whether x lies in the closed interval [y, z].
 * Returns 1 when y <= x <= z, otherwise 0.
 */
int isRange(wChar x,wChar y,wChar z)
{
    if (x < y)
        return 0;
    return x <= z;
}
/*
 * getWstr - read one line from fi and store it in s as wChar codes.
 *   fi : input stream.
 *   s  : receives the 0-terminated codes (at most MAXBUF - 1 of them).
 * Processing applied while reading:
 *   - a tab is expanded by insertSP();
 *   - control codes 0x00..0x1F other than LF are dropped;
 *   - a kanji space becomes two ASCII spaces;
 *   - a kanji first byte followed by an invalid second byte becomes the
 *     alternative character 0xA2AE;
 *   - a Shiftjis kanji code is converted by s2j(); a JIS(G1) code is
 *     stored as is;
 *   - every other byte is stored unchanged.
 * Returns 0 when a line was read, non-zero when nothing could be read
 * (end of file or read error); s is then the empty string.
 */
int getWstr(FILE *fi,wChar s[MAXBUF]) /* read a line into s */
{
    int i = 0;      /* read index into b */
    int j = 0;      /* next free index in s */
    int k;          /* number of bytes in b */
    int col = 1;    /* 1-based column of the next character stored */
    int before;
    wChar c1, c2, tmp;
    char b[MAXBUF];

    s[0] = 0;
    /* Test the read itself: feof() only becomes true after a read has
     * already failed, so checking it first repeats the last line. */
    if (fgets(b, MAXBUF, fi) == NULL)
        return 1;
    k = (int)strlen(b);

    /* One element of s is always kept free for the terminator. */
    while ((i < k) && (j < MAXBUF - 1)) {
        c1 = b[i++] & 0x00FF;                   /* strip off */
        if (c1 == 0x09) {                       /* tab to spaces */
            before = j;
            insertSP(col, &j, s);
            col += j - before;
        }
        else if (isRange(c1,0x0000,0x001F) &&   /* control codes */
                 (c1 != 0x0A)) {
            /* dropped; the following byte must not be skipped */
        }
        else if (((sj) && (isSJkanji1(c1)) &&
                  !isRange(c1,0x00A0,0x00DF)) ||  /* not single byte kana */
                 ((!sj) && (isJISkanji1(c1)))) {  /* first half of kanji code */
            if (i < k)
                c2 = b[i++] & 0x00FF;
            else
                c2 = 0;                         /* line ends inside a code */
            tmp = ((c1 << 8) & 0xFF00) | c2;
            if (isKanjiSpace(tmp)) {
                s[j++] = 0x20;                  /* 2 spaces */
                if (j < MAXBUF - 1)
                    s[j++] = 0x20;
            }
            else if (((sj) && (!isSJkanji2(c2))) ||
                     ((!sj) && (!isJISkanji2(c2)))) {
                s[j++] = 0xA2AE;                /* alt character */
            }
            else if (sj) {
                s2j(tmp, &s[j++]);              /* convert a code */
            }
            else {
                s[j++] = tmp;
            }
            col += 2;
        }
        else {
            s[j++] = c1;                        /* store the code */
            col++;
        }
    }
    if (i < k)
        puts("FATAL, String overflow.");
    if (j > MAXBUF - 1)
        j = MAXBUF - 1;
    s[j] = 0;                                   /* terminate the string */
    return 0;
} /* getWstr */
/*
 * kanji2ASCII - replace two-byte digits and letters by their ASCII codes.
 *   s : 0-terminated source codes.
 *   t : receives the 0-terminated result, one code per source code.
 * After masking with 0x7F7F, codes 0x2330..0x2339, 0x2341..0x235A and
 * 0x2361..0x237A are replaced by their low byte. Every other code is
 * copied unchanged.
 * Returns 1.
 */
int kanji2ASCII(wChar s[MAXBUF], wChar t[MAXBUF]) /*to 1 byte number*/
{
    int i, limit;
    wChar k;

    limit = wStrLen(s);
    if (limit > MAXBUF - 1)             /* leave room for the terminator */
        limit = MAXBUF - 1;
    for (i = 0; i < limit; i++) {
        k = s[i] & 0x7F7F;              /* strip 0x8080 off */
        if ((0x2330 <= k && k <= 0x2339) ||     /* numbers */
            (0x2341 <= k && k <= 0x235A) ||     /* capital alphabets */
            (0x2361 <= k && k <= 0x237A))       /* small alphabets */
            t[i] = k & 0x00FF;
        else
            t[i] = s[i];                /* copy as is */
    }
    t[i] = 0;
    return 1;
} /* kanji2ASCII */
/*
 * kana2Kanji - replace single-byte kana codes by two-byte kana codes.
 *   s : 0-terminated source codes.
 *   t : receives the 0-terminated result.
 * Codes 0xA1..0xDF are mapped to two-byte codes with 0x8080 set. A voiced
 * mark (0xDE) or semi-voiced mark (0xDF) that follows a kana able to carry
 * it is merged into that kana, so t may be shorter than s. A mark that
 * cannot be merged is stored as the two-byte mark itself. All other codes
 * are copied unchanged.
 * Returns 1.
 */
int kana2Kanji(wChar s[MAXBUF],wChar t[MAXBUF]) /* to 2 byte kana */
{
    int i = 0, j = 0, limit;
    wChar k, next;

    limit = wStrLen(s);
    if (limit > MAXBUF - 1)             /* leave room for the terminator */
        limit = MAXBUF - 1;
    while (i < limit) {
        k = s[i];
        if ((k <= 0xA0) || (0xE0 <= k)) {       /* copy a non kana code */
            t[j] = k;
        }
        else {
            /* Look ahead only while the next code is inside the string. */
            if ((i + 1 < limit) && (s[i + 1] <= 0x00FF))
                next = s[i + 1];
            else
                next = 0;
            if (k == 0xA1) t[j] = 0x2123;               /* ku_ten */
            else if (k == 0xA2) t[j] = 0x2156;          /* kagi_ue */
            else if (k == 0xA3) t[j] = 0x2157;          /* kagi_shita */
            else if (k == 0xA4) t[j] = 0x2122;          /* tou_ten */
            else if (k == 0xA5) t[j] = 0x2126;          /* naka_ten */
            else if (k == 0xA6) t[j] = 0x2572;          /* wo */
            else if (isRange(k,0xA7,0xAB))              /* small a..o */
                t[j] = 0x2521 + 2 * (k - 0xA7);
            else if (isRange(k,0xAC,0xAE))              /* small ya..yo */
                t[j] = 0x2563 + 2 * (k - 0xAC);
            else if (k == 0xAF) t[j] = 0x2543;          /* small tsu */
            else if (k == 0xB0) t[j] = 0x213C;          /* chouon */
            else if (isRange(k,0xB1,0xB5)) {            /* a..o */
                t[j] = 0x2522 + 2 * (k - 0xB1);
                if ((k == 0xB3) && (next == 0xDE)) {    /* vu */
                    t[j] = 0x2574; i++;
                }
            }
            else if (isRange(k,0xB6,0xC1)) {            /* ka..chi */
                t[j] = 0x252B + 2 * (k - 0xB6);
                if (next == 0xDE) {                     /* ga..di */
                    t[j]++; i++;
                }
            }
            else if (isRange(k,0xC2,0xC4)) {            /* tsu..to */
                t[j] = 0x2544 + 2 * (k - 0xC2);
                if (next == 0xDE) {                     /* zu..do */
                    t[j]++; i++;
                }
            }
            else if (isRange(k,0xC5,0xC9))              /* na..no */
                t[j] = 0x254A + k - 0xC5;
            else if (isRange(k,0xCA,0xCE)) {            /* ha..ho */
                t[j] = 0x254F + 3 * (k - 0xCA);
                if (next == 0xDE) {                     /* ba..bo */
                    t[j]++; i++;
                }
                else if (next == 0xDF) {                /* pa..po */
                    t[j] += 2; i++;
                }
            }
            else if (isRange(k,0xCF,0xD3))              /* ma..mo */
                t[j] = 0x255E + k - 0xCF;
            else if (isRange(k,0xD4,0xD6))              /* ya..yo */
                t[j] = 0x2564 + 2 * (k - 0xD4);
            else if (isRange(k,0xD7,0xDB))              /* ra..ro */
                t[j] = 0x2569 + k - 0xD7;
            else if (k == 0xDC) t[j] = 0x256F;          /* wa */
            else if (k == 0xDD) t[j] = 0x2573;          /* n' */
            else if (k == 0xDE) t[j] = 0x212B;          /* voiced mark alone */
            else t[j] = 0x212C;                         /* 0xDF: semi-voiced mark alone */
        }
        if (0x2121 <= t[j])             /* if kanji code then */
            t[j] |= 0x8080;             /* invoke the code into right */
        i++; j++;
    }
    t[j] = 0;
    return 1;
} /* kana2Kanji */
/*
 * deleteNon - replace non-standard two-byte codes by a dummy character.
 *   s : 0-terminated source codes.
 *   t : receives the 0-terminated result, one code per source code.
 * Codes up to 0x00FF are copied unchanged. A larger code is masked with
 * 0x7F7F and looked up in the table of ranges treated as non-standard;
 * a hit is replaced by (0x222E | 0x8080), anything else is copied.
 * Returns 1.
 */
int deleteNon(wChar s[MAXBUF],wChar t[MAXBUF])/*delete non standard*/
{
    /* Closed ranges [low, high] of non JIS kanji codes, after masking. */
    static const wChar gap[][2] = {
        {0x222F,0x2239}, {0x2242,0x2249}, {0x2251,0x225B},
        {0x226B,0x2271}, {0x227A,0x227D},
        {0x2321,0x232F}, {0x233A,0x2340}, {0x235B,0x2360},
        {0x237B,0x237E},
        {0x2474,0x247E},
        {0x2577,0x257E},
        {0x2639,0x2640}, {0x2659,0x267E},
        {0x2742,0x2750}, {0x2772,0x277E},
        {0x2841,0x287E},
        {0x2900,0x2FFF},                /* whole rows 0x29..0x2F */
        {0x4F54,0x4F7E},
        {0x7427,0x747E},
        {0x7500,0x7EFF}                 /* whole rows 0x75..0x7E */
    };
    const int gaps = (int)(sizeof gap / sizeof gap[0]);
    int i, g, limit, standard;
    wChar k;

    limit = wStrLen(s);
    if (limit > MAXBUF - 1)             /* leave room for the terminator */
        limit = MAXBUF - 1;
    for (i = 0; i < limit; i++) {
        if (s[i] <= 0x00FF) {           /* copy ASCII */
            t[i] = s[i];
            continue;
        }
        k = s[i] & 0x7F7F;              /* strip 0x8080 off */
        standard = 1;
        for (g = 0; g < gaps; g++) {
            if (isRange(k, gap[g][0], gap[g][1])) {
                standard = 0;
                break;
            }
        }
        if (standard)
            t[i] = s[i];                /* if standard then copy char */
        else
            t[i] = (0x222E | 0x8080);   /* else replace by alternative. */
    }
    t[i] = 0;
    return 1;
} /* deleteNon */
/*
 * convert - run the three code clean-up passes over s and store the
 * result in t.
 *   s : 0-terminated source codes.
 *   t : receives the 0-terminated converted codes.
 * The passes run in this order: deleteNon(), kana2Kanji(), kanji2ASCII().
 * Returns 1.
 */
int convert(wChar s[MAXBUF], wChar t[MAXBUF]) /* convert s into t */
{
    wChar u[MAXBUF], v[MAXBUF];         /* intermediate results */

    deleteNon(s, u);    /* change non standard code into dummy character. */
    kana2Kanji(u, v);   /* 1 byte kana char to 2 bytes kana character. */
    kanji2ASCII(v, t);  /* 2 byte kanji to 1 byte ASCII. */
    return 1;
} /* convert */
/*
 * read_profile_field - print prompt, read one line from standard input
 * and store its converted form in dest.
 */
static void read_profile_field(const char *prompt, wChar *dest)
{
    wChar s[MAXBUF];

    fputs(prompt, stdout);
    s[0] = 0;           /* stay defined even if nothing can be read */
    getWstr(stdin, s);
    convert(s, dest);
}

/*
 * get_profile - ask for the reference data of the document on standard
 * input.
 *   date, author, subject, abstract, ending, attachment :
 *       each receives one converted, 0-terminated line; every buffer
 *       must hold MAXBUF codes.
 * Returns 1.
 */
int get_profile(wChar *date,wChar *author,wChar *subject,
                wChar *abstract,wChar *ending,wChar *attachment)
{
    printf("Enter the following reference data in the form specified.\n");
    printf(" EOF[Return] for the end of a data.\n");
    read_profile_field(" Date issued [in the form dd.mm.yyyy ] : \n", date);
    read_profile_field(" Author(s) [256 characters in Japanese (and English)] : \n", author);
    read_profile_field(" Subject [256 characters in Japanese (and English)] : \n", subject);
    read_profile_field(" Abstract [1024 characters] : \n", abstract);
    read_profile_field(" Ending if any [1024 characters] : \n", ending);
    read_profile_field(" Attachments if any [1024 characters] : \n", attachment);
    return 1;
} /* get_profile */
/*
 * put_DTD - write the object document to the global stream fo.
 *   date, author, subject, abstract, ending, attachment :
 *       0-terminated profile strings written around the document body.
 * The body is taken from the global stream fi: each line is read with
 * getWstr(), passed through convert() and written with wPrintLine().
 * Returns 1.
 *
 * NOTE(review): most string literals below hold only a line end or
 * "]>\n"; presumably the markup they once carried was lost - restore it
 * from the original source before use.
 * NOTE(review): the body loop was cut off in the middle of a "for"
 * statement. It has been rebuilt as the shortest sequence that uses s and
 * t the way the rest of this file does; confirm against the original.
 */
int put_DTD(wChar *date,wChar *author,wChar *subject,
            wChar *abstract,wChar *ending,wChar *attachment)
{
    wChar s[MAXBUF], t[MAXBUF];

    fputs("\n",fo);
    fputs("\n",fo);
    fputs("\n",fo);
    fputs("]>\n",fo);
    fputs("\n",fo);
    fputs("\n",fo);
    fputs("\n",fo);
    fputs("\n",fo);
    fputs("\n",fo);
    wPrintLine(fo,subject);
    fputs(" \n",fo);
    fputs("\n",fo);
    wPrintLine(fo,abstract);
    fputs(" \n",fo);
    fputs("\n",fo);
    fputs("\n",fo);
    wPrintLine(fo,author);
    fputs(" \n",fo);
    fputs("\n",fo);
    wPrintLine(fo,date);
    fputs(" \n",fo);
    fputs("\n",fo);
    while (getWstr(fi,s) == 0) {        /* 0 means a line was read */
        convert(s,t);
        wPrintLine(fo,t);
    }
    fputs("\n",fo);
    fputs("\n",fo);
    wPrintLine(fo,ending);
    fputs(" \n",fo);
    fputs("\n",fo);
    wPrintLine(fo,attachment);
    fputs(" \n",fo);
    fputs(" \n",fo);
    return 1;
} /* put_DTD */
void main(int argc, char *argv[])
{
wChar subject[MAXBUF], date[MAXBUF],author[MAXBUF],abstract[MAXBUF],
ending[MAXBUF],attachment[MAXBUF];
switch (argc) {
case 0 :
case 1 :
case 2 : help_msg(); exit();
case 3 : strcpy(bf,argv[1]); strcpy(bf1,argv[2]); break;
case 4 : if (strcmp(argv[1],"-d")==0){
debug = 1;
strcpy(bf,argv[2]); strcpy(bf1,argv[3]);
}
else if (strcmp(argv[1],"-s")==0){
sj = 1;
strcpy(bf,argv[2]); strcpy(bf1,argv[3]);
}
else help_msg(); break;
case 5 : if ((strcmp(argv[1],"-d")==0)||(strcmp(argv[2],"-d")==0))
debug = 1;
if ((strcmp(argv[1],"-s")==0)||(strcmp(argv[2],"-s")==0))
sj = 1;
strcpy(bf,argv[3]); strcpy(bf1,argv[4]);
}
fi = fopen( bf, "r" ); fo = fopen(bf1, "w");
if (!fi) {
printf( "Can't open input file %s!\n",bf);
exit();
}
else {
get_profile(subject,date,author,abstract,ending,attachment);
put_DTD(subject,date,author,abstract,ending,attachment);
}
fclose(fi); fclose(fo);
} /* make_sgm */