/* 個人計算機(PC)の文書を計算機網(internet)形式の文書へ変換する道具 */
/* PC符号対広域通信符号変換具 */
#include <stdio.h>
#include <string.h>
#include <ctype.h>
/* Program identification strings. */
#define COPYRIGHT "(C)Rick Wakatori(若鳥陸夫),1988"
#define VERSION "[Version 4.82 (1995-01-09)]"
/* Control-character code values, named after the usual mnemonics. */
#define NUL 0x00
#define SOH 0x01
#define ETX 0x03
#define EOT 0x04
#define ENQ 0x05
#define ACK 0x06
#define BS 0x08
#define HT 0x09
#define LF 0x0A
#define CR 0x0D
#define SO 0x0E
#define SI 0x0F
#define DLE 0x10
#define DC1 0x11
#define DC3 0x13
#define SYN 0x16
#define SUB 0x1A
#define ESC 0x1B
#define FS 0x1C
#define SP 0x20
/* NOTE(review): in ASCII 0x2C is ',' and 0x38 is '8', so PLUS and PERIOD do
   not match their names.  Neither is referenced in the code visible here;
   confirm the intended values before using them. */
#define PLUS 0x2C
#define PERIOD 0x38
#define DEL 0x7F
/* EOL has the same value as NUL. */
#define EOL NUL
/* Size limits.  StrMax is the dimension of the per-line arrays used by
   readLine(), convert() and main(). */
#define LINELENGTH 80
#define StrMax 4000
#define BUFSIZE 32000
/* Tab-stop interval used by insertSP(). */
#define LTAB 8
#define FALSE 0
#define TRUE 1
/* Method selectors accepted by sendC(). */
#define SENDC0 0
#define SENDG0 1
#define TESTKBD 1
/* SHIFTJIS is the initial value of fileCode; G1KANJI is presumably the
   alternative value -- TODO confirm, it is not used in the visible code. */
#define SHIFTJIS 0
#define G1KANJI 1
/* Escape sequences written as octal-escaped strings (\033 is ESC).
   NOTE(review): ASCII_TO_G0 ends in \100 (0x40) although the ASCII final
   byte defined below is 0x42, and JISKANA_TO_G2 contains \044 like the
   two-byte kanji designations -- confirm both against the standard. */
#define ANNOUNCE_G0_G1 "\033\040\104"
#define ANNOUNCE_SS2 "\033\040\132"
#define ISO646_TO_C0 "\033\041\100"
#define OLDJISKANJI_TO_G0 "\033\044\100"
#define JISKANJI_TO_G0 "\033\044\102"
#define NJISKANJI_TO_G0 "\033\044\050\102"
#define JISKANJI_TO_G1 "\033\044\051\102"
#define JISKANA_TO_G2 "\033\044\052\111"
#define ASCII_TO_G0 "\033\050\100"
#define SWEDISHNAME_TO_G0 "\033\050\110"
#define JISROMAN_TO_G0 "\033\050\112"
#define JISKANA_TO_G1 "\033\051\111"
#define SS2 "\033\116"
/* Single bytes from which escape sequences are assembled.  G0 and ASCII are
   written after ESC by sendAlphDesignator(); TwoByteG0 and JISkanji are
   written after ESC by sendKanjiDesignator(). */
#define ANNOUNCE 0x20
#define C0 0x21
#define G0 0x28
#define G1 0x29
#define G2 0x2A
#define TwoByteG0 0x24
#define OLDJISkanji 0x40
#define ASCII 0x42
#define JISkanji 0x42
#define SwedishName 0x48
#define JISkana 0x49
#define JISroman 0x4A
/*
 * Character-class predicates.  Each expands to an int expression that is
 * non-zero when the argument lies in the stated inclusive range.  All but
 * isSjSpace() and isOdd() rely on isRange(), which is defined later in this
 * file and must be visible wherever the macros are expanded.
 *
 * Every use of the argument is parenthesized so that an expression such as
 * `a | b` can be passed safely (the former isSjSpace(c) expanded to
 * `c==0x8140`, which binds wrongly for such arguments).
 * isSjByte1() evaluates its argument twice: do not pass expressions with
 * side effects.
 */
#define isKatakana(c) (isRange((c),0xA1,0xDF))           /* 1-byte katakana   */
#define isG1kanjiByte1(c) (isRange((c),0xA1,0xF4))       /* G1 kanji, byte 1  */
#define isG1kanjiByte2(c) (isRange((c),0xA1,0xFF))       /* G1 kanji, byte 2  */
#define isSjkanjiSecondOrder(c) (isRange((c),0x4F,0xF4))
#define isG1kanjiSecondOrder(c) (isRange((c),0xCF,0xF4))
#define isSjByte1(c) ((isRange((c),0x0081,0x009F) ||\
                       isRange((c),0x00E0,0x00EA)) ? 1 : 0) /* Shift-JIS byte 1 */
#define isSjByte2(c) (isRange((c),0x40,0xFE) ? 1 : 0)    /* Shift-JIS byte 2  */
#define isSjSpace(c) (((c)==0x8140) ? 1 : 0)             /* 2-byte code 0x8140 */
#define isOdd(c) ((c) % 2 != 0)
/* Output-side state shared by the send*() functions. */
struct status {
/* kanji : TRUE after sendKanjiDesignator(), FALSE after sendAlphDesignator().
   k     : set to 1 by sendKanjiDesignator() and sendKanji(), to 0 by sendC().
   sf1, sf2 : reset to NUL by sendC() when called with SENDG0.
   soExist, jis1, jis2 : not referenced by the code visible in this file. */
int soExist, kanji, k, jis1, jis2, sf1, sf2;
};
struct status sd;
/* es4, announcess2, fileCode, g2kana and tmp below are not referenced by the
   functions visible in this file. */
char es4[6] = {0,0,0,0,0,0};
int announcess2 = FALSE;
int fileCode = SHIFTJIS;
/* When FALSE the designator functions emit ESC sequences; otherwise they
   emit SO / SI.  Cleared by sendAnnounceG0(). */
int g1Kanji = FALSE;
int g2kana = FALSE;
/* When FALSE send() hands every code straight to sendC(SENDG0, ...). */
int jisKanji = TRUE;
unsigned tmp; /* temporary storage */
/* Input stream; opened and closed by main(). */
FILE *fi;
/* Print the usage text to standard output, one table row per line.
   Always returns 1. */
int help()
{
    static const char *const usageText[] = {
        " SJtoJIS, Version 1.1, Copyright 1994 Rick Wakatori. \n",
        " Usage (指令文) : SJtoJIS sourceFile [ > objectFile ] \n",
        " Where the symbols are specified as follows(意味): \n",
        " SJtoJIS : The command header(指令語), \n",
        " sourceFile : Source file in Shiftjis code(原始ファイル),\n",
        " objectFile : Object file in JIS(G0) code(目的ファイル). \n",
        " Functions (機能) : 網間通信のためのファイル変換 \n",
        " (a)Shiftjis code --> JIS(G0)Kanji(符号変換), \n",
        " (b)Non-standard codes --> Dummy character(外字削除),\n",
        " (c)2 byte Alphbet/Numeric --> 1 byte ASCII, \n",
        " (d)1 byte Katakana --> 2 byte Katakana, \n",
        " (e)Tab code --> Maximum 8 space codes, \n",
        " (f)Kanji space --> 2 spaces in ASCII. \n",
        " ----------------------------------------------------------- \n"
    };
    size_t row;

    /* None of the rows contains a '%', so writing them verbatim produces
       the same bytes as using them as printf formats. */
    for (row = 0; row < sizeof usageText / sizeof usageText[0]; row++)
        fputs(usageText[row], stdout);
    return 1;
} /* help */
/* Return 1 when y <= x <= z (inclusive, compared as unsigned), else 0. */
int isRange(unsigned x,unsigned y,unsigned z)
{
    if (x < y)
        return 0;   /* below the lower bound */
    if (x > z)
        return 0;   /* above the upper bound */
    return 1;
}
/*
 * Write one byte to standard output.  DEL (0x7F) is suppressed.
 *
 * c : byte value to write.
 * Returns 1.  (The function used to rely on the implicit-int rule, which is
 * no longer valid C, and returned no value; the int return type is kept so
 * existing callers are unaffected.)
 */
int writeChar(int c)
{
    if (c != DEL)
        putchar(c);
    return 1;
}
/*
 * Switch the output back to the single-byte set and record that in sd.kanji.
 * When g1Kanji is FALSE the bytes ESC, G0, ASCII are written; otherwise a
 * single SI is written.
 *
 * Returns 1.  The return type is now spelled out (implicit int is not valid
 * in C99 and later).
 */
int sendAlphDesignator(void)
{
    if (g1Kanji) {
        writeChar(SI);
    } else {
        writeChar(ESC);
        writeChar(G0);
        writeChar(ASCII);
    }
    sd.kanji = FALSE;
    return 1;
} /* sendAlphDesignator */
/*
 * Switch the output to the two-byte kanji set and record that in sd.kanji
 * (sd.k is set to 1 as well).  When g1Kanji is FALSE the bytes ESC,
 * TwoByteG0, JISkanji are written; otherwise a single SO is written.
 *
 * Returns 1.  The return type is now spelled out (implicit int is not valid
 * in C99 and later).
 */
int sendKanjiDesignator(void)
{
    if (g1Kanji) {
        writeChar(SO);
    } else {
        writeChar(ESC);
        writeChar(TwoByteG0);
        writeChar(JISkanji);
    }
    sd.kanji = TRUE;
    sd.k = 1;
    return 1;
} /* sendKanjiDesignator */
/*
 * Write a single-byte code and update the output state.
 *
 * method : SENDC0 clears sd.k; SENDG0 clears sd.k and also resets sd.sf1
 *          and sd.sf2 to NUL.  Any other value only writes the byte.
 * c      : byte to write (passed to writeChar()).
 *
 * Returns 1.  The return type is now spelled out (implicit int is not valid
 * in C99 and later).
 */
int sendC(int method,int c)
{
    writeChar(c);
    switch (method) {
    case SENDC0:
        sd.k = 0;
        break;
    case SENDG0:
        sd.sf1 = sd.sf2 = NUL;
        sd.k = 0;
        break;
    default:            /* unknown method: state left untouched */
        break;
    } /* switch method */
    return 1;
} /* sendC */
/*
 * Emit SI followed by the announcer/designation sequences for the
 * configuration that keeps kanji in G0, then clear g1Kanji.
 *
 * Returns 1.  The return type is now spelled out, and sendStr() -- which is
 * defined further down the file -- is declared locally so that it is no
 * longer called through an implicit declaration.
 */
int sendAnnounceG0(void)
{
    int sendStr(char *s);       /* defined later in this file */

    sendC(SENDC0, SI);
    sendStr(ANNOUNCE_G0_G1);    /* ESC 2/0 4/4 */
    sendStr(ISO646_TO_C0);      /* ESC 2/1 4/0 */
    sendStr(JISROMAN_TO_G0);    /* ESC 2/8 4/10 */
    sendStr(JISKANA_TO_G1);     /* ESC 2/9 4/9 */
    g1Kanji = FALSE;
    return 1;
} /* sendAnnounceG0 */
/*
 * Emit SI followed by the announcer/designation sequences for the
 * configuration that places kanji in G1 and kana in G2.
 *
 * NOTE(review): unlike sendAnnounceG0(), this function does not update
 * g1Kanji -- confirm whether it should set it to TRUE.
 *
 * Returns 1.  The return type is now spelled out, and sendStr() -- which is
 * defined further down the file -- is declared locally so that it is no
 * longer called through an implicit declaration.
 */
int sendAnnounce_G1(void)
{
    int sendStr(char *s);       /* defined later in this file */

    sendC(SENDC0, SI);
    sendStr(ANNOUNCE_G0_G1);    /* ESC 2/0 4/4 */
    sendStr(ANNOUNCE_SS2);      /* ESC 2/0 5/10 */
    sendStr(ISO646_TO_C0);      /* ESC 2/1 4/0 */
    sendStr(JISROMAN_TO_G0);    /* ESC 2/8 4/10 */
    sendStr(JISKANJI_TO_G1);    /* ESC 2/4 2/9 4/2 */
    sendStr(JISKANA_TO_G2);     /* ESC 2/4 2/10 4/9 */
    return 1;
} /* sendAnnounce_G1 */
/*
 * Write a two-byte code as two 7-bit bytes: bits 8..14 first, then
 * bits 0..6.  Sets sd.k to 1.
 *
 * c : 16-bit code; bit 7 of each byte is stripped on output.
 *
 * Returns 1.  The return type is now spelled out (implicit int is not valid
 * in C99 and later); the dead store to the by-value parameter was removed.
 */
int sendKanji(int c)
{
    writeChar((c & 0x7F00) >> 8);
    writeChar(c & 0x007F);
    sd.k = 1;
    return 1;
} /* sendKanji */
/*
 * Write one code, inserting a designator first when the output has to
 * change between the kanji set and the single-byte set.
 *
 *  - jisKanji FALSE            : the code is written as-is (SENDG0).
 *  - high byte in 0xA1..0xF4   : two-byte code; kanji designator is sent if
 *                                the output is not already in kanji mode.
 *  - anything else             : single-byte code; the alphabetic designator
 *                                is sent first if the output is in kanji
 *                                mode.  Control and white-space characters
 *                                go out with SENDC0, the rest with SENDG0.
 *
 * c : code to write.
 *
 * The <ctype.h> classifiers are only defined for values representable as
 * unsigned char (or EOF), so they are consulted only for 0..0xFF; other
 * values are treated as ordinary (non-control) codes.
 *
 * Returns 1.  The return type is now spelled out (implicit int is not valid
 * in C99 and later).
 */
int send(int c)
{
    if (!jisKanji) {
        sendC(SENDG0, c);
        return 1;
    }
    if (isG1kanjiByte1((c & 0xFF00) >> 8)) {
        if (!sd.kanji)
            sendKanjiDesignator();
        sendKanji(c);
        return 1;
    }
    if (sd.kanji)
        sendAlphDesignator();   /* for RFC 1468 only */
    if (c >= 0 && c <= 0xFF && (iscntrl(c) || isspace(c)))
        sendC(SENDC0, c);
    else
        sendC(SENDG0, c);
    return 1;
} /* send */
/*
 * Pass every byte of a NUL-terminated string to send().
 *
 * s : string to send; must not be a null pointer.
 *
 * Each byte is converted through unsigned char so that bytes >= 0x80 are not
 * sign-extended into negative values before reaching send().
 *
 * Returns 1.  The return type stays int (previously implicit) because
 * earlier functions in this file call sendStr() before its definition.
 */
int sendStr(char *s)
{
    char *p;

    for (p = s; *p != NUL; p++)
        send((unsigned char)*p);
    return 1;
} /* sendStr */
/*
 * Display an error status on standard output.
 *
 * err : 0 prints "[WARNING: <s>]", 1 prints "[ERROR: <s>]"; any other value
 *       prints nothing.
 * s   : message text.
 *
 * Returns 0 for err == 1 and 1 otherwise.
 *
 * The warning format used to contain a "%d" with no matching argument,
 * which is undefined behaviour; it has been removed.
 */
int error(int err, const char *s) /* display error status */
{
    switch (err) {
    case 0:
        printf("[WARNING: %s]\n", s);
        return 1;
    case 1:
        printf("[ERROR: %s]\n", s);
        return 0;
    default:
        return 1;
    }
} /* error */
/*
 * Convert a 16-bit Shift-JIS code into the JIS code with bit 7 of both
 * bytes set (the G1 form).
 *
 * s : Shift-JIS code, first byte in bits 8..15, second byte in bits 0..7.
 * j : receives the converted code.
 *
 * Always returns 1; the input is not validated.
 */
int s2j(unsigned s, unsigned *j)/* convert shiftjis into JIS(G1) code */
{
    const unsigned lead = s & 0xFF00;
    const unsigned trail = s & 0x00FF;
    const unsigned bias = (lead <= 0x9F00) ? 0x7000 : 0xB000;
    unsigned code = 2 * (lead - bias);      /* row pair selected by byte 1 */

    if (trail >= 0x009F) {
        /* second byte in the upper half: even row of the pair */
        code += trail - 0x007E;
    } else {
        /* second byte in the lower half: odd row of the pair */
        if (trail >= 0x0040 && trail <= 0x007E)
            code += trail - 0x001F;
        else if (trail >= 0x0080)
            code += trail - 0x0020;
        code -= 0x0100;
    }
    *j = code | 0x8080;
    return 1;
} /* s2j */
/*
 * Expand a tab: store SP codes into s starting at index j until j reaches
 * the next tab stop (an index with j % LTAB == 1).  At least one and at most
 * LTAB spaces are stored.
 *
 * i : unused; kept so existing calls keep working.
 * j : index of the next free element of s (expected to be >= 1).
 * s : line array of StrMax elements; s[0] is updated to the new length.
 *
 * Returns the new index of the next free element.
 *
 * Changes from the previous version: the fill is now bounded by StrMax
 * instead of by s[0] + LTAB (which depended on the caller having kept s[0]
 * current and allowed writes beyond the array), and a tab that starts on a
 * tab stop close to the end of the array is filled as far as space allows
 * instead of being dropped.  A return value of StrMax tells the caller the
 * array is full.
 */
int insertSP(int i, int j, unsigned s[StrMax])/* tab code into spaces */
{
    (void)i;
    do {
        if (j >= StrMax)
            break;              /* array full */
        s[j++] = SP;
    } while ((j % LTAB) != 1);
    s[0] = j - 1;
    return j;
} /*insertSP*/
int readLine(FILE *fi, unsigned s[StrMax]) /* read a line into s */
{
int i=0, IOresult, j=1, k, r;
unsigned c1, c2, c, jis[1], tmp;
char b[StrMax];
s[0] = 0;
if (!feof(fi)){
r=(char)fgets(b,StrMax,fi); k=strlen(b); s[0]=0;/* read a line */
while ((i= StrMax) r=error(1,"String overflow.");
else s[0]=j-1; /* put the length */
}
return r;
} /*readLine*/
/*
 * Copy s to t, replacing two-byte alphanumerics by their one-byte
 * equivalents: codes 0xA3B0..0xA3B9 become '0'..'9', 0xA3C1..0xA3DA become
 * 'A'..'Z' and 0xA3E1..0xA3FA become 'a'..'z' (low byte minus 0x80).
 * Every other code is copied unchanged.
 *
 * s : source line; s[0] is the element count, s[1..s[0]] the codes.
 * t : destination line, same layout; must not overlap s.
 *
 * Always returns 1.
 *
 * Fix: codes inside 0xA3B0..0xA3FA that are not digits or letters (for
 * example 0xA3BA) used to leave the destination element unassigned; they
 * are now copied through like any other code.
 */
int alph2ASCII(unsigned s[StrMax],unsigned t[StrMax]) /* 2 byte AN to ASCII */
{
    unsigned idx;

    t[0] = 0;
    for (idx = 1; idx <= s[0]; idx++) {
        unsigned code = s[idx];

        if (isRange(code, 0xA3B0, 0xA3FA)) {
            unsigned low = code & 0x00FF;

            if (isRange(low, 0xB0, 0xB9) ||     /* 0..9 */
                isRange(low, 0xC1, 0xDA) ||     /* Capital A..Z */
                isRange(low, 0xE1, 0xFA))       /* Small a..z */
                code = low - 0x80;
        }
        t[idx] = code;
    }
    t[0] = idx - 1;             /* put the length */
    return 1;
} /* alph2ASCII */
/*
 * Copy s to t, replacing one-byte katakana (0xA1..0xDF) by the matching
 * two-byte code.  A following voiced mark (0xDE) or semi-voiced mark (0xDF)
 * is merged into the preceding kana where a combined two-byte code exists,
 * so t may be shorter than s.  Every resulting code >= 0x2121 gets bit 7 of
 * both bytes set (| 0x8080), like the codes produced by s2j().
 *
 * s : source line; s[0] is the element count, s[1..s[0]] the codes.
 * t : destination line, same layout; must not overlap s.
 *
 * Always returns 1.
 *
 * Fixes:
 *  - A voiced or semi-voiced mark that could not be merged (0xDE / 0xDF on
 *    its own) left the destination element unassigned; it is now converted
 *    to the stand-alone two-byte mark (0x212B / 0x212C).
 *  - U followed by the voiced mark is merged into VU (0x2574).
 *  - The "no following byte" value was NULL, a pointer constant; it is now
 *    the integer 0.
 */
int kana2Kanji(unsigned s[StrMax],unsigned t[StrMax]) /* to 2 byte kana*/
{
    int i = 1, j = 1;
    unsigned k, next, code;

    t[0] = 0;
    while ((unsigned)i <= s[0]) {
        k = s[i];
        if ((k <= 0xA0) || (0xE0 <= k)) {       /* copy a non kana code */
            code = k;
        } else {
            /* one-byte look-ahead for a voiced / semi-voiced mark */
            if (((unsigned)i < s[0]) && (s[i + 1] <= 0x00FF))
                next = s[i + 1];
            else
                next = 0;

            if (k == 0xA1) code = 0x2123;               /* ku_ten */
            else if (k == 0xA2) code = 0x2156;          /* kagi_ue */
            else if (k == 0xA3) code = 0x2157;          /* kagi_shita */
            else if (k == 0xA4) code = 0x2122;          /* tou_ten */
            else if (k == 0xA5) code = 0x2126;          /* naka_ten */
            else if (k == 0xA6) code = 0x2572;          /* wo */
            else if (isRange(k, 0xA7, 0xAB))            /* small a..o */
                code = 0x2521 + 2 * (k - 0xA7);
            else if (isRange(k, 0xAC, 0xAE))            /* small ya..yo */
                code = 0x2563 + 2 * (k - 0xAC);
            else if (k == 0xAF) code = 0x2543;          /* small tsu */
            else if (k == 0xB0) code = 0x213C;          /* chouon */
            else if (k == 0xB3 && next == 0xDE) {       /* vu */
                code = 0x2574; i++;
            }
            else if (isRange(k, 0xB1, 0xB5))            /* a..o */
                code = 0x2522 + 2 * (k - 0xB1);
            else if (isRange(k, 0xB6, 0xC1)) {          /* ka..chi */
                code = 0x252B + 2 * (k - 0xB6);
                if (next == 0xDE) {                     /* ga..di */
                    code++; i++;
                }
            }
            else if (isRange(k, 0xC2, 0xC4)) {          /* tsu..to */
                code = 0x2544 + 2 * (k - 0xC2);
                if (next == 0xDE) {                     /* zu..do */
                    code++; i++;
                }
            }
            else if (isRange(k, 0xC5, 0xC9))            /* na..no */
                code = 0x254A + k - 0xC5;
            else if (isRange(k, 0xCA, 0xCE)) {          /* ha..ho */
                code = 0x254F + 3 * (k - 0xCA);
                if (next == 0xDE) {                     /* ba..bo */
                    code++; i++;
                }
                else if (next == 0xDF) {                /* pa..po */
                    code += 2; i++;
                }
            }
            else if (isRange(k, 0xCF, 0xD3))            /* ma..mo */
                code = 0x255E + k - 0xCF;
            else if (isRange(k, 0xD4, 0xD6))            /* ya..yo */
                code = 0x2564 + 2 * (k - 0xD4);
            else if (isRange(k, 0xD7, 0xDB))            /* ra..ro */
                code = 0x2569 + k - 0xD7;
            else if (k == 0xDC) code = 0x256F;          /* wa */
            else if (k == 0xDD) code = 0x2573;          /* n' */
            else if (k == 0xDE) code = 0x212B;          /* lone voiced mark */
            else code = 0x212C;                 /* 0xDF: lone semi-voiced mark */
        }
        if (0x2121 <= code)             /* if kanji code then */
            code |= 0x8080;             /* invoke the code into right */
        t[j] = code;
        i++; j++;
    }
    t[0] = j - 1;                       /* put the length */
    return 1;
} /* kana2Kanji */
/*
 * Copy s to t, replacing every two-byte code that falls into one of the
 * listed non-standard ranges by the substitute code 0x222E | 0x8080.
 * Codes <= 0x00FF are copied unchanged.  The ranges are compared against
 * the code with bit 7 of both bytes stripped (code & 0x7F7F).
 *
 * s : source line; s[0] is the element count, s[1..s[0]] the codes.
 * t : destination line, same layout.
 *
 * Always returns 1.
 */
int deleteNon(unsigned s[StrMax], unsigned t[StrMax])/*delete non standard*/
{
    /* Inclusive [low, high] ranges of non JIS kanji codes.  Rows that are
       excluded as a whole are written as one range covering every masked
       value of those rows. */
    static const unsigned gaps[][2] = {
        {0x222F, 0x2239}, {0x2242, 0x2249}, {0x2251, 0x225B},
        {0x226B, 0x2271}, {0x227A, 0x227D},
        {0x2321, 0x232F}, {0x233A, 0x2340}, {0x235B, 0x2360},
        {0x237B, 0x237E},
        {0x2474, 0x247E},
        {0x2577, 0x257E},
        {0x2639, 0x2640}, {0x2659, 0x267E},
        {0x2742, 0x2750}, {0x2772, 0x277E},
        {0x2841, 0x287E},
        {0x2900, 0x2F7F},               /* rows 0x29..0x2F */
        {0x4F54, 0x4F7E},
        {0x7427, 0x747E},
        {0x7500, 0x7E7F}                /* rows 0x75..0x7E */
    };
    int pos = 1;

    while (pos <= s[0]) {
        unsigned code = s[pos];

        if (code > 0x00FF) {            /* one-byte codes pass through */
            int stripped = code & 0x7F7F;   /* strip 0x8080 off */
            size_t g;

            for (g = 0; g < sizeof gaps / sizeof gaps[0]; g++) {
                if (isRange(stripped, gaps[g][0], gaps[g][1])) {
                    code = (0x222E | 0x8080);   /* replace by alternative */
                    break;
                }
            }
        }
        t[pos] = code;
        pos++;
    }
    t[0] = pos - 1;                     /* put the length */
    return 1;
} /* deleteNon */
/*
 * Run the three conversion passes over one line:
 * deleteNon() -> alph2ASCII() -> kana2Kanji().
 *
 * s : source line; s[0] is the element count, s[1..s[0]] the codes.
 * t : receives the converted line in the same layout.
 *
 * Always returns 1.
 *
 * The first intermediate buffer was dimensioned with the literal 4000; it
 * now uses StrMax like every other line array, so the buffers cannot get
 * out of step if StrMax is changed.
 */
int convert(unsigned s[StrMax], unsigned t[StrMax]) /* converts into t */
{
    unsigned u[StrMax], v[StrMax];

    deleteNon(s, u);    /* change non standard code into dummy character. */
    alph2ASCII(u, v);   /* 2 byte alphabet and numeric to ASCII */
    kana2Kanji(v, t);   /* 1 byte kana char to 2 bytes kana character. */
    return 1;
} /* convert */
/*
 * Write a converted line: every code t[1..t[0]] except SUB is passed to
 * send().  Always returns 1.
 */
int writeResult(unsigned t[StrMax])
{
    unsigned idx = 1;

    while (idx <= t[0]) {
        unsigned code = t[idx++];

        if (code == SUB)
            continue;           /* SUB is dropped from the output */
        send(code);
    }
    return 1;
} /* writeResult */
int main(int argc,char *argv[]) /* usage : sj2jis source object */
{
unsigned s[StrMax]; /* s[0] : number of source elements */
unsigned t[StrMax]; /* t[0] : number of object elements */
switch (argc) {
case 0 : case 1 : help(); break; /* call help message */
case 2 : break; /* success testing */
default : error(0,"A souce file shall be specified.");
}
if (argc >1) {
if ((fi = fopen(argv[1],"r"))==NULL) /* open source file */
error(1,strcat(strcat("Source ", argv[1])," could not open."));
else {
while (!feof(fi)){
if (readLine(fi,s)!=0){ /* read a line into euc */
convert(s,t); /* adjust interchange code */
writeResult(t); /* write the result */
}
}
fclose(fi); /* close source file */
}
}
return 0;
} /* main */