/* * * * unistd, Verzija 3.1 (c) 1996-2000 Vlado Ke"selj * * Programski jezik: C * Datum: 22. juni 2000. * * */ char *unistdVerzija = "unistd, Verzija 3.1 (c) 1996-2000 Vlado Ke\"selj"; #include #include #include #include #include #define MAX_RED 10000 typedef unsigned char znak_t; typedef znak_t prihvatnik_t[MAX_RED+2]; typedef znak_t *prihvatnik_rt; /* tip za uop"stenu nisku, p pokazuje na po"cetak, k pokazuje na zadnji znak u niski, f na ,,fizi"cki'' kraj (toliko je alocirano) */ typedef struct gen_niska_t { znak_t *p, *k, *f; } *gen_niska_tp; gen_niska_tp GenNiska; prihvatnik_t prihvatnik; typedef enum { STD_IPP, STD_V, STD_ASCII, STD_ASKETOV, STD_DUAL, STD_CP1250, STD_CP1250P, STD_CP852, STD_CP852P, STD_LATIN2, STD_LATIN2P, STD_POSTV, STD_SLIKOVNI, STD_SRPSCII, STD_TANJUG, STD_YUSCII, STD_TEXL, STD_KRASCII, STD_DUAL1, STD_BQ, STD_XQ, STD_QQ, STD_DYQ, STD_QDL, STD_QWYX, STD_TEXC, STD_SORTC, STD_SORTC2, STD_SORTL, STD_SORTL2, STD_ISO, STD_KOI, STD_IBM866, STD_WIN, STD_UTF8, STD_UNICODEI, STD_GRANICNIK, STD_Q } std_t; std_t ulazniStd, izlazniStd; char *NazivIzlaznogStd=NULL; #define SLOVO_CX 0 #define SLOVO_CY 1 #define SLOVO_Dx 2 #define SLOVO_DX 3 #define SLOVO_Dy 4 #define SLOVO_DY 5 #define SLOVO_Ly 6 #define SLOVO_LY 7 #define SLOVO_Ny 8 #define SLOVO_NY 9 #define SLOVO_SX 10 #define SLOVO_ZX 11 #define SLOVO_cx 12 #define SLOVO_cy 13 #define SLOVO_dx 14 #define SLOVO_dy 15 #define SLOVO_ly 16 #define SLOVO_ny 17 #define SLOVO_sx 18 #define SLOVO_zx 19 #define BR_SLOVA 20 struct stdTabela { std_t id; znak_t *slovo[BR_SLOVA]; znak_t *cSlovo; } stdT[] = { {STD_V, {"\"C", "\'C", "D\"z", "D\"Z", "Dj", "DJ", "Lj", "LJ", "Nj", "NJ", "\"S", "\"Z", "\"c", "\'c", "d\"z", "dj", "lj", "nj", "\"s", "\"z"}, NULL}, {STD_SLIKOVNI, {"C^", "C~", "Dz^", "DZ^", "D~", "D~", "Lj", "LJ", "Nj", "NJ", "S^", "Z^", "c^", "c~", "dz^", "d~", "lj", "nj", "s^", "z^"},NULL}, {STD_ASKETOV, {"Ch", "Cz", "Dzh", "DZh", "Dj", "DJ", "Lj", "LJ", "Nj", "NJ", "Sh", "Zh", "ch", "cz", "dzh", "dj", 
"lj", "nj", "sh", "zh"},NULL},   /* closes the STD_ASKETOV row opened on the previous line */
/*
 * Rows of stdT[].  Each row is { standard id, slovo[BR_SLOVA], cSlovo }.
 * The slovo[] spellings are listed in SLOVO_* index order:
 *   CX, CY, Dx, DX, Dy, DY, Ly, LY, Ny, NY, SX, ZX,
 *   cx, cy, dx, dy, ly, ny, sx, zx
 * cSlovo is NULL for every row in this part of the table.
 */
{STD_KRASCII,
 {"C", "C", "Dz", "DZ", "Dj", "DJ", "Lj", "LJ", "Nj", "NJ", "S", "Z",
  "c", "c", "dz", "dj", "lj", "nj", "s", "z"}, NULL},
{STD_YUSCII,
 {"^", "]", "D`", "D@", "\\", "\\", "Lj", "LJ", "Nj", "NJ", "[", "@",
  "~", "}", "d`", "|", "lj", "nj", "{", "`"},NULL},
{STD_TANJUG,
 {"CC", "CH", "Dzz", "DZZ", "Dd", "DD", "Lj", "LJ", "Nj", "NJ", "SS", "ZZ",
  "cc", "ch", "dzz", "dd", "lj", "nj", "ss", "zz"},NULL},
{STD_SRPSCII,
 {"^", "]", "X", "X", "\\", "\\", "Q", "Q", "W", "W", "[", "@",
  "~", "}", "x", "|", "q", "w", "{", "`"},NULL},
{STD_DUAL,
 {"Cx", "Cy", "Dx", "DX", "Dy", "DY", "Ly", "LY", "Ny", "NY", "Sx", "Zx",
  "cx", "cy", "dx", "dy", "ly", "ny", "sx", "zx"},NULL},
{STD_POSTV,
 {"C\"", "C\'", "Dz\"", "DZ\"", "Dj", "DJ", "Lj", "LJ", "Nj", "NJ", "S\"", "Z\"",
  "c\"", "c\'", "dz\"", "dj", "lj", "nj", "s\"", "z\""},NULL},
{STD_LATIN2,
 {"\xC8", "\xC6", "D\xBE", "D\xAE", "\xD0", "\xD0", "Lj", "LJ", "Nj", "NJ", "\xA9", "\xAE",
  "\xE8", "\xE6", "d\xBE", "\xF0", "lj", "nj", "\xB9", "\xBE"},NULL},
{STD_CP852,
 {"\xAC", "\x8F", "D\xA7", "D\xA6", "\xD1", "\xD1", "Lj", "LJ", "Nj", "NJ", "\xE6", "\xA6",
  "\x9F", "\x86", "d\xA7", "\xD0", "lj", "nj", "\xE7", "\xA7"},NULL},
{STD_CP1250,
 {"\xC8", "\xC6", "D\x9E", "D\x8E", "\xD0", "\xD0", "Lj", "LJ", "Nj", "NJ", "\x8A", "\x8E",
  "\xE8", "\xE6", "d\x9E", "\xF0", "lj", "nj", "\x9A", "\x9E"},NULL},
/*
 * STD_TEXL: TeX/LaTeX spellings.  The SLOVO_DX entry used to read
 * "D\\{v}Z", i.e. D followed by a literal brace; it is now "D\\v{Z}",
 * using the same \v{...} accent command as the other caron letters in
 * this row.  The Dj/DJ/dj entries overlay a hyphen on the letter D/d.
 */
{STD_TEXL,
 {"\\v{C}", "\\\'{C}", "D\\v{z}", "D\\v{Z}",
  "{\\leavevmode\\setbox0=\\hbox{D}\\kern0pt"
  "\\rlap{\\kern.04em\\raise.188\\ht0\\hbox{-}}D}",
  "{\\leavevmode\\setbox0=\\hbox{D}\\kern0pt"
  "\\rlap{\\kern.04em\\raise.188\\ht0\\hbox{-}}D}",
  "Lj", "LJ", "Nj", "NJ", "\\v{S}", "\\v{Z}",
  "\\v{c}", "\\\'{c}", "d\\v{z}",
  "{\\leavevmode\\setbox0=\\hbox{d}\\kern0pt"
  "\\rlap{\\kern.215em\\raise.46\\ht0\\hbox{-}}d}",
  "lj", "nj", "\\v{s}", "\\v{z}"},NULL},
{STD_BQ,
 {"`C", "`K", "`D", "`D`J", "`G", "`G", "`L", "`L`J", "`N", "`N`J", "`S", "`Z",
  "`c", "`k", "`d", "`g", "`l", "`n", "`s", "`z"},NULL},
/* The STD_Q row continues on the following line. */
{STD_Q, {"C", "K", "D", "D",
"G", "G", "L", "L", "N", "N", "S", "Z", "c", "k", "d", "g", "l", "n", "s", "z"},NULL}, {STD_DYQ, {"Y", "Q", "Dx", "DX", "Dq", "DQ", "Lj", "LJ", "Nj", "NJ", "W", "X", "y", "q", "dx", "dq", "lj", "nj", "w", "x"}, NULL}, {STD_TEXC, {"CH", "C1", "D2", "D2", "Dj", "DJ", "Lj", "LJ", "Nj", "NJ", "SH", "ZH", "ch", "c1", "d2", "dj", "lj", "nj", "sh", "zh"}, NULL}, {STD_SORTL2, {"CX", "CY", "Dx", "DX", "Dy", "DY", "Ly", "LY", "Ny", "NY", "SX", "ZX", "cx", "cy", "dx", "dy", "ly", "ny", "sx", "zx"}, NULL}, {STD_ISO, {"\xC7", "\xAB", "\xAF", "\xAF", "\xA2", "\xA2", "\xA9", "\xA9", "\xAA", "\xAA", "\xC8", "\xB6", "\xE7", "\xFB", "\xFF", "\xF2", "\xF9", "\xFA", "\xE8", "\xD6"}, /* A B C D E F G H I J K L M N O P Q */ "\xB0\xB1\xC6\xB4\xB5\xC4\xB3\xC5\xB8\xA8\xBA\xBB\xBC\xBD\xBE\xBFQ" /* R S T U V WXY Z a b c d e f g h i j*/ "\xC0\xC1\xC2\xC3\xB2WXY\xB7\xD0\xD1\xE6\xD4\xD5\xE4\xD3\xE5\xD8\xF8" /* k l m n o p q r s t u vwxy z */ "\xDA\xDB\xDC\xDD\xDE\xDFq\xE0\xE1\xE2\xE3\xD2wxy\xD7"}, {STD_KOI, {"\xFE", "\xFE\xF8", "\xE4\xD6", "\xE4\xF6", "\xE4j", "\xE4J", "\xEC\xF8", "\xEC\xF8","\xEE\xF8", "\xEE\xF8", "\xFB", "\xF6", "\xDE", "\xDE\xD8", "\xC4\xD6", "\xC4j", "\xCC\xD8", "\xCE\xD8", "\xDB", "\xD6"}, "\xE1\xE2\xE3\xE4\xE5\xE6\xE7\xE8\xE9J\xEB\xEC\xED\xEE\xEF\xF0Q" "\xF2\xF3\xF4\xF5\xF7WXY\xFA\xC1\xC2\xC3\xC4\xC5\xC6\xC7\xC8\xC9j" "\xCB\xCC\xCD\xCE\xCF\xD0q\xD2\xD3\xD4\xD5\xD7wxy\xDA"}, {STD_IBM866, {"\x97", "\x97\x9C", "\x84\xA6", "\x84\x86", "\x84j", "\x84J", "\x8B\xEC", "\x8B\x9C","\x8D\xEC", "\x8D\x9C", "\x98", "\x86", "\xE7", "\xE7\xEC", "\xA4\xA6", "\xA4j", "\xAB\xEC", "\xAD\xEC", "\xE8", "\xA6"}, "\x80\x81\x96\x84\x85\x94\x83\x95\x88J\x8A\x8B\x8C\x8D\x8E\x8FQ" "\x90\x91\x92\x93\x82WXY\x87\xA0\xA1\xE6\xA4\xA5\xE4\xA3\xE5\xA8j" "\xAA\xAB\xAC\xAD\xAE\xAFq\xE0\xE1\xE2\xE3\xA2wxy\xA7"}, {STD_WIN, {"\xD7", "\x8E", "\x8F", "\x8F", "\x80", "\x80", "\x8A", "\x8A", "\x8C", "\x8C", "\xD8", "\xC6", "\xF7", "\x9E", "\x9F", "\x90", "\x9A", "\x9C", "\xF8", "\xE6"}, 
"\xC0\xC1\xD6\xC4\xC5\xD4\xC3\xD5\xC8\xA3\xCA\xCB\xCC\xCD\xCE\xCFQ" "\xD0\xD1\xD2\xD3\xC2WXY\xC7\xE0\xE1\xF6\xE4\xE5\xF4\xE3\xF5\xE8" "\xBC\xEA\xEB\xEC\xED\xEE\xEFq\xF0\xF1\xF2\xF3\xE2wxy\xE7"}, {STD_GRANICNIK}}; struct stdTabela *pstdTu, *pstdTi, *pstdTsortl2, *pstdTiso, *pstdTv; struct stdEtiketa { std_t id; znak_t *etiketa; } stdE[] = { {STD_V, "v"}, {STD_V, "vizuelni"}, {STD_IPP, "ipp"}, {STD_IPP, "internetica++"}, {STD_IPP, "i++"}, {STD_SLIKOVNI, "slikovni"}, {STD_ASKETOV, "asketov"}, {STD_KRASCII, "internetica"}, {STD_KRASCII, "krascii"}, {STD_KRASCII, "sascii"}, {STD_KRASCII, "kraski"}, {STD_KRASCII, "saski"}, {STD_ASCII, "ascii"}, {STD_CP1250, "cp1250"}, {STD_CP1250P, "cp1250p"}, {STD_CP852, "cp852"}, {STD_CP852, "latinii"}, {STD_CP852P, "cp852p"}, {STD_CP852P, "latiniip"}, {STD_YUSCII, "yuscii"}, {STD_TANJUG, "tanjug"}, {STD_SRPSCII, "srpscii"}, {STD_DUAL, "dual"}, {STD_DUAL, "borin"}, {STD_POSTV, "postv"}, {STD_LATIN2, "latin2"}, {STD_LATIN2, "iso8859-2"}, {STD_LATIN2, "iso-8859-2"}, {STD_LATIN2P, "latin2p"}, {STD_TEXL, "texl"}, {STD_DUAL1, "dual1"}, {STD_DUAL1, "borin11"}, {STD_BQ, "bq"}, {STD_BQ, "bqs"}, {STD_XQ, "xq"}, {STD_XQ, "xqs"}, {STD_QQ, "qq"}, {STD_QQ, "qqs"}, {STD_DYQ, "dyq"}, {STD_QWYX, "qwyx"}, {STD_QDL, "qwyx-de-luxe"}, {STD_QDL, "qwyx-dl"}, {STD_QDL, "qdl"}, {STD_TEXC, "texc"}, {STD_SORTL, "sortl"}, {STD_SORTL2, "sortl2"}, {STD_SORTC, "sortc"}, {STD_SORTC2, "sortc2"}, {STD_ISO, "iso"}, {STD_ISO, "iso-8859-5"}, {STD_KOI, "koi"}, {STD_KOI, "koi8-r"}, {STD_IBM866, "ibm866"}, {STD_IBM866, "cp866"}, {STD_IBM866, "alternativny"}, {STD_WIN, "win"}, {STD_WIN, "Windows-1251"}, {STD_UTF8, "utf8"}, {STD_UTF8, "utf-8"}, {STD_UNICODEI, "interniUnicode"}, {STD_GRANICNIK, NULL}}; /**** Opcije - tipovi, podaci ****/ struct opcija { znak_t znak; znak_t *opis; int argument; } tabelaOpcija[] = { {'h',"HTML dokument", 0}, {'e',"e-mail poruka", 0}, {'p',"od\"stampaj detaljno uputstvo", 0}, {'P',"od\"stampaj detaljno uputstvo bez prekida nakon svake strane", 
0}, {'i',"ignori\"si gre\"ske koja se mogu prevazi\'ci", 0}, {'s',"uklju\"ci startnu etiketu", 0}, {'t',"prevedi \"cak i kada su isti ulazni i izlazni standardi", 0}, {'u',"zadaje se inicijalni ulazni standard ", 1}, {'\0',"",-1}}; struct { unsigned Uputstva : 2; unsigned PrijaveGreske : 1; unsigned StartneEtikete : 1; unsigned Temeljno : 1; unsigned HTML : 1; unsigned email : 1; } ind = {0,1,0,0,0,0}; /**** Opcije - prototipovi funkcija ****/ void obradiOpciju(znak_t *op); /**** Makroi ****/ #define int_izmedju(i,d,g) ((i)>=(unsigned int)(d)&&(i)<=(unsigned int)(g)) #define greska(g) (!(ind.PrijaveGreske) ||\ fprintf(stderr,"Gre\"ska:%s\a\n",(g))) #define staviZnak(p,i,c)\ (((i)<1 ? (fprintf(stderr,"Predug red!\a\n"),exit(1)) :\ (*((p)++) = (c), --(i))),1) /* Definisanje up, upStatic i ipLen */ #define init_up_ip \ prihvatnik_t upStatic;\ prihvatnik_rt up;\ int ipLen = MAX_RED+2, slovo;\ up = upStatic;\ strcpy(up,ip); #define do_kraja_niske while (*up != '\0') #define vidisIspred(n,a) (up[n] == (a)) #define vidis(a) vidisIspred(0,(a)) #define prepisi_znak staviZnak(ip,ipLen,*(up++)) #define pisiZnak(a) staviZnak(ip,ipLen,(a)) #define pisi2znaka(a) (pisiZnak(a), pisiZnak(a)) #define kopirajZnak staviZnak(ip,ipLen,*up) #define pisi0 pisiZnak('\0') #define nase_slovo(a) ((slovo=jelNase(&up,pstdTu->slovo,(a)))>-1) #define pisiNaseInterstd (staviNisku(&ip, &ipLen, pstdTsortl2->slovo[slovo])) #define pisiCZnak(a) ((a) < 'A' ? pisiZnak(a) :\ ((a) <= 'Z' ? pisiZnak(*((pstdTi->cSlovo)+(a)-'A')) :\ ((a) < 'a' ? pisiZnak(a) :\ ((a) <= 'z' ? pisiZnak(*((pstdTi->cSlovo)+(a)-'A'-'a'+'Z'+1)) :\ pisiZnak(a) )))) #define interstd_pisi (pisiZnak(*up),\ ((*up=='Z') ? pisiZnak('W') :\ ((*up=='z') ? 
pisiZnak('w') :\ ((*up!='\n') && pisiZnak(*up)))),++up) /**** Prototipovi funkcija ****/ int strPodudara(znak_t *s1, znak_t *s2); int strPodudarA(znak_t *s1, znak_t *s2); int strPrazno(znak_t *s); int strCmp(znak_t *s1, znak_t *s2); int dalijeEtiketa(prihvatnik_rt prih); char *standardnaEtiketa(std_t s); void stampajUputstvo(); void prevedi(std_t uStd, std_t iStd); void prevediT(); int prevedi_HTML(); int prevedi_email(); void staviNisku(znak_t**, int*, znak_t*); struct stdTabela *nadjiTabelu(std_t); int jelNase(znak_t**, znak_t *[], std_t); void staviTab16(prihvatnik_rt); void skiniTab16(prihvatnik_rt); void staviHeks(prihvatnik_rt); void skiniHeks(prihvatnik_rt); void u_utf8(znak_t* s, znak_t* d); void iso_u_unicode(znak_t* s, znak_t* d); int unistd_main(int argc, char *argv[]); /**** Prototipovi funkcija ****/ int strPodudara(znak_t *s1, znak_t *s2); int strPodudarA(znak_t *s1, znak_t *s2); int strPrazno(znak_t *s); int strCmp(znak_t *s1, znak_t *s2); int dalijeEtiketa(prihvatnik_rt prih); char *standardnaEtiketa(std_t s); void stampajUputstvo(); void prevedi(std_t uStd, std_t iStd); void prevediT(); int prevedi_HTML(); int prevedi_email(); void staviNisku(znak_t**, int*, znak_t*); struct stdTabela *nadjiTabelu(std_t); int jelNase(znak_t**, znak_t *[], std_t); void staviTab16(prihvatnik_rt); void skiniTab16(prihvatnik_rt); void staviHeks(prihvatnik_rt); void skiniHeks(prihvatnik_rt); void u_utf8(znak_t* s, znak_t* d); void iso_u_unicode(znak_t* s, znak_t* d); int unistd_main(int argc, char *argv[]); void ipp_u_krascii(prihvatnik_rt ip); void unicodeI_u_v(gen_niska_tp gn, prihvatnik_rt ip); void v_u_sortl2(prihvatnik_rt ip); void sortl2_u_v(prihvatnik_rt ip); void ipp_u_v(prihvatnik_rt ip); void ipp_u_iso(prihvatnik_rt ip); void ipp_u_utf8(prihvatnik_rt ip); void ipp_u_unicodeI(prihvatnik_rt up, gen_niska_tp gn); void v_u_unicodeI(prihvatnik_rt up, gen_niska_tp gn); void unicodeI_u_ipp(gen_niska_tp gn, prihvatnik_rt ip1); void prostiL_u_unicodeI(prihvatnik_rt p, 
gen_niska_tp gn, std_t s); void unicodeI_u_prostiL(gen_niska_tp gn, prihvatnik_rt p); void prostiL_u_sortl2(prihvatnik_rt ip, std_t s); void sortl2_u_prostiL(prihvatnik_rt ip); void dual1_u_sortl2(prihvatnik_rt ip); void sortl2_u_dual1(prihvatnik_rt ip); void dual1_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn); void unicodeI_u_dual1(gen_niska_tp gn, prihvatnik_rt ip); void bq_u_unicodeI(prihvatnik_rt up, gen_niska_tp genNiska); void unicodeI_u_bq(gen_niska_tp gn, prihvatnik_rt ip); void q_u_unicodeI(prihvatnik_rt up, gen_niska_tp genNiska, znak_t c); void unicodeI_u_q(gen_niska_tp gn, prihvatnik_rt ip, znak_t c); void bq_u_sortl2(prihvatnik_rt ip); void q_u_sortl2(prihvatnik_rt ip, znak_t c); void sortl2_u_bq(prihvatnik_rt ip); void sortl2_u_q(prihvatnik_rt ip, znak_t c); void dyq_u_sortl2(prihvatnik_rt ip); void sortl2_u_dyq(prihvatnik_rt ip); void qdl_u_sortl2(prihvatnik_rt ip); void sortl2_u_qdl(prihvatnik_rt ip); void dyq_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn); void unicodeI_u_dyq(gen_niska_tp gn, prihvatnik_rt ip); void qdl_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn); void unicodeI_u_qdl(gen_niska_tp gn, prihvatnik_rt ip); void texc_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn); void unicodeI_u_texc(gen_niska_tp gn, prihvatnik_rt ip); void texc_u_sortl2(prihvatnik_rt ip); void sortl2_u_texc(prihvatnik_rt ip); void sortl_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn); void sortc_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn); void sortc2_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn); void unicodeI_u_sortl(gen_niska_tp gn, prihvatnik_rt ip); void unicodeI_u_sortc2(gen_niska_tp gn, prihvatnik_rt ip); void unicodeI_u_sortc(gen_niska_tp gn, prihvatnik_rt ip); void sortc2_u_sortl2(prihvatnik_rt ip); void sortl2_u_sortl2(prihvatnik_rt ip); void sortl2_u_sortc2(prihvatnik_rt ip); void sortl2_u_unicodeI(prihvatnik_rt up, gen_niska_tp genNiska); void unicodeI_u_sortl2(gen_niska_tp gn, prihvatnik_rt ip); void prostiC_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn); 
void unicodeI_u_prostiC(gen_niska_tp gn, prihvatnik_rt ip); void ipp_u_prostiC(prihvatnik_rt ip); void utf8_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn); void unicodeI_u_utf8(gen_niska_tp gn, prihvatnik_rt ip); void u_utf8(znak_t* s, znak_t* d); void iso_u_unicode(znak_t* s, znak_t* d); /*********************************************************************** * Regularni izrazi, zaglavlje * Preuzeto iz koda Henry Spencer-a, modifikovano. * Modifed code by Henry Spencer. * * Definitions etc. for regexp(3) routines. * * Caveat: this is V8 regexp(3) [actually, a reimplementation thereof], * not the System V one. */ #define NSUBEXP 10 typedef struct regexp { char *startp[NSUBEXP]; char *endp[NSUBEXP]; char regstart; /* Internal use only. */ char reganch; /* Internal use only. */ char *regmust; /* Internal use only. */ int regmlen; /* Internal use only. */ char program[1]; /* Unwarranted chumminess with compiler. */ } regexp; regexp *regcomp(); int regexec(); void regsub(); void regerror(); /* * The first byte of the regexp internal "program" is actually this magic * number; the start node begins in the second byte. 
*/ #define MAGIC 0234 /* kraj zaglavlja regularnih izraza ***********************************************************************/ /* Glavna funkcija */ int main(int argc, char *argv[]) { return unistd_main(argc, argv); } int unistd_main(int argc, char *argv[]) { int argStd=0, i, j; ulazniStd = STD_ASCII; izlazniStd = STD_V; /* rezervi"si prostor za radne promjenljive */ GenNiska = (gen_niska_tp) malloc(sizeof(struct gen_niska_t)); GenNiska->p = GenNiska->k = (znak_t*) malloc((sizeof(znak_t))*(MAX_RED+2)); if (GenNiska->p == NULL) { fprintf(stderr,"Nema dovoljno memorije!\n"); exit(1); } GenNiska->f = GenNiska->p + MAX_RED+1; for (i = ind.Uputstva = 1; i\n", NazivIzlaznogStd); stampajUputstvo(); if (ind.Uputstva==1) return(1); return(0); } if (ind.HTML) { return prevedi_HTML(); } else if (ind.email) { return prevedi_email(); } else { if (ind.StartneEtikete) printf("\n", NazivIzlaznogStd); while (1) { znak_t *p; int indEtikete; if (fgets(prihvatnik,MAX_RED+2,stdin)==NULL) break; if (strlen(prihvatnik) > MAX_RED) { znak_t b[80]; sprintf(b, "Ulazna datoteka sadr\"zi red du\"zi od " "%u znakova.", MAX_RED); greska(b); greska("Treba pove\'cati veli\"cinu prihvatnika.\a"); if (ind.PrijaveGreske) return(1); } indEtikete = dalijeEtiketa(prihvatnik); if (indEtikete == 2) { znak_t *p=prihvatnik; do {p[0] = p[1]; ++p;} while(*p != '\0'); indEtikete = 0; } if (indEtikete == 0) { if (ind.Temeljno || ulazniStd!=izlazniStd) { prevediT(); } printf("%s",prihvatnik); } } /* kraj od while (1) */ } /* kraj od else (if ind.HTML) */ return 0; } /* kraj funkcije unistd_main */ /* Indirektno prevodjenje */ void prevediT() { prevedi(ulazniStd, STD_UNICODEI); if (izlazniStd != STD_UNICODEI) prevedi(STD_UNICODEI, izlazniStd); else { fprintf(stderr, "Interni Unicode nije podr\"zan kao " "izlazni standard, za sada...\n"); exit(1); } } /* Direktno prevodjenje iz standarda u standard */ void prevedi(std_t uStd, std_t iStd) { if (iStd == STD_UNICODEI) { switch (uStd) { case STD_V: 
v_u_unicodeI(prihvatnik, GenNiska); break; case STD_IPP: ipp_u_unicodeI(prihvatnik, GenNiska); break; case STD_QWYX: case STD_SLIKOVNI: case STD_ASKETOV: case STD_YUSCII: case STD_TANJUG: case STD_SRPSCII: case STD_DUAL: case STD_POSTV: case STD_TEXL: case STD_KRASCII: case STD_ASCII: case STD_LATIN2: case STD_CP852: case STD_CP1250: prostiL_u_unicodeI(prihvatnik, GenNiska, uStd); break; case STD_ISO: case STD_KOI: case STD_IBM866: case STD_WIN: prostiC_u_unicodeI(prihvatnik, GenNiska); break; case STD_LATIN2P: case STD_CP852P: case STD_CP1250P: skiniHeks(prihvatnik); prostiL_u_unicodeI(prihvatnik, GenNiska, uStd); break; case STD_BQ: bq_u_unicodeI(prihvatnik, GenNiska); break; case STD_XQ: q_u_unicodeI (prihvatnik, GenNiska, 'x'); break; case STD_QQ: q_u_unicodeI (prihvatnik, GenNiska, 'q'); break; case STD_DUAL1: dual1_u_unicodeI(prihvatnik, GenNiska); break; case STD_DYQ: dyq_u_unicodeI(prihvatnik, GenNiska); break; case STD_QDL: qdl_u_unicodeI(prihvatnik, GenNiska); break; case STD_SORTL: sortl_u_unicodeI(prihvatnik, GenNiska); break; case STD_SORTL2: sortl2_u_sortl2(prihvatnik); sortl2_u_unicodeI(prihvatnik, GenNiska); break; case STD_SORTC: sortc_u_unicodeI(prihvatnik, GenNiska); break; case STD_SORTC2:sortc2_u_unicodeI(prihvatnik, GenNiska); break; case STD_TEXC: texc_u_unicodeI(prihvatnik, GenNiska); break; case STD_UTF8: utf8_u_unicodeI(prihvatnik, GenNiska); break; default: greska("Nepodr\"zana konverzija (2)"); exit(2); } } else if (uStd == STD_UNICODEI) { switch (iStd) { case STD_IPP: unicodeI_u_ipp(GenNiska, prihvatnik); break; case STD_V: unicodeI_u_v (GenNiska, prihvatnik); break; case STD_QWYX: case STD_SLIKOVNI: case STD_ASKETOV: case STD_YUSCII: case STD_TANJUG: case STD_SRPSCII: case STD_DUAL: case STD_POSTV: case STD_TEXL: case STD_KRASCII: case STD_ASCII: case STD_LATIN2: case STD_CP852: case STD_CP1250: unicodeI_u_prostiL(GenNiska, prihvatnik); break; case STD_ISO: case STD_KOI: case STD_IBM866: case STD_WIN: unicodeI_u_prostiC(GenNiska, 
prihvatnik); break; case STD_LATIN2P: case STD_CP852P: case STD_CP1250P: unicodeI_u_prostiL(GenNiska, prihvatnik); staviHeks(prihvatnik); break; case STD_BQ: unicodeI_u_bq(GenNiska, prihvatnik); break; case STD_XQ: unicodeI_u_q (GenNiska, prihvatnik, 'x'); break; case STD_QQ: unicodeI_u_q (GenNiska, prihvatnik, 'q'); break; case STD_DUAL1: unicodeI_u_dual1(GenNiska, prihvatnik); break; case STD_DYQ: unicodeI_u_dyq (GenNiska, prihvatnik); break; case STD_QDL: unicodeI_u_qdl (GenNiska, prihvatnik); break; case STD_SORTL2: unicodeI_u_sortl2(GenNiska, prihvatnik); break; case STD_SORTL: unicodeI_u_sortl (GenNiska, prihvatnik); break; case STD_SORTC2: unicodeI_u_sortc2(GenNiska, prihvatnik); break; case STD_SORTC: unicodeI_u_sortc (GenNiska, prihvatnik); break; case STD_TEXC: unicodeI_u_texc (GenNiska, prihvatnik); break; case STD_UTF8: unicodeI_u_utf8 (GenNiska, prihvatnik); break; default: greska("Nepodr\"zana konverzija (4)"); exit(1); } } else if (uStd==STD_IPP && iStd==STD_KRASCII) { ipp_u_krascii(prihvatnik); } else if (uStd==STD_IPP && iStd==STD_UTF8) { ipp_u_utf8(prihvatnik); } else if (uStd==STD_UNICODEI && iStd==STD_IPP) { unicodeI_u_ipp(GenNiska, prihvatnik); } else { greska("Nepodr\"zana konverzija (5)"); exit(1); } } /* kraj funkcije prevedi */ void v_u_unicodeI(prihvatnik_rt up, gen_niska_tp gn) { v_u_sortl2(up); sortl2_u_unicodeI(up, gn); } void ipp_u_krascii(prihvatnik_rt ip) { static int globalnoIzuzece=0; init_up_ip; do_kraja_niske { int i; if (strPodudarA("",up)) { globalnoIzuzece = 0; up += 6; continue; } if (strPodudarA("",up)) { globalnoIzuzece = 1; up += 7; continue; } if (globalnoIzuzece) { prepisi_znak; continue; } for (i=0; islovo[i]; if (*p != *q) continue; else { do ++p, ++q; while ( *p==*q && *q!='\0'); if (*q == '\0') { staviNisku(&ip,&ipLen,pstdTi->slovo[i]); up = p; break; } else if (*p != '\\') continue; else { do ++p; while (*p == '\\'); while (*p==*q && *q!='\0') ++p, ++q; if (*q != '\0') continue; else { do pisiZnak(*up), ++up; 
while(*up!='\\'); ++up; break; } } } } if (i==BR_SLOVA) { pisiZnak(*up); ++up; } } pisi0; } /* Kraj ipp_u_krascii */ void unicodeI_u_v(gen_niska_tp gn, prihvatnik_rt ip) { unicodeI_u_ipp(gn, ip); ipp_u_v(ip); } void v_u_sortl2(prihvatnik_rt ip) { init_up_ip; do_kraja_niske { int i; for (i=0; islovo[i]; if (*p != *q) continue; else { do ++p, ++q; while ( *p==*q && *q!='\0'); if (*q == '\0') { staviNisku(&ip,&ipLen,pstdTsortl2->slovo[i]); up = p; break; } else if (*p != '\\') continue; else { do ++p; while (*p == '\\'); while (*p==*q && *q!='\0') ++p, ++q; if (*q != '\0') continue; else { do pisi2znaka(*up), ++up; while(*up!='\\'); ++up; break; } } } } if (i==BR_SLOVA) interstd_pisi; } pisi0; } void sortl2_u_v(prihvatnik_rt ip) { init_up_ip; do_kraja_niske { if ((slovo = jelNase(&up, pstdTsortl2->slovo, STD_SORTL2)) > -1) staviNisku(&ip, &ipLen, pstdTv->slovo[slovo]); else { kopirajZnak; if vidis('\"') { znak_t *tmp=up+2, c; while (*tmp == '\\') tmp += 2; c = toupper(*tmp); if (((c=='C' || c=='S') && *tmp==tmp[1]) || (c=='Z' && toupper(tmp[1])=='W')) pisiZnak('\\'); } else if (*up == '\'') { znak_t *tmp=up+2; while (*tmp == '\\') tmp += 2; if ((*tmp=='C' || *tmp=='c') && *tmp==tmp[1]) staviZnak(ip, ipLen, '\\'); } else if (*up == 'D' || *up == 'd') { znak_t *tmp=up+2; while (*tmp == '\\') tmp += 2; if ((*tmp=='j' && tmp[1]=='j') || (*tmp=='J' && tmp[1]=='J') || (*tmp=='z' && tmp[1]=='x') || (*up=='D' && *tmp=='Z' && tmp[1]=='X')) staviZnak(ip, ipLen, '\\'); } else if (*up=='L' || *up=='l' || *up=='N' || *up=='n') { char *tmp=up+2; while (*tmp == '\\') tmp += 2; if (*tmp=='j' || (isupper(*up) && *tmp=='J')) staviZnak(ip, ipLen, '\\'); } if (*up != '\n') ++up; ++up; } } pisi0; } void ipp_u_v(prihvatnik_rt ip) { static int globalnoIzuzece = 0; static regexp *re1=NULL; init_up_ip; if (re1==NULL) re1=regcomp("^'\\\\*[Cc]|^\"\\\\*[CcSsZz]|" "^[DNL]\\\\*[Jj]|^[dnl]\\\\*j|" "^D\\\\*\"[Zz]|^d\\\\*\"z"); do_kraja_niske { int i; if (strPodudarA("",up)) { globalnoIzuzece = 0; up 
+= 6; continue; } if (strPodudarA("",up)) { globalnoIzuzece = 1; up += 7; continue; } if (globalnoIzuzece && regexec(re1,up)) { prepisi_znak; pisiZnak('\\'); continue; } prepisi_znak; } pisi0; } /* Kraj ipp_u_v */ void ipp_u_utf8(prihvatnik_rt ip) { static int globalnoIzuzece = 0; init_up_ip; do_kraja_niske { int i; if (strPodudarA("",up)) { globalnoIzuzece = 0; up += 6; continue; } if (strPodudarA("",up)) { globalnoIzuzece = 1; up += 7; continue; } if (globalnoIzuzece) { prepisi_znak; continue; } for (i=0; islovo[i]; if (*p != *q) continue; else { do ++p, ++q; while ( *p==*q && *q!='\0'); if (*q == '\0') { znak_t s[20], s1[20]; iso_u_unicode(pstdTi->slovo[i], s); u_utf8(s,s1); staviNisku(&ip,&ipLen,s1); up = p; break; } else if (*p != '\\') continue; else { do ++p; while (*p == '\\'); while (*p==*q && *q!='\0') ++p, ++q; if (*q != '\0') continue; else { do { znak_t s[2], s1[10], s2[20]; s[0] = ((*up) < 'A' ? (*up) : ((*up) <= 'Z' ? (*((pstdTi->cSlovo)+(*up)-'A')) : ((*up) < 'a' ? (*up) : ((*up) <= 'z' ? (*((pstdTi->cSlovo)+ (*up)-'A'-'a'+'Z'+1)) :(*up) )))); s[1] = '\0'; iso_u_unicode(s,s1); u_utf8(s1,s2); staviNisku(&ip,&ipLen,s2); ++up; } while(*up!='\\'); ++up; break; } } } } if (i==BR_SLOVA) { znak_t s[2], s1[10], s2[20]; s[0] = ((*up) < 'A' ? (*up) : ((*up) <= 'Z' ? (*((pstdTi->cSlovo)+(*up)-'A')) : ((*up) < 'a' ? (*up) : ((*up) <= 'z' ? 
(*((pstdTi->cSlovo)+ (*up)-'A'-'a'+'Z'+1)) :(*up) )))); s[1] = '\0'; iso_u_unicode(s,s1); u_utf8(s1,s2); staviNisku(&ip,&ipLen,s2); ++up; } } pisi0; } /* Kraj ippUiso */ void ipp_u_unicodeI(prihvatnik_rt up, gen_niska_tp gn) { static int globalnoIzuzece=0; int slovo, i; znak_t *ip = gn->p; int ipLen = MAX_RED+2; do_kraja_niske { if (gn->f - ip < 2) { fprintf(stderr, "Nedovoljno memorije!\n"); exit(1); } *ip = *(ip+1) = '\0'; if (strPodudarA("",up)) { globalnoIzuzece = 0; up += 6; continue; } if (strPodudarA("",up)) { globalnoIzuzece = 1; up += 7; continue; } if (globalnoIzuzece) { ip+=2; prepisi_znak; continue; } for (i=0; islovo[i]; if (*p != *q) continue; else { do ++p, ++q; while ( *p==*q && *q!='\0'); if (*q == '\0') { iso_u_unicode(pstdTiso->slovo[i], ip+1); if (pstdTv->slovo[i][1]=='J' || pstdTv->slovo[i][2]=='Z') *ip = 'J'; ip+=3; up = p; break; } else if (*p != '\\') continue; else { do ++p; while (*p == '\\'); while (*p==*q && *q!='\0') ++p, ++q; if (*q != '\0') continue; else { do { znak_t s[2]; s[0] = ((*up) < 'A' ? (*up) : ((*up) <= 'Z' ? (*((pstdTiso->cSlovo)+(*up)-'A')) : ((*up) < 'a' ? (*up) : ((*up) <= 'z' ? (*((pstdTiso->cSlovo)+ (*up)-'A'-'a'+'Z'+1)) :(*up) )))); s[1] = '\0'; iso_u_unicode(s,ip+1); ++up; ip+=3; } while(*up!='\\'); ++up; break; } } } } if (i==BR_SLOVA) { znak_t s[2]; s[0] = ((*up) < 'A' ? (*up) : ((*up) <= 'Z' ? (*((pstdTiso->cSlovo)+(*up)-'A')) : ((*up) < 'a' ? (*up) : ((*up) <= 'z' ? (*((pstdTiso->cSlovo)+ (*up)-'A'-'a'+'Z'+1)) :(*up) )))); s[1] = '\0'; iso_u_unicode(s,ip+1); ++up; ip+=3; } } gn->k = ip-1; } /* Kraj ipp_u_unicodeI */ /* Obratiti pa"znju da je ulazna niska u formatu gen_niska_tp po"sto mo"ze da sadr"zi \0, a izlazni format je prihvatnik_rt (prelazna faza). 
*/ void unicodeI_u_ipp(gen_niska_tp gn, prihvatnik_rt ip1) { int izuzmi=0; int ipLen = MAX_RED+2, slovo; znak_t *up = gn->p, *ip = ip1; /* regularni izrazi */ static regexp *re1=NULL, *re2=NULL; if (re1==NULL) re1=regcomp("^[\"']\\\\\\\\*[Cc]|^\"\\\\\\\\*[SsZz]|" "^[DLN]\\\\\\\\*[Jj]|^[dln]\\\\\\\\*j|" "^D\\\\\\\\*\"[Zz]|^d\\\\\\\\*\"z"); if (re2==NULL) re2=regcomp("^[\"'DdLlNn]\\\\"); while (up < gn->k) { unsigned int i = (*(up+1))*0x100 + (*(up+2)); ++up; if (int_izmedju(i,'A','P')||int_izmedju(i,'R','V')|| i==(unsigned int)'Z'||int_izmedju(i,'a','p')|| int_izmedju(i,'r','v')||i==(unsigned int)'Z') { if (!izuzmi) { staviNisku(&ip, &ipLen, ""); izuzmi = 1; } ++up; prepisi_znak; } else if (i>=0x400 && i<=0x45F) { znak_t c[2]; znak_t *pc=c; c[0] = (znak_t)(i - 0x360); c[1] = '\0'; if ((slovo = jelNase(&pc, pstdTiso->slovo, STD_UNICODEI)) > -1) { up+=2; if (izuzmi) { staviNisku(&ip, &ipLen, ""); izuzmi = 0; } if (*(up-3)=='J') ++slovo; staviNisku(&ip, &ipLen, pstdTv->slovo[slovo]); } else { znak_t *pc = pstdTiso->cSlovo; znak_t *pl = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; while (*pl!='\0' && c[0]!=*pc) { ++pc; ++pl; } if (*pl != '\0') { if (izuzmi) { staviNisku(&ip, &ipLen, ""); izuzmi = 0; } pisiZnak(*pl); if (*pl=='D' || *pl=='d' || *pl=='L' || *pl=='l' || *pl=='N' || *pl=='n') pisiZnak('\\'); up+=2; } else { greska("(?)"); staviNisku(&ip, &ipLen, "(?)"); up+=2; } } } /* kraj od if i izmedju 400 i 45F */ else if (*up=='\0') { ++up; prepisi_znak; if (!izuzmi && (*(up-1)=='\'' || *(up-1)=='"')) pisiZnak('\\'); } else { greska("(?)"); staviNisku(&ip, &ipLen, "(?)"); up+=2; } } if (izuzmi) { if (ip>ip1 && *(ip-1)=='\n') { --ip; --ipLen; staviNisku(&ip, &ipLen, "\n"); } else { staviNisku(&ip,&ipLen, ""); } } pisi0; ip = up = ip1; izuzmi=0; ipLen = MAX_RED+2; while (*up!='\0') { if (strPodudarA("",up)) { izuzmi=0; up += 6; staviNisku(&ip, &ipLen, ""); } else if (strPodudarA("",up)) { izuzmi=1; up+=7; staviNisku(&ip, &ipLen, ""); } else if (izuzmi || 
regexec(re1,up)) { prepisi_znak; } else if (regexec(re2,up)) { prepisi_znak; ++up; } else { prepisi_znak; } } pisi0; } /* kraj funkcije unicodeI_u_ipp */ void prostiL_u_unicodeI(prihvatnik_rt p, gen_niska_tp gn, std_t s) { prostiL_u_sortl2(p, s); sortl2_u_unicodeI(p, gn); } void unicodeI_u_prostiL(gen_niska_tp gn, prihvatnik_rt p){ unicodeI_u_sortl2(gn, p); sortl2_u_prostiL(p); } void prostiL_u_sortl2(prihvatnik_rt ip, std_t s) { init_up_ip; do_kraja_niske { if nase_slovo(s) pisiNaseInterstd; else interstd_pisi; } pisi0; } void sortl2_u_prostiL(prihvatnik_rt ip) { init_up_ip; do_kraja_niske { if ((slovo = jelNase(&up, pstdTsortl2->slovo, STD_SORTL2)) > -1) staviNisku(&ip, &ipLen, pstdTi->slovo[slovo]); else { staviZnak(ip, ipLen, *up); if (*up != '\n') ++up; ++up; } } pisi0; } void dual1_u_sortl2(prihvatnik_rt ip) { init_up_ip; do_kraja_niske { int slovo; if ((slovo = jelNase(&up, pstdTu->slovo, STD_DUAL)) > -1) { register znak_t *p = up-2, *q = up; int i=1; for (; p[1] == *q; ++i, ++q) ; if (i % 2 != 0) { staviNisku(&ip, &ipLen, pstdTsortl2->slovo[slovo]); } else { pisi2znaka(p[0]); pisi2znaka(p[1]); ++up; } while vidis(p[1]) { prepisi_znak; } } else { interstd_pisi; } } pisi0; } void sortl2_u_dual1(prihvatnik_rt ip) { init_up_ip; do_kraja_niske { if ((slovo = jelNase(&up, pstdTsortl2->slovo, STD_SORTL2)) > -1) { znak_t *p=up-1; staviNisku(&ip, &ipLen, pstdTi->slovo[slovo]); while vidis(*p) { prepisi_znak; } } else { znak_t *p=ip; pisiZnak(*up); if (up > upStatic && ((p[0]=='x' && (p[-1]=='C' || p[-1]=='c' || p[-1]=='D' || p[-1]=='d' || p[-1]=='S' || p[-1]=='s' || p[-1]=='Z' || p[-1]=='z')) || (p[0]=='X' && (p[-1]=='C' || p[-1]=='D' || p[-1]=='S' || p[-1]=='Z')) || (p[0]=='y' && (p[-1]=='C' || p[-1]=='c' || p[-1]=='D' || p[-1]=='d' || p[-1]=='N' || p[-1]=='n' || p[-1]=='L' || p[-1]=='l')) || (p[0]=='Y' && (p[-1]=='C' || p[-1]=='D' || p[-1]=='N' || p[-1]=='L')))) { for (++up; *up==*p;) { prepisi_znak; } } else { if (*up != '\n') ++up; ++up; } } } pisi0; } void 
dual1_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn) { dual1_u_sortl2(ip); sortl2_u_unicodeI(ip, gn); } void unicodeI_u_dual1(gen_niska_tp gn, prihvatnik_rt ip) { unicodeI_u_sortl2(gn, ip); sortl2_u_dual1(ip); } void bq_u_unicodeI(prihvatnik_rt up, gen_niska_tp genNiska) { bq_u_sortl2(up); sortl2_u_unicodeI(up, genNiska); } void unicodeI_u_bq(gen_niska_tp gn, prihvatnik_rt ip) { unicodeI_u_sortl2(gn,ip); sortl2_u_bq(ip); } void q_u_unicodeI(prihvatnik_rt up, gen_niska_tp genNiska, znak_t c) { q_u_sortl2(up, c); sortl2_u_unicodeI(up, genNiska); } void unicodeI_u_q(gen_niska_tp gn, prihvatnik_rt ip, znak_t c) { unicodeI_u_sortl2(gn,ip); sortl2_u_q(ip, c); } void bq_u_sortl2(prihvatnik_rt ip) { init_up_ip; do_kraja_niske { if ((slovo = jelNase(&up, pstdTu->slovo, STD_BQ)) > -1) { if ((slovo==SLOVO_Dx || slovo==SLOVO_Dy || slovo==SLOVO_Ny || slovo==SLOVO_Ly) && *up=='`' && up[1]=='J') { ++slovo; up +=2;} staviNisku(&ip, &ipLen, pstdTsortl2->slovo[slovo]); } else { if (up[0] == '`') { if (up[1] == '`') ++up; else greska("BQ"); } staviZnak(ip, ipLen, *up); if (*up == 'Z') {staviZnak(ip, ipLen, 'W');} else if (*up == 'z') {staviZnak(ip, ipLen, 'w');} else if (*up != '\n') {staviZnak(ip, ipLen, *up);} ++up; } } pisi0; } void q_u_sortl2(prihvatnik_rt ip, znak_t c) { int slovo; prihvatnik_t upStatic; prihvatnik_rt up, tmp; int ipLen=MAX_RED+2; up = upStatic; strcpy(up,ip); while (*up != '\0') { if (up[0]==tolower(c)) { for (slovo=0; slovoslovo[slovo][0] == up[1]) break; if (slovoslovo[slovo]); up +=2; continue; } } if (up[0]==toupper(c)) { slovo = -1; switch (up[1]) { case 'D': slovo = SLOVO_DX; break; case 'L': slovo = SLOVO_LY; break; case 'N': slovo = SLOVO_NY; break; } if (slovo > -1) { staviNisku(&ip, &ipLen, pstdTsortl2->slovo[slovo]); up += 2; continue; } } if (up[0]==up[1] && toupper(c)==toupper(up[0])) ++up; staviZnak(ip, ipLen, *up); if (*up == 'Z') {staviZnak(ip, ipLen, 'W');} else if (*up == 'z') {staviZnak(ip, ipLen, 'w');} else if (*up != '\n') {staviZnak(ip, 
ipLen, *up);} ++up; } staviZnak(ip, ipLen, '\0'); } void sortl2_u_bq(prihvatnik_rt ip) { init_up_ip; do_kraja_niske { if ((slovo = jelNase(&up, pstdTsortl2->slovo, STD_SORTL2)) > -1) staviNisku(&ip, &ipLen, pstdTi->slovo[slovo]); else { staviZnak(ip, ipLen, *up); if (*up == '`') staviZnak(ip, ipLen, *up); if (*up != '\n') ++up; ++up; } } pisi0; } void sortl2_u_q(prihvatnik_rt ip, znak_t c) { int slovo; prihvatnik_t upStatic; prihvatnik_rt up; int ipLen=MAX_RED+2; up = upStatic; strcpy(up,ip); while (*up != '\0') { if ((slovo = jelNase(&up, pstdTsortl2->slovo, STD_SORTL2)) > -1) { switch (slovo) { case SLOVO_DX: case SLOVO_LY: case SLOVO_NY: staviZnak(ip, ipLen, toupper(c)); break; default: staviZnak(ip, ipLen, tolower(c)); } staviNisku(&ip, &ipLen, pstdTi->slovo[slovo]); } else { staviZnak(ip, ipLen, *up); if (toupper(*up) == toupper(c)) staviZnak(ip, ipLen, *up); if (*up != '\n') ++up; ++up; } } staviZnak(ip, ipLen, '\0'); } void dyq_u_sortl2(prihvatnik_rt ip) { static int globalnoIzuzece=0; init_up_ip; do_kraja_niske { if (!globalnoIzuzece && nase_slovo(STD_DYQ)) pisiNaseInterstd; else { if (vidis('_') && vidisIspred(1,'_')) ++up; else if (vidis('_')) { ++up; globalnoIzuzece = !globalnoIzuzece; continue; } interstd_pisi; } } pisi0; } void sortl2_u_dyq(prihvatnik_rt ip) { int izuzmi=0; init_up_ip; do_kraja_niske { if ((slovo = jelNase(&up, pstdTsortl2->slovo, STD_SORTL2)) > -1) { if (izuzmi) { pisiZnak('_'); izuzmi = 0; } staviNisku(&ip, &ipLen, pstdTi->slovo[slovo]); } else { if (!izuzmi && (toupper(*up) == 'Q' || toupper(*up) == 'X' || toupper(*up) == 'Y' || toupper(*up) == 'W' || (toupper(*up) == 'D' && toupper(up[2]) == 'C' && toupper(up[3]) == 'Y') || (toupper(*up) == 'N' && toupper(up[2]) == 'J') || (toupper(*up) == 'L' && toupper(up[2]) == 'J') || (toupper(*up) == 'D' && toupper(up[2]) == 'Z' && toupper(up[3]) == 'X'))) { pisiZnak('_'); izuzmi = 1; } else if (izuzmi && !isalpha(*up) && !isdigit(*up)) { pisiZnak('_'); izuzmi = 0; } pisiZnak(*up); if (*up == 
'_') pisiZnak('_'); if (*up != '\n') ++up; ++up; } } pisi0; }

/*
 * qdl_u_sortl2 -- rewrite the line held in ip from the backquote-based
 * QDL notation into the internal SORTL2 form, in place.
 *
 * init_up_ip, do_kraja_niske, pisi2znaka, interstd_pisi and pisi0 are
 * macros defined elsewhere in this file; presumably init_up_ip copies ip
 * into a scratch buffer read through `up` and declares slovo/ipLen, as the
 * hand-written prologue of sortl2_u_q does -- TODO confirm.
 *
 * Backquote handling in the input:
 *   three backquotes : pisi2znaka('`') is emitted and three bytes skipped;
 *   two backquotes   : both are skipped, nothing is emitted;
 *   one backquote    : toggles globalnoIzuzece and is skipped.
 * globalnoIzuzece is static, so the toggle persists across calls (that is,
 * across input lines).  While it is set, no national letter is recognised.
 */
void qdl_u_sortl2(prihvatnik_rt ip) {
  static int globalnoIzuzece=0;
  init_up_ip;
  do_kraja_niske {
    /* NOTE(review): the letter table is searched with STD_DYQ although this
       is the QDL reader -- confirm this is intended. */
    if (!globalnoIzuzece && (slovo = jelNase(&up, pstdTu->slovo, STD_DYQ)) > -1)
      staviNisku(&ip, &ipLen, pstdTsortl2->slovo[slovo]);
    else {
      if (up[0] == '`' && up[1] == '`' && up[2] == '`') { pisi2znaka('`'); up+=3; }
      else if (up[0]=='`' && up[1]=='`') { up+=2; }
      else if (up[0]=='`') { globalnoIzuzece = !globalnoIzuzece; ++up; }
      else { interstd_pisi; }
    }
  }
  pisi0;
}

/*
 * sortl2_u_qdl -- rewrite the SORTL2 line held in ip into QDL notation,
 * in place (inverse direction of qdl_u_sortl2).
 *
 * izuzmi is a per-call flag meaning "an exception backquote is currently
 * open in the output".  It is opened before certain plain characters and
 * closed again before the next national letter, before the next character
 * that is neither a letter nor a digit, or at the end of the line.
 */
void sortl2_u_qdl(prihvatnik_rt ip) {
  int izuzmi=0;
  init_up_ip;
  do_kraja_niske {
    if ((slovo = jelNase(&up, pstdTsortl2->slovo, STD_SORTL2)) > -1) {
      /* A national letter follows: close any open exception first. */
      if (izuzmi) { pisiZnak('`'); izuzmi = 0; }
      staviNisku(&ip, &ipLen, pstdTi->slovo[slovo]);
    } else {
      /* Open an exception before Q, X, Y, W, or before D/N/L when the bytes
         at offsets 2 and 3 spell CY, J, J or ZX respectively; presumably
         these are plain-letter runs that QDL would otherwise read back as
         national letters -- TODO confirm. */
      if (!izuzmi && (toupper(*up) == 'Q' || toupper(*up) == 'X' || toupper(*up) == 'Y' || toupper(*up) == 'W' || (toupper(*up) == 'D' && toupper(up[2]) == 'C' && toupper(up[3]) == 'Y') || (toupper(*up) == 'N' && toupper(up[2]) == 'J') || (toupper(*up) == 'L' && toupper(up[2]) == 'J') || (toupper(*up) == 'D' && toupper(up[2]) == 'Z' && toupper(up[3]) == 'X'))) { pisiZnak('`'); izuzmi = 1; }
      else if (izuzmi && !isalpha(*up) && !isdigit(*up)) { pisiZnak('`'); izuzmi = 0; }
      pisiZnak(*up);
      /* A literal backquote is written three times in total, matching the
         three-backquote case of qdl_u_sortl2. */
      if (*up == '`') { pisi2znaka('`'); }
      /* Every input cell except newline is two bytes wide. */
      if (*up != '\n') ++up;
      ++up;
    }
  }
  /* Do not leave an exception open at the end of the line. */
  if (izuzmi) { pisiZnak('`'); }
  pisi0;
}

/* DYQ -> internal Unicode string: DYQ -> SORTL2 (in ip), then SORTL2 -> gn. */
void dyq_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn) {
  dyq_u_sortl2(ip);
  sortl2_u_unicodeI(ip, gn);
}

/* Internal Unicode string -> DYQ: gn -> SORTL2 (in ip), then SORTL2 -> DYQ. */
void unicodeI_u_dyq(gen_niska_tp gn, prihvatnik_rt ip) {
  unicodeI_u_sortl2(gn, ip);
  sortl2_u_dyq(ip);
}

/* QDL -> internal Unicode string: QDL -> SORTL2 (in ip), then SORTL2 -> gn. */
void qdl_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn) {
  qdl_u_sortl2(ip);
  sortl2_u_unicodeI(ip, gn);
}

/* Internal Unicode string -> QDL: gn -> SORTL2 (in ip), then SORTL2 -> QDL. */
void unicodeI_u_qdl(gen_niska_tp gn, prihvatnik_rt ip) {
  unicodeI_u_sortl2(gn, ip);
  sortl2_u_qdl(ip);
}

/* Separator text used by the TEXC converters: texc_u_sortl2 looks for it
   with strPodudara() and sortl2_u_texc writes it with staviNisku(). */
znak_t *texRazbijac="{\\kern0pt}";

/* TEXC -> internal Unicode string: TEXC -> SORTL2 (in ip), then SORTL2 -> gn. */
void texc_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn) {
  texc_u_sortl2(ip);
  sortl2_u_unicodeI(ip, gn);
}

/* Internal Unicode string -> TEXC (the body continues below). */
void unicodeI_u_texc(gen_niska_tp gn, prihvatnik_rt ip) {
unicodeI_u_sortl2(gn, ip); sortl2_u_texc(ip); } void texc_u_sortl2(prihvatnik_rt ip) { init_up_ip; do_kraja_niske { int slovo; if ((slovo = jelNase(&up, pstdTu->slovo, STD_TEXC)) > -1) staviNisku(&ip, &ipLen, pstdTsortl2->slovo[slovo]); else { interstd_pisi; } if (strPodudara(texRazbijac, up)) { int i, razDuz; razDuz = strlen(texRazbijac); for (i=0; islovo[i][0] && up[razDuz] == pstdTu->slovo[i][1]) break; } if ((i < BR_SLOVA) || (toupper(up[-1])=='T' && toupper(up[razDuz])=='S') || (up-upStatic > 1 && toupper(up[-2])=='S' && toupper(up[-1])=='H' && toupper(up[razDuz])=='C' && (toupper(up[razDuz+1])=='H' || up[razDuz+1]=='1'))) up += razDuz; } } pisi0; } void sortl2_u_texc(prihvatnik_rt ip) { int slovo; prihvatnik_t upStatic; prihvatnik_rt up; prihvatnik_rt ipStatic=ip; int ipLen=MAX_RED+2, i; up = upStatic; strcpy(up,ip); while (*up != '\0') { if ((slovo = jelNase(&up, pstdTsortl2->slovo, STD_SORTL2)) > -1) staviNisku(&ip, &ipLen, pstdTi->slovo[slovo]); else { staviZnak(ip, ipLen, *up); if (*up != '\n') ++up; ++up; } for (i=0; islovo[i][0] && *up == pstdTi->slovo[i][1] && *up == up[1]) break; } if ((i < BR_SLOVA) || (toupper(ip[-1])=='T' && toupper(*up)=='S' && *up==up[1]) || (ip-ipStatic > 1 && toupper(ip[-2])=='S' && toupper(ip[-1])=='H' && toupper(*up)=='C' && (toupper(up[1])=='X' || toupper(up[1])=='Y'))) staviNisku(&ip,&ipLen,texRazbijac); } pisi0; } void sortl_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn) { skiniTab16(ip); sortl2_u_sortl2(ip); sortl2_u_unicodeI(ip, gn); } void sortc_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn) { skiniTab16(ip); sortc2_u_sortl2(ip); sortl2_u_unicodeI(ip, gn); } void sortc2_u_unicodeI(prihvatnik_rt ip, gen_niska_tp gn) { sortc2_u_sortl2(ip); sortl2_u_unicodeI(ip, gn); } void unicodeI_u_sortl(gen_niska_tp gn, prihvatnik_rt ip) { unicodeI_u_sortl2(gn, ip); staviTab16(ip); } void unicodeI_u_sortc2(gen_niska_tp gn, prihvatnik_rt ip) { unicodeI_u_sortl2(gn, ip); sortl2_u_sortc2(ip); } void unicodeI_u_sortc(gen_niska_tp gn, 
prihvatnik_rt ip) { unicodeI_u_sortl2(gn, ip); sortl2_u_sortc2(ip); staviTab16(ip); } znak_t *sortcTabela = "AA" "SA" "BB" "SB" "VV" "SC" "GG" "SD" "DD" "SE" "DY" "SF" "EE" "SG" "ZX" "SH" "ZW" "SI" "II" "SJ" "JJ" "SK" "KK" "SL" "LL" "SM" "LY" "SN" "MM" "SO" "NN" "SP" "NY" "SQ" "OO" "SR" "PP" "SS" "RR" "ST" "SS" "SU" "TT" "SV" "CY" "SW" "UU" "SX" "FF" "SY" "HH" "SZ" "CC" "TA" "CX" "TB" "DX" "TC" "SX" "TD" "QQ" "TE" "WW" "TF" "XX" "TG" "YY" "TH" "...."; void sortc2_u_sortl2(prihvatnik_rt ip) { znak_t *t; init_up_ip; do_kraja_niske { if vidis('\n') { prepisi_znak; continue; } if (!isalpha(*up)) { if (up[0] != up[1]) greska("SortC"); pisi2znaka(*up); up+=2; continue; } for (t=sortcTabela; *t!='.'; t+=4) if (t[2]==toupper(up[0]) && t[3]==toupper(up[1])) break; if (*t=='.') greska("SortC"); pisiZnak(isupper(up[0]) ? t[0] : tolower(t[0])); pisiZnak(isupper(up[1]) ? t[1] : tolower(t[1])); up +=2; } pisi0; } void sortl2_u_sortl2(prihvatnik_rt ip) { init_up_ip; do_kraja_niske { if vidis('\n') { prepisi_znak; continue; } if nase_slovo(STD_SORTL2) { pisiNaseInterstd; continue; } if (toupper(up[0])=='Z') { if (toupper(up[1])=='W' && isupper(up[0])==isupper(up[1])) { prepisi_znak; prepisi_znak; continue; } else { kopirajZnak; pisiZnak(isupper(*up) ? 'W' : 'w'); ++up; } } else { kopirajZnak; kopirajZnak; if (up[0]==up[1]) { up += 2; continue; } up+=1; } greska("SortL"); if (up[0]=='\0' || up[0]==up[1]) continue; kopirajZnak; kopirajZnak; ++up; } pisi0; } void sortl2_u_sortc2(prihvatnik_rt ip) { znak_t *t; init_up_ip; do_kraja_niske { if vidis('\n') { prepisi_znak; continue; } if (!isalpha(*up)) { *ip = ip[1] = *up; ip += 2; up += 2; continue; } for (t=sortcTabela; *t!='.'; t+=4) if (t[0]==toupper(up[0]) && t[1]==toupper(up[1])) break; assert(*t != '.'); ip[0] = (isupper(up[0]) ? t[2] : tolower(t[2])); ip[1] = (isupper(up[1]) ? 
t[3] : tolower(t[3])); up +=2; ip +=2; } pisi0; } void sortl2_u_unicodeI(prihvatnik_rt up, gen_niska_tp genNiska) { int slovo; znak_t *ip = genNiska->p; do_kraja_niske { if (genNiska->f - ip < 2) { fprintf(stderr, "Nedovoljno memorije!\n"); exit(1); } *ip = *(ip+1) = '\0'; if ((slovo = jelNase(&up, pstdTsortl2->slovo, STD_SORTL2)) > -1) { iso_u_unicode(pstdTiso->slovo[slovo],ip+1); if (slovo==SLOVO_DY||slovo==SLOVO_LY||slovo==SLOVO_NY|| slovo==SLOVO_DX) *ip = 'J'; } else { *(ip+2) = ((*up) < 'A' ? (*up) : ((*up) <= 'Z' ? (*((pstdTiso->cSlovo)+(*up)-'A')) : ((*up) < 'a' ? (*up) : ((*up) <= 'z' ? (*((pstdTiso->cSlovo)+(*up)-'A'-'a'+'Z'+1)) : (*up) )))); if (*(ip+2) >= 128) { unsigned int i = *(ip+2); i += 0x360; *(ip+1) = (znak_t)((i >> 8) & 0xFF); *(ip+2) = (znak_t)(i & 0xFF); } if (*(up++)!='\n') ++up; } ip += 3; } genNiska->k = ip-1; } /* kraj funkcije sortl2_u_unicodeI */ void unicodeI_u_sortl2(gen_niska_tp gn, prihvatnik_rt ip) { unicodeI_u_ipp(gn, ip); ipp_u_v(ip); v_u_sortl2(ip); } void prostiC_u_unicodeI(prihvatnik_rt up, gen_niska_tp gn) { znak_t *ip = gn->p; int slovo; while (*up!='\0') { if (ip>gn->f) { fprintf(stderr,"nedovoljno memorije"); exit(1); } *ip = *(ip+1) = '\0'; if nase_slovo(STD_ISO) { /* STD_ISO nije va"zno */ int i = ((int)pstdTiso->slovo[slovo][0])+0x360; *(ip+2) = (znak_t)(i & 0xFF); *(ip+1) = (znak_t)(i>>8); if (slovo==SLOVO_DX||slovo==SLOVO_DY||slovo==SLOVO_LY|| slovo==SLOVO_NY) *ip = 'J'; } else { znak_t *pu = pstdTu->cSlovo; znak_t *pi = pstdTiso->cSlovo; while (*pu!='\0' && *pu!=*up) ++pu, ++pi; if (*pi>127) { int i = ((int)*pi) + 0x360; *(ip+2) = (znak_t)(i & 0xFF); *(ip+1) = (znak_t)(i>>8); } else if (*pi!='\0') *(ip+2) = *pi; else *(ip+2) = *up; ++up; } ip += 3; } gn->k = ip-1; } /* kraj funkcije prostiC_u_unicodeI */ void unicodeI_u_prostiC(gen_niska_tp gn, prihvatnik_rt ip) { unicodeI_u_ipp(gn, ip); ipp_u_prostiC(ip); } void ipp_u_prostiC(prihvatnik_rt ip) { static int globalnoIzuzece=0; init_up_ip; do_kraja_niske { int i; if 
(strPodudarA("",up)) { globalnoIzuzece = 0; up += 6; continue; } if (strPodudarA("",up)) { globalnoIzuzece = 1; up += 7; continue; } if (globalnoIzuzece) { prepisi_znak; continue; } for (i=0; islovo[i]; if (*p != *q) continue; else { do ++p, ++q; while ( *p==*q && *q!='\0'); if (*q == '\0') { staviNisku(&ip,&ipLen,pstdTi->slovo[i]); up = p; break; } else if (*p != '\\') continue; else { do ++p; while (*p == '\\'); while (*p==*q && *q!='\0') ++p, ++q; if (*q != '\0') continue; else { do pisiCZnak(*up), ++up; while(*up!='\\'); ++up; break; } } } } if (i==BR_SLOVA) { pisiCZnak(*up); ++up; } } pisi0; } /* Kraj ipp_u_prostiC */ void utf8_u_unicodeI(prihvatnik_rt up, gen_niska_tp gn) { znak_t *ip = gn->p; unsigned int c; while (*up!='\0') { if (ip>gn->f) { fprintf(stderr,"nedovoljno memorije"); exit(1); } if (*up < 0x80) c = *(up++); else if ((*up & 0xE0) == 0xC0) { c = (*(up++) & 0x1F) << 6; c += *(up++) & 0x3F; } else if ((*up & 0xF0) == 0xE0) { c = (*(up++) & 0x0F) << 12; c += (*(up++) & 0x3F) << 6; c += (*(up++) & 0x3F); } else if ((*up & 0xF8) == 0xF0) { c = (*(up++) & 0x03) << 18; c += (*(up++) & 0x3F) << 12; c += (*(up++) & 0x3F) << 6; c += (*(up++) & 0x3F); } else { greska("greska u UTF8"); c = *(up++); } *(ip++) = '\0'; *(ip++) = (c>>8); *(ip++) = (c & 0xFF); } gn->k = ip-1; } /* kraj funkcije utf8_u_unicodeI */ void unicodeI_u_utf8(gen_niska_tp gn, prihvatnik_rt ip) { unicodeI_u_ipp(gn, ip); ipp_u_utf8(ip); } void u_utf8(znak_t* s, znak_t* d) { while (*s!='\0' || *(s+1)!='\0') { unsigned int c = *(s++); c = (c<<8) + *(s++); if (c < 0x80) { *(d++) = (znak_t)c; } else if (c < 0x800) { *(d++) = (znak_t) (0xC0 | c>>6); *(d++) = (znak_t) (0x80 | c & 0x3F); } else if (c < 0x10000) { *(d++) = (znak_t) (0xE0 | c>>12); *(d++) = (znak_t) (0x80 | c>>6 & 0x3F); *(d++) = (znak_t) (0x80 | c & 0x3F); } else if (c < 0x200000) { *(d++) = (znak_t) (0xF0 | c>>18); *(d++) = (znak_t) (0x80 | c>>12 & 0x3F); *(d++) = (znak_t) (0x80 | c>>6 & 0x3F); *(d++) = (znak_t) (0x80 | c & 0x3F); 
} } *d = '\0'; }

/*
 * iso_u_unicode -- expand the NUL-terminated byte string s into two-byte
 * codes (high byte first) written to d, followed by a two-byte zero
 * terminator.  Bytes below 128 keep their value; bytes >= 128 are shifted
 * by 0x360 (for example 0xB0 becomes 0x0410).  d must have room for
 * 2*strlen(s)+2 bytes.
 */
void iso_u_unicode(znak_t* s, znak_t* d) {
  while (*s != '\0') {
    if (*s < 128) { *(d++) = '\0'; *(d++) = *(s++); }
    else {
      unsigned int i = *(s++);
      i += 0x360;
      *(d++) = (znak_t)((i >> 8) & 0xFF);
      *(d++) = (znak_t)(i & 0xFF);
    }
  }
  *(d++) = '\0';
  *d = '\0';
}

/*** Macros and functions providing shorthand for the translators ***
 *
 * In principle the functions work as follows (it is not guaranteed that
 * every one of them is implemented this way):
 *
 * The input of a function is the string ip, of maximum length MAX_RED+2
 * including the terminating NUL.
 * The string is copied into upStatic, and the pointer up points to the
 * start of upStatic.  Translation is done by copying *up into *ip and
 * advancing the pointers up and ip.  ipLen holds the number of unused
 * positions in ip.
 */

/*
 * skiniTab16 -- rewrite ip in place, reading two bytes per text cell and
 * re-expanding tabs for that width.
 *
 * uk is the 1-based input column (advanced by 2 per cell), ik the 1-based
 * output column (advanced by 1 per cell); both use tab stops at columns
 * 1, 9, 17, ...  A run of input tabs advances uk one stop per tab; tab
 * pairs are then written, advancing ik one stop each, until 2*ik-1 has
 * caught up with uk.  A newline is copied and resets both columns.
 */
void skiniTab16(prihvatnik_rt ip) {
  int uk=1, ik=1;
  init_up_ip;
  do_kraja_niske {
    if (vidis('\n')) { prepisi_znak; uk = ik = 1; continue; }
    else if vidis('\t') {
      /* Consume the whole run of tabs. */
      do { do ++uk; while (uk % 8 != 1); ++up; } while vidis('\t');
      /* Emit doubled tabs until the output column catches up. */
      do { pisi2znaka('\t'); do ++ik; while (ik % 8 != 1); } while (2*ik-1 < uk);
    }
    else { prepisi_znak; prepisi_znak; uk += 2; ++ik; }
  }
  pisi0;
}

/*
 * staviTab16 -- counterpart of skiniTab16: the input is read two bytes per
 * cell (a tab occupies two bytes as well) and uk counts cells, while ik
 * counts output bytes (advanced by 2 per cell).  After a run of tabs,
 * single tabs are written until ik reaches 2*uk-1.  A newline is copied
 * and resets both columns.
 */
void staviTab16(prihvatnik_rt ip) {
  int uk=1, ik=1;
  init_up_ip;
  do_kraja_niske {
    if vidis('\n') { prepisi_znak; uk = ik = 1; continue; }
    if vidis('\t') {
      do { do ++uk; while (uk % 8 != 1); up += 2; } while vidis('\t');
      do { pisiZnak('\t'); do ++ik; while (ik % 8 != 1); } while (ik < 2*uk-1);
      continue;
    }
    prepisi_znak; prepisi_znak; ++uk; ik+=2;
  }
  pisi0;
}

/*
 * skiniHeks -- decode "=XX" hexadecimal escapes in ip, in place.  An '='
 * that is not followed by two hexadecimal digits is reported through
 * greska() and copied unchanged.  (The body continues below.)
 */
void skiniHeks(prihvatnik_rt ip) {
  init_up_ip;
  do_kraja_niske {
    if (!vidis('=')) prepisi_znak;
    else if (!isxdigit(up[1]) || !isxdigit(up[2])) {
      greska("Quoted printable!\a");
      prepisi_znak;
    }
    else {
      /* High nibble from up[1], low nibble from up[2]. */
      *(ip++) = ((toupper(up[1]) <= '9') ? (up[1]-'0') : (toupper(up[1])-'A'+10)) * 16 + ((toupper(up[2]) <= '9') ?
(up[2]-'0') : toupper(up[2])-'A'+10); up += 3; } } pisi0; } void staviHeks(prihvatnik_rt ip) { prihvatnik_t upStatic; prihvatnik_rt up; int ipLen = MAX_RED+2; up = upStatic; strcpy(up,ip); while (*up != '\0') { if (*up != '=' && (int)*up < 128) { staviZnak(ip,ipLen,*up); ++up; } else { staviZnak(ip,ipLen,'='); staviZnak(ip,ipLen, ((*up/16) <= 9) ? ((*up/16) + '0') : ((*up/16) + 'A' - 10)); staviZnak(ip,ipLen, ((*up%16) <= 9) ? ((*up%16) + '0') : ((*up%16) + 'A' - 10)); ++up; } } staviZnak(ip,ipLen,'\0'); } /**** Funkcije za prevodjenje ****/ /**** Obrada Opcija ****/ void obradiOpciju(znak_t *op) { int i, viseOpcija=0, j; for (i=0; tabelaOpcija[i].znak!='\0'; ++i) if (op[0]==tabelaOpcija[i].znak) { if (tabelaOpcija[i].argument==0 && op[1]!='\0') viseOpcija=1; break; } if (tabelaOpcija[i].znak=='\0') { fprintf(stderr,"Nepoznata opcija -%s\a\n", op); ind.Uputstva = (ind.Uputstva ? ind.Uputstva : 1); return; } j = 0; do { switch(op[j]) { case 'i': ind.PrijaveGreske=0; break; case 'P': ind.Uputstva = 3; break; case 'p': ind.Uputstva = 2; break; case 's': ind.StartneEtikete=1; break; case 't': ind.Temeljno=1; break; case 'h': ind.HTML=1; break; case 'e': ind.email=1; break; case 'u': if (viseOpcija) { fprintf(stderr,"Opcija -u nije dozvoljena u niski " "opcija.\a\n"); return; } for (i=0; stdE[i].id != STD_GRANICNIK && strCmp(op+1,stdE[i].etiketa) != 0; ++i) ; if (stdE[i].id == STD_GRANICNIK) { fprintf(stderr,"Opcija -%s, nepoznat standard.\a\n", op); if (!(ind.Uputstva)) ind.Uputstva = 1; } else ulazniStd = stdE[i].id; break; default: fprintf(stderr,"Nepoznata opcija %c.\a\n",op[j]); } ++j; } while (viseOpcija && op[j]!='\0'); } /*********************************************************************** Uputstvo */ int prevodUputstva=1; #define REDOVA_NA_EKRANU 24 #define uputZnak(z)do{putchar(z);\ if((z)=='\n'){kol=1;++red;if (ind.Uputstva!=3 && red % \ REDOVA_NA_EKRANU==0) {\ char c;printf("------------------P ili p za prekid:");\ 
c=getchar();if((c=='p')||(c=='P')){exit(1);}\ while(c!='\n') c=getchar();\ ++red;}}else{if((z)=='\t'){do ++kol;\ while(kol%8!=1);}else++kol;}}while(0) void uputNiska(znak_t *s1) { static int kol=1, red=1; znak_t *p, *s; strcpy(prihvatnik, s1); if (izlazniStd != STD_V && prevodUputstva) prevediT(); s = prihvatnik; while (*s!='\0') { for (p=s; *p!='\0' && !isspace(*p); ++p) ; if (s>prihvatnik && isspace(s[-1]) && kol+(p-s)>75) { uputZnak('\n'); } while (s72) { uputZnak('\n'); } else { uputZnak(*s); } ++s; } } } void stampajUputstvo() { if (izlazniStd != STD_V && prevodUputstva) { ulazniStd = STD_V; pstdTu = nadjiTabelu(ulazniStd); pstdTi = nadjiTabelu(izlazniStd); } uputNiska(unistdVerzija); uputNiska("\n\nUpotreba: unistd [] \n"); if (ind.Uputstva > 1) { uputNiska( "\nOsnovna namjena programa je da prevodi tekstove na srpskom jeziku " "iz jednog kodiranja u drugi. Program \"cita standardni ulaz i " "pi\"se standardni izlaz, pa se tipi\"cno upotrebljava kao filter.\n\n" "ARGUMENTI\n\n" "Program prihvata (i obavezan je) jedan argument: izlazni standard " "- ozna\"cen kao . Podr\"zani izlazni standardi su dati u " "dijelu ovog uputstva koji nosi naziv STANDARDI.\n\n" "OPCIJE\n\n" "Opcije mogu ali ne moraju da se navode. Po\"cinju znakom minus (-). " "Neke opcije uzimaju argument koji je niska znakova, a druge ne " "tra\"ze argument. Ako opcije ne tra\"ze argument onda mogu da se " "grupi\"su u jednu nisku koja po\"cinje znakom minus. Spisak opcija " "slijedi:\n\n");} else { uputNiska("Opcije:\n"); }; { int i; for (i=0;tabelaOpcija[i].znak!='\0';++i) { char a[3]={'-',' ','\0'}; a[1] = tabelaOpcija[i].znak; prevodUputstva=0;uputNiska(a);prevodUputstva=1; if(tabelaOpcija[i].argument) uputNiska(" "); else uputNiska(" "); uputNiska(tabelaOpcija[i].opis); uputNiska("\n"); } } if (ind.Uputstva>1) { uputNiska("\nSINTAKSA\n\n" "Unutar teksta koji se prevodi, ulazni standard mo\"ze da se mijenja " "pomo'cu etikete oblika , koja uklju\"cuje odredjeni " "standard. 
" "Etiketa mora biti usamljena u redu i na po\"cetku reda. Veli\"cina " "slova u etiketi se ignori\"se.\n\n" "STANDARDI\n");}; uputNiska("\nSljede\'ci standardi su podr\"zani:"); prevodUputstva=0; { std_t i; int an=0; for (i=0; stdE[i].id != STD_GRANICNIK; ++i) { if (i==0 || stdE[i].id!=stdE[i-1].id) { if (an==1) uputNiska(")"); if (ind.Uputstva > 1) uputNiska("\n "); else uputNiska(i==0?" ":", "); uputNiska(stdE[i].etiketa); an = 0; } else { if (an == 0) uputNiska(" ("); else uputNiska(", "); uputNiska(stdE[i].etiketa); an = 1; } } if (an==1) uputNiska(")"); } prevodUputstva=1; uputNiska("\n"); if (ind.Uputstva>1) { uputNiska("\nPRAVA AUTORA: Program mo\"zete da koristite besplatno u " "bilo koje svrhe pod sljede'cim uslovima:\n" "1. Ispravnost programa nije garantovana. Autor ne'ce snositi " "odgovornost za bilo kakvu \"stetu nastalu kori\"s'cenjem programa.\n" "2. Autorstvo koda ne smije biti promijenjeno niti izostavljeno.\n" "3. Ako modifikujete kod, obavezni ste da to nazna\"cite.\n"); }} int prevedi_HTML() { int htmlIzuzece = 0; regexp *reK = NULL; prihvatnik_t prviPrihvatnik; int indHead=0, indMeta=0, indBody=0; /* body ili /head */ znak_t *sEtiketa = standardnaEtiketa(izlazniStd); regexp *reTAG = regcomp("<(/|)[^?]"); regexp *reTAGend = regcomp(">"); static regexp *reI=NULL; static regexp *reCharset = NULL; static regexp *reSCRIPT = NULL, *reSCRIPTend = NULL; if (reCharset==NULL) reCharset = regcomp ("^<[Mm][Ee][Tt][Aa] [Hh][Tt][Tt][Pp]-[Ee][Qq][Uu][Ii][Vv]" "=\"[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]\" " "[Cc][Oo][Nn][Tt][Ee][Nn][Tt]=\"[Tt][Ee][Xx][Tt]/[Hh]" "[Tt][Mm][Ll]; [Cc][Hh][Aa][Rr][Ss][Ee][Tt]=" "([-a-zA-Z0-9]*)\">$"); if (reI==NULL) reI=regcomp("&[a-zA-Z0-9][a-zA-Z0-9]*;"); if (reSCRIPT==NULL) reSCRIPT = regcomp("<[Ss][Cc][Rr][Ii][Pp][Tt][^>]*>"); if (reSCRIPTend==NULL) reSCRIPTend = regcomp("]*>"); while (1) { znak_t *p; int indEtikete, i; /* U"citaj red iz ulaza */ if (fgets(prviPrihvatnik,MAX_RED+2,stdin)==NULL) break; if 
(strlen(prviPrihvatnik) > MAX_RED) { znak_t b[80]; sprintf(b, "Ulazna datoteka sadr\"zi red du\"zi od " "%u znakova.", MAX_RED); greska(b); greska("Treba pove\'cati veli\"cinu prihvatnika.\a"); if (ind.PrijaveGreske) { free(reTAG); free(reTAGend); return(1); } } indEtikete = dalijeEtiketa(prihvatnik); if (indEtikete == 2) { znak_t *p=prviPrihvatnik; do {p[0] = p[1]; ++p;} while(*p != '\0'); indEtikete = 0; } if (indEtikete == 0) { znak_t *p = prviPrihvatnik; /* * Red je u"citan, nije etiketa, i sada mo"zemo da ga * prevodimo. */ while (*p != '\0') { znak_t *q = prihvatnik; /* Ako smo u stanju izuzimanja: */ if (htmlIzuzece || reK != NULL) { /* Prvi prioritet ima reK */ if (reK != NULL) { if (regexec(reK, p)) { while (p < (znak_t*)(reK->endp[0])) *(q++) = *(p++); *q = '\0'; htmlIzuzece = 0; reK = NULL; printf("%s", prihvatnik); } else { printf("%s", p); *p = '\0'; } } /* Tra"zimo kraj etikete (tag-a, >) */ else if (regexec(reTAGend,p)) { while (p <= (znak_t*)(reTAGend->startp[0])) *(q++) = *(p++); *q = '\0'; htmlIzuzece = 0; if (strPodudarA("", prihvatnik)) indHead=1; if (regexec(reCharset, prihvatnik)) { indMeta=1; if (sEtiketa==NULL) *prihvatnik='\0'; else { strcpy(reCharset->startp[1],sEtiketa); strcpy(reCharset->startp[1]+strlen(sEtiketa), "\">"); } } printf("%s", prihvatnik); if (strPodudarA("", prihvatnik) && ind.StartneEtikete) printf("\n\n", NazivIzlaznogStd); } else { printf("%s", p); *p = '\0'; } } /* htmlIzuzece */ /* Ako nije izuze'ce: */ else { int iScript = regexec(reSCRIPT, p); int iAmp = ( regexec(reI, p) && (!iScript || reI->startp[0] < reSCRIPT->startp[0]) ); int iTag = ( regexec(reTAG, p) && (!iScript || reTAG->startp[0] < reSCRIPT->startp[0]) && (!iAmp || reTAG->startp[0] < reI->startp[0]) ); if (iScript && (!iAmp || reSCRIPT->startp[0] < reI->startp[0]) && (reSCRIPT->startp[0] == reTAG->startp[0]) ) { if (p < (znak_t*)reSCRIPT->startp[0]) { while (p < (znak_t*)(reSCRIPT->startp[0])) *(q++) = *(p++); *q = '\0'; } else { while (p < 
(znak_t*)reSCRIPT->endp[0]) putchar(*(p++)); *q = '\0'; htmlIzuzece = 1; reK = reSCRIPTend; } } else if (iAmp && (!iTag || reI->startp[0] < reTAG->startp[0]) ) { if (p < (znak_t*)reI->startp[0]) { while (p < (znak_t*)(reI->startp[0])) *(q++) = *(p++); *q = '\0'; } else { while (p < (znak_t*)reI->endp[0]) putchar(*(p++)); *q = '\0'; } } else if (iTag) { while (p < (znak_t*)(reTAG->startp[0])) *(q++) = *(p++); *q = '\0'; htmlIzuzece = 1; } else { while (*p != '\0') *(q++) = *(p++); *q = '\0'; } if (indBody && (ind.Temeljno || ulazniStd!=izlazniStd)) { prevediT(); } printf("%s",prihvatnik); /* Da vidimo "sta je to "sto slijedi */ if (htmlIzuzece && ( strPodudarA("", p) || ( strPodudarA("' || isspace(p[5])))) ) { if (!indMeta && sEtiketa!=NULL) { indMeta = 1; printf("\n\n", sEtiketa); } indBody=1; } } } /* kraj od while(*p != 0) */ } } free(reTAG); free(reTAGend); return 0; } /* kraj od prevedi_HTML */ int prevedi_email() { int indMime=0, indType=0, indTransfer=0, indKraj=0; prihvatnik_t prviPrihvatnik; znak_t *sEtiketa = standardnaEtiketa(izlazniStd); /* Zaglavlja */ if (fgets(prviPrihvatnik, MAX_RED+2,stdin)==NULL) indKraj=1; while (!indKraj && strcmp(prviPrihvatnik,"\n")!=0) { strcpy(prihvatnik, prviPrihvatnik); if (fgets(prviPrihvatnik, MAX_RED+2,stdin)==NULL) indKraj=1; while (!indKraj && (prviPrihvatnik[0]==' ' || prviPrihvatnik[0]=='\t')) { strcat(prihvatnik, prviPrihvatnik); if (fgets(prviPrihvatnik, MAX_RED+2,stdin)==NULL) indKraj=1; } if (strPodudarA("MIME-Version:", prihvatnik)) indMime=1; else if (strPodudarA("Content-Type:", prihvatnik)) { znak_t *p=prihvatnik; indType = 1; if (sEtiketa==NULL) *prihvatnik='\0'; else { while (*p!=';') ++p; strcpy(p, "; charset="); strcat(p, sEtiketa); strcat(p, "\n"); } } else if (strPodudarA("Content-Transfer-Encoding:",prihvatnik)) { indTransfer=1; strcpy(prihvatnik, "Content-Transfer-Encoding: 8bit\n"); } printf("%s", prihvatnik); } if (!indMime) printf("MIME-Version: 1.0\n"); if (!indType && sEtiketa!=NULL) 
printf("Content-Type: TEXT/PLAIN; charset=%s\n", sEtiketa); if (!indTransfer) printf("Content-Transfer-Encoding: 8bit\n"); if (!indKraj || ind.StartneEtikete) { printf("\n"); if (ind.StartneEtikete) printf("\n", NazivIzlaznogStd); } if (indKraj) return 0; while (1) { znak_t *p; int indEtikete; if (fgets(prihvatnik,MAX_RED+2,stdin)==NULL) break; if (strlen(prihvatnik) > MAX_RED) { znak_t b[80]; sprintf(b, "Ulazna datoteka sadr\"zi red du\"zi od " "%u znakova.", MAX_RED); greska(b); greska("Treba pove\'cati veli\"cinu prihvatnika.\a"); if (ind.PrijaveGreske) return(1); } indEtikete = dalijeEtiketa(prihvatnik); if (indEtikete == 2) { znak_t *p=prihvatnik; do {p[0] = p[1]; ++p;} while(*p != '\0'); indEtikete = 0; } if (indEtikete == 0) { if (ind.Temeljno || ulazniStd!=izlazniStd) { prevediT(); } printf("%s",prihvatnik); } } /* kraj od while (1) */ return 0; } /* kraj od prevedi_email */ /**** Elementarne funkcije nad niskama ****/ /* * Vra'ca <>0 ako s2 po"cinje niskom s1 */ int strPodudara(znak_t *s1, znak_t *s2) { for(; *s1!='\0' && *s2!='\0' && *s1==*s2; ++s1, ++s2) ; return (*s1 == '\0'); } /* * Vra'ca <>0 ako s2 po"cinje niskom s1, kapitalizacija se ignori"se */ int strPodudarA(znak_t *s1, znak_t *s2) { for(; *s1!='\0' && *s2!='\0' && toupper(*s1)==toupper(*s2); ++s1, ++s2) ; return (*s1 == '\0'); } int strPrazno(znak_t *s) { for(; *s!='\0' && isspace(*s); ++s) ; return (*s == '\0'); } int strCmp(znak_t *s1, znak_t *s2) { for (;toupper(*s1)==toupper(*s2); ++s1,++s2) if (*s1=='\0') return 0; return toupper(*s1)-toupper(*s2); } void staviNisku(znak_t **pp, int *pi, znak_t *n) { for(; *n!='\0'; ++n) staviZnak(*pp,*pi,*n); } /* pp se pomjera ako je prepoznato na"se slovo */ int jelNase(znak_t **pp, znak_t *s[], std_t st) { int i, slovo=-1, duzina=0; if (st==STD_ASCII) return -1; for (i=0; i' && strPrazno(&(p[strlen(stdE[i].etiketa)+3])) ) { if (p==prihvatnik) { ulazniStd = stdE[i].id; pstdTu = nadjiTabelu(ulazniStd); indEtikete = 1; if (ulazniStd==STD_IPP) prevediT(); 
} else indEtikete = 2; } } } return indEtikete; } /* kraj funkcije dalijeEtiketa */ /*********************************************************************** * Regularni izrazi, kod * Preuzeto iz koda Henry Spencer-a, modifikovano. * Modifed code by Henry Spencer. */ /* * regcomp and regexec -- regsub and regerror are elsewhere * @(#)regexp.c 1.3 of 18 April 87 * * Copyright (c) 1986 by University of Toronto. * Written by Henry Spencer. Not derived from licensed software. * * Permission is granted to anyone to use this software for any * purpose on any computer system, and to redistribute it freely, * subject to the following restrictions: * * 1. The author is not responsible for the consequences of use of * this software, no matter how awful, even if they arise * from defects in it. * * 2. The origin of this software must not be misrepresented, either * by explicit claim or by omission. * * 3. Altered versions must be plainly marked as such, and must not * be misrepresented as being the original software. * * Beware that some of this code is subtly aware of the way operator * precedence is structured in regular expressions. Serious changes in * regular-expression syntax might require a total rethink. */ /* #include #include #include "regmagic.h" */ /* * The "internal use only" fields in regexp.h are present to pass info from * compile to execute that permits the execute phase to run lots faster on * simple cases. They are: * * regstart char that must begin a match; '\0' if none obvious * reganch is the match anchored (at beginning-of-line only)? * regmust string (pointer into program) that match must include, or NULL * regmlen length of regmust string * * Regstart and reganch permit very fast decisions on suitable starting points * for a match, cutting down the work a lot. Regmust permits fast rejection * of lines that cannot possibly match. The regmust tests are costly enough * that regcomp() supplies a regmust only if the r.e. 
contains something * potentially expensive (at present, the only such thing detected is * or + * at the start of the r.e., which can involve a lot of backup). Regmlen is * supplied because the test in regexec() needs it and regcomp() is computing * it anyway. */ /* * Structure for regexp "program". This is essentially a linear encoding * of a nondeterministic finite-state machine (aka syntax charts or * "railroad normal form" in parsing technology). Each node is an opcode * plus a "next" pointer, possibly plus an operand. "Next" pointers of * all nodes except BRANCH implement concatenation; a "next" pointer with * a BRANCH on both ends of it is connecting two alternatives. (Here we * have one of the subtle syntax dependencies: an individual BRANCH (as * opposed to a collection of them) is never concatenated with anything * because of operator precedence.) The operand of some types of node is * a literal string; for others, it is a node leading into a sub-FSM. In * particular, the operand of a BRANCH node is the first node of the branch. * (NB this is *not* a tree structure: the tail of the branch connects * to the thing following the set of BRANCHes.) The opcodes are: */ /* definition number opnd? meaning */ #define END 0 /* no End of program. */ #define BOL 1 /* no Match "" at beginning of line. */ #define EOL 2 /* no Match "" at end of line. */ #define ANY 3 /* no Match any one character. */ #define ANYOF 4 /* str Match any character in this string. */ #define ANYBUT 5 /* str Match any character not in this string. */ #define BRANCH 6 /* node Match this alternative, or the next... */ #define BACK 7 /* no Match "", "next" ptr points backward. */ #define EXACTLY 8 /* str Match this string. */ #define NOTHING 9 /* no Match empty string. */ #define STAR 10 /* node Match this (simple) thing 0 or more times. */ #define PLUS 11 /* node Match this (simple) thing 1 or more times. */ #define OPEN 20 /* no Mark this point in input as start of #n. 
*/ /* OPEN+1 is number 1, etc. */ #define CLOSE 30 /* no Analogous to OPEN. */ /* * Opcode notes: * * BRANCH The set of branches constituting a single choice are hooked * together with their "next" pointers, since precedence prevents * anything being concatenated to any individual branch. The * "next" pointer of the last BRANCH in a choice points to the * thing following the whole choice. This is also where the * final "next" pointer of each individual branch points; each * branch starts with the operand node of a BRANCH node. * * BACK Normal "next" pointers all implicitly point forward; BACK * exists to make loop structures possible. * * STAR,PLUS '?', and complex '*' and '+', are implemented as circular * BRANCH structures using BACK. Simple cases (one character * per match) are implemented with STAR and PLUS for speed * and to minimize recursive plunges. * * OPEN,CLOSE ...are numbered at compile time. */ /* * A node is one char of opcode followed by two chars of "next" pointer. * "Next" pointers are stored as two 8-bit pieces, high order first. The * value is a positive offset from the opcode of the node containing it. * An operand, if any, simply follows the node. (Note that much of the * code generation knows about this implicit relationship.) * * Using two bytes for the "next" pointer is vast overkill for most things, * but allows patterns to get big without disasters. */ #define OP(p) (*(p)) #define NEXT(p) (((*((p)+1)&0377)<<8) + (*((p)+2)&0377)) #define OPERAND(p) ((p) + 3) /* * See regmagic.h for one further detail of program structure. */ /* * Utility definitions. */ #ifndef CHARBITS #define UCHARAT(p) ((int)*(unsigned char *)(p)) #else #define UCHARAT(p) ((int)*(p)&CHARBITS) #endif #define FAIL(m) { regerror(m); return(NULL); } #define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') #define META "^$.[()|?+*\\" /* * Flags to be passed up and down. */ #define HASWIDTH 01 /* Known never to match null string. 
*/ #define SIMPLE 02 /* Simple enough to be STAR/PLUS operand. */ #define SPSTART 04 /* Starts with * or +. */ #define WORST 0 /* Worst case. */ /* * Global work variables for regcomp(). */ static char *regparse; /* Input-scan pointer. */ static int regnpar; /* () count. */ static char regdummy; static char *regcode; /* Code-emit pointer; ®dummy = don't. */ static long regsize; /* Code size. */ /* * Forward declarations for regcomp()'s friends. */ #ifndef STATIC #define STATIC static #endif STATIC char *reg(); STATIC char *regbranch(); STATIC char *regpiece(); STATIC char *regatom(); STATIC char *regnode(); STATIC char *regnext(); STATIC void regc(); STATIC void reginsert(); STATIC void regtail(); STATIC void regoptail(); #ifdef STRCSPN STATIC int strcspn(); #endif /* - regcomp - compile a regular expression into internal code * * We can't allocate space until we know how big the compiled form will be, * but we can't compile it (and thus know how big it is) until we've got a * place to put the code. So we cheat: we compile it twice, once with code * generation turned off and size counting turned on, and once "for real". * This also means that we don't allocate space until we are sure that the * thing really will compile successfully, and we never have to move the * code and thus invalidate pointers into it. (Note that it has to be in * one piece because free() must be able to free it all.) * * Beware that the optimization-preparation code in here knows about some * of the structure of the compiled regexp. */ regexp * regcomp(exp) char *exp; { register regexp *r; register char *scan; register char *longest; register int len; int flags; if (exp == NULL) FAIL("NULL argument"); /* First pass: determine size, legality. */ regparse = exp; regnpar = 1; regsize = 0L; regcode = ®dummy; regc(MAGIC); if (reg(0, &flags) == NULL) return(NULL); /* Small enough for pointer-storage convention? */ if (regsize >= 32767L) /* Probably could be 65535L. 
*/ FAIL("regexp too big"); /* Allocate space. */ r = (regexp *)malloc(sizeof(regexp) + (unsigned)regsize); if (r == NULL) FAIL("out of space"); /* Second pass: emit code. */ regparse = exp; regnpar = 1; regcode = r->program; regc(MAGIC); if (reg(0, &flags) == NULL) return(NULL); /* Dig out information for optimizations. */ r->regstart = '\0'; /* Worst-case defaults. */ r->reganch = 0; r->regmust = NULL; r->regmlen = 0; scan = r->program+1; /* First BRANCH. */ if (OP(regnext(scan)) == END) { /* Only one top-level choice. */ scan = OPERAND(scan); /* Starting-point info. */ if (OP(scan) == EXACTLY) r->regstart = *OPERAND(scan); else if (OP(scan) == BOL) r->reganch++; /* * If there's something expensive in the r.e., find the * longest literal string that must appear and make it the * regmust. Resolve ties in favor of later strings, since * the regstart check works with the beginning of the r.e. * and avoiding duplication strengthens checking. Not a * strong reason, but sufficient in the absence of others. */ if (flags&SPSTART) { longest = NULL; len = 0; for (; scan != NULL; scan = regnext(scan)) if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) { longest = OPERAND(scan); len = strlen(OPERAND(scan)); } r->regmust = longest; r->regmlen = len; } } return(r); } /* - reg - regular expression, i.e. main body or parenthesized thing * * Caller must absorb opening parenthesis. * * Combining parenthesis handling with the base level of regular expression * is a trifle forced, but the need to tie the tails of the branches to what * follows makes it hard to avoid. */ static char * reg(paren, flagp) int paren; /* Parenthesized? */ int *flagp; { register char *ret; register char *br; register char *ender; register int parno; int flags; *flagp = HASWIDTH; /* Tentatively. */ /* Make an OPEN node, if parenthesized. 
*/ if (paren) { if (regnpar >= NSUBEXP) FAIL("too many ()"); parno = regnpar; regnpar++; ret = regnode(OPEN+parno); } else ret = NULL; /* Pick up the branches, linking them together. */ br = regbranch(&flags); if (br == NULL) return(NULL); if (ret != NULL) regtail(ret, br); /* OPEN -> first. */ else ret = br; if (!(flags&HASWIDTH)) *flagp &= ~HASWIDTH; *flagp |= flags&SPSTART; while (*regparse == '|') { regparse++; br = regbranch(&flags); if (br == NULL) return(NULL); regtail(ret, br); /* BRANCH -> BRANCH. */ if (!(flags&HASWIDTH)) *flagp &= ~HASWIDTH; *flagp |= flags&SPSTART; } /* Make a closing node, and hook it on the end. */ ender = regnode((paren) ? CLOSE+parno : END); regtail(ret, ender); /* Hook the tails of the branches to the closing node. */ for (br = ret; br != NULL; br = regnext(br)) regoptail(br, ender); /* Check for proper termination. */ if (paren && *regparse++ != ')') { FAIL("unmatched ()"); } else if (!paren && *regparse != '\0') { if (*regparse == ')') { FAIL("unmatched ()"); } else FAIL("junk on end"); /* "Can't happen". */ /* NOTREACHED */ } return(ret); } /* - regbranch - one alternative of an | operator * * Implements the concatenation operator. */ static char * regbranch(flagp) int *flagp; { register char *ret; register char *chain; register char *latest; int flags; *flagp = WORST; /* Tentatively. */ ret = regnode(BRANCH); chain = NULL; while (*regparse != '\0' && *regparse != '|' && *regparse != ')') { latest = regpiece(&flags); if (latest == NULL) return(NULL); *flagp |= flags&HASWIDTH; if (chain == NULL) /* First piece. */ *flagp |= flags&SPSTART; else regtail(chain, latest); chain = latest; } if (chain == NULL) /* Loop ran zero times. */ (void) regnode(NOTHING); return(ret); } /* - regpiece - something followed by possible [*+?] * * Note that the branching code sequences used for ? 
and the general cases * of * and + are somewhat optimized: they use the same NOTHING node as * both the endmarker for their branch list and the body of the last branch. * It might seem that this node could be dispensed with entirely, but the * endmarker role is not redundant. */ static char * regpiece(flagp) int *flagp; { register char *ret; register char op; register char *next; int flags; ret = regatom(&flags); if (ret == NULL) return(NULL); op = *regparse; if (!ISMULT(op)) { *flagp = flags; return(ret); } if (!(flags&HASWIDTH) && op != '?') FAIL("*+ operand could be empty"); *flagp = (op != '+') ? (WORST|SPSTART) : (WORST|HASWIDTH); if (op == '*' && (flags&SIMPLE)) reginsert(STAR, ret); else if (op == '*') { /* Emit x* as (x&|), where & means "self". */ reginsert(BRANCH, ret); /* Either x */ regoptail(ret, regnode(BACK)); /* and loop */ regoptail(ret, ret); /* back */ regtail(ret, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if (op == '+' && (flags&SIMPLE)) reginsert(PLUS, ret); else if (op == '+') { /* Emit x+ as x(&|), where & means "self". */ next = regnode(BRANCH); /* Either */ regtail(ret, next); regtail(regnode(BACK), ret); /* loop back */ regtail(next, regnode(BRANCH)); /* or */ regtail(ret, regnode(NOTHING)); /* null. */ } else if (op == '?') { /* Emit x? as (x|) */ reginsert(BRANCH, ret); /* Either x */ regtail(ret, regnode(BRANCH)); /* or */ next = regnode(NOTHING); /* null. */ regtail(ret, next); regoptail(ret, next); } regparse++; if (ISMULT(*regparse)) FAIL("nested *?+"); return(ret); } /* - regatom - the lowest level * * Optimization: gobbles an entire sequence of ordinary characters so that * it can turn them into a single node, which is smaller to store and * faster to run. Backslashed characters are exceptions, each becoming a * separate node; the code is simpler that way and it's not worth fixing. */ static char * regatom(flagp) int *flagp; { register char *ret; int flags; *flagp = WORST; /* Tentatively. 
*/ switch (*regparse++) { case '^': ret = regnode(BOL); break; case '$': ret = regnode(EOL); break; case '.': ret = regnode(ANY); *flagp |= HASWIDTH|SIMPLE; break; case '[': { register int class; register int classend; if (*regparse == '^') { /* Complement of range. */ ret = regnode(ANYBUT); regparse++; } else ret = regnode(ANYOF); if (*regparse == ']' || *regparse == '-') regc(*regparse++); while (*regparse != '\0' && *regparse != ']') { if (*regparse == '-') { regparse++; if (*regparse == ']' || *regparse == '\0') regc('-'); else { class = UCHARAT(regparse-2)+1; classend = UCHARAT(regparse); if (class > classend+1) FAIL("invalid [] range"); for (; class <= classend; class++) regc(class); regparse++; } } else regc(*regparse++); } regc('\0'); if (*regparse != ']') FAIL("unmatched []"); regparse++; *flagp |= HASWIDTH|SIMPLE; } break; case '(': ret = reg(1, &flags); if (ret == NULL) return(NULL); *flagp |= flags&(HASWIDTH|SPSTART); break; case '\0': case '|': case ')': FAIL("internal urp"); /* Supposed to be caught earlier. */ break; case '?': case '+': case '*': FAIL("?+* follows nothing"); break; case '\\': if (*regparse == '\0') FAIL("trailing \\"); ret = regnode(EXACTLY); regc(*regparse++); regc('\0'); *flagp |= HASWIDTH|SIMPLE; break; default: { register int len; register char ender; regparse--; len = strcspn(regparse, META); if (len <= 0) FAIL("internal disaster"); ender = *(regparse+len); if (len > 1 && ISMULT(ender)) len--; /* Back off clear of ?+* operand. */ *flagp |= HASWIDTH; if (len == 1) *flagp |= SIMPLE; ret = regnode(EXACTLY); while (len > 0) { regc(*regparse++); len--; } regc('\0'); } break; } return(ret); } /* - regnode - emit a node */ static char * /* Location. */ regnode(op) char op; { register char *ret; register char *ptr; ret = regcode; if (ret == ®dummy) { regsize += 3; return(ret); } ptr = ret; *ptr++ = op; *ptr++ = '\0'; /* Null "next" pointer. 
*/ *ptr++ = '\0'; regcode = ptr; return(ret); } /* - regc - emit (if appropriate) a byte of code */ static void regc(b) char b; { if (regcode != ®dummy) *regcode++ = b; else regsize++; } /* - reginsert - insert an operator in front of already-emitted operand * * Means relocating the operand. */ static void reginsert(op, opnd) char op; char *opnd; { register char *src; register char *dst; register char *place; if (regcode == ®dummy) { regsize += 3; return; } src = regcode; regcode += 3; dst = regcode; while (src > opnd) *--dst = *--src; place = opnd; /* Op node, where operand used to be. */ *place++ = op; *place++ = '\0'; *place++ = '\0'; } /* - regtail - set the next-pointer at the end of a node chain */ static void regtail(p, val) char *p; char *val; { register char *scan; register char *temp; register int offset; if (p == ®dummy) return; /* Find last node. */ scan = p; for (;;) { temp = regnext(scan); if (temp == NULL) break; scan = temp; } if (OP(scan) == BACK) offset = scan - val; else offset = val - scan; *(scan+1) = (offset>>8)&0377; *(scan+2) = offset&0377; } /* - regoptail - regtail on operand of first argument; nop if operandless */ static void regoptail(p, val) char *p; char *val; { /* "Operandless" and "op != BRANCH" are synonymous in practice. */ if (p == NULL || p == ®dummy || OP(p) != BRANCH) return; regtail(OPERAND(p), val); } /* * regexec and friends */ /* * Global work variables for regexec(). */ static char *reginput; /* String-input pointer. */ static char *regbol; /* Beginning of input, for ^ check. */ static char **regstartp; /* Pointer to startp array. */ static char **regendp; /* Ditto for endp. */ /* * Forwards. 
*/ STATIC int regtry(); STATIC int regmatch(); STATIC int regrepeat(); #ifdef DEBUG int regnarrate = 0; void regdump(); STATIC char *regprop(); #endif /* - regexec - match a regexp against a string */ int regexec(prog, string) register regexp *prog; register char *string; { register char *s; /* extern char *strchr(); */ /* Be paranoid... */ if (prog == NULL || string == NULL) { regerror("NULL parameter"); return(0); } /* Check validity of program. */ if (UCHARAT(prog->program) != MAGIC) { regerror("corrupted program"); return(0); } /* If there is a "must appear" string, look for it. */ if (prog->regmust != NULL) { s = string; while ((s = strchr(s, prog->regmust[0])) != NULL) { if (strncmp(s, prog->regmust, prog->regmlen) == 0) break; /* Found it. */ s++; } if (s == NULL) /* Not present. */ return(0); } /* Mark beginning of line for ^ . */ regbol = string; /* Simplest case: anchored match need be tried only once. */ if (prog->reganch) return(regtry(prog, string)); /* Messy cases: unanchored match. */ s = string; if (prog->regstart != '\0') /* We know what char it must start with. */ while ((s = strchr(s, prog->regstart)) != NULL) { if (regtry(prog, s)) return(1); s++; } else /* We don't -- general case. */ do { if (regtry(prog, s)) return(1); } while (*s++ != '\0'); /* Failure. */ return(0); } /* - regtry - try match at specific point */ static int /* 0 failure, 1 success */ regtry(prog, string) regexp *prog; char *string; { register int i; register char **sp; register char **ep; reginput = string; regstartp = prog->startp; regendp = prog->endp; sp = prog->startp; ep = prog->endp; for (i = NSUBEXP; i > 0; i--) { *sp++ = NULL; *ep++ = NULL; } if (regmatch(prog->program + 1)) { prog->startp[0] = string; prog->endp[0] = reginput; return(1); } else return(0); } /* - regmatch - main matching routine * * Conceptually the strategy is simple: check to see whether the current * node matches, call self recursively to see whether the rest matches, * and then act accordingly. 
In practice we make some effort to avoid * recursion, in particular by going through "ordinary" nodes (that don't * need to know whether the rest of the match failed) by a loop instead of * by recursion. */ static int /* 0 failure, 1 success */ regmatch(prog) char *prog; { register char *scan; /* Current node. */ char *next; /* Next node. */ /* extern char *strchr(); */ scan = prog; #ifdef DEBUG if (scan != NULL && regnarrate) fprintf(stderr, "%s(\n", regprop(scan)); #endif while (scan != NULL) { #ifdef DEBUG if (regnarrate) fprintf(stderr, "%s...\n", regprop(scan)); #endif next = regnext(scan); switch (OP(scan)) { case BOL: if (reginput != regbol) return(0); break; case EOL: if (*reginput != '\0') return(0); break; case ANY: if (*reginput == '\0') return(0); reginput++; break; case EXACTLY: { register int len; register char *opnd; opnd = OPERAND(scan); /* Inline the first character, for speed. */ if (*opnd != *reginput) return(0); len = strlen(opnd); if (len > 1 && strncmp(opnd, reginput, len) != 0) return(0); reginput += len; } break; case ANYOF: if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) == NULL) return(0); reginput++; break; case ANYBUT: if (*reginput == '\0' || strchr(OPERAND(scan), *reginput) != NULL) return(0); reginput++; break; case NOTHING: break; case BACK: break; case OPEN+1: case OPEN+2: case OPEN+3: case OPEN+4: case OPEN+5: case OPEN+6: case OPEN+7: case OPEN+8: case OPEN+9: { register int no; register char *save; no = OP(scan) - OPEN; save = reginput; if (regmatch(next)) { /* * Don't set startp if some later * invocation of the same parentheses * already has. 
*/ if (regstartp[no] == NULL) regstartp[no] = save; return(1); } else return(0); } break; case CLOSE+1: case CLOSE+2: case CLOSE+3: case CLOSE+4: case CLOSE+5: case CLOSE+6: case CLOSE+7: case CLOSE+8: case CLOSE+9: { register int no; register char *save; no = OP(scan) - CLOSE; save = reginput; if (regmatch(next)) { /* * Don't set endp if some later * invocation of the same parentheses * already has. */ if (regendp[no] == NULL) regendp[no] = save; return(1); } else return(0); } break; case BRANCH: { register char *save; if (OP(next) != BRANCH) /* No choice. */ next = OPERAND(scan); /* Avoid recursion. */ else { do { save = reginput; if (regmatch(OPERAND(scan))) return(1); reginput = save; scan = regnext(scan); } while (scan != NULL && OP(scan) == BRANCH); return(0); /* NOTREACHED */ } } break; case STAR: case PLUS: { register char nextch; register int no; register char *save; register int min; /* * Lookahead to avoid useless match attempts * when we know what character comes next. */ nextch = '\0'; if (OP(next) == EXACTLY) nextch = *OPERAND(next); min = (OP(scan) == STAR) ? 0 : 1; save = reginput; no = regrepeat(OPERAND(scan)); while (no >= min) { /* If it could work, try it. */ if (nextch == '\0' || *reginput == nextch) if (regmatch(next)) return(1); /* Couldn't or didn't -- back up. */ no--; reginput = save + no; } return(0); } break; case END: return(1); /* Success! */ break; default: regerror("memory corruption"); return(0); break; } scan = next; } /* * We get here only if there's trouble -- normally "case END" is * the terminating point. 
*/ regerror("corrupted pointers"); return(0); } /* - regrepeat - repeatedly match something simple, report how many */ static int regrepeat(p) char *p; { register int count = 0; register char *scan; register char *opnd; scan = reginput; opnd = OPERAND(p); switch (OP(p)) { case ANY: count = strlen(scan); scan += count; break; case EXACTLY: while (*opnd == *scan) { count++; scan++; } break; case ANYOF: while (*scan != '\0' && strchr(opnd, *scan) != NULL) { count++; scan++; } break; case ANYBUT: while (*scan != '\0' && strchr(opnd, *scan) == NULL) { count++; scan++; } break; default: /* Oh dear. Called inappropriately. */ regerror("internal foulup"); count = 0; /* Best compromise. */ break; } reginput = scan; return(count); } /* - regnext - dig the "next" pointer out of a node */ static char * regnext(p) register char *p; { register int offset; if (p == ®dummy) return(NULL); offset = NEXT(p); if (offset == 0) return(NULL); if (OP(p) == BACK) return(p-offset); else return(p+offset); } #ifdef DEBUG STATIC char *regprop(); /* - regdump - dump a regexp onto stdout in vaguely comprehensible form */ void regdump(r) regexp *r; { register char *s; register char op = EXACTLY; /* Arbitrary non-END op. */ register char *next; extern char *strchr(); s = r->program + 1; while (op != END) { /* While that wasn't END last time... */ op = OP(s); printf("%2d%s", s-r->program, regprop(s)); /* Where, what. */ next = regnext(s); if (next == NULL) /* Next ptr. */ printf("(0)"); else printf("(%d)", (s-r->program)+(next-s)); s += 3; if (op == ANYOF || op == ANYBUT || op == EXACTLY) { /* Literal string, where present. */ while (*s != '\0') { putchar(*s); s++; } s++; } putchar('\n'); } /* Header fields of interest. 
*/ if (r->regstart != '\0') printf("start `%c' ", r->regstart); if (r->reganch) printf("anchored "); if (r->regmust != NULL) printf("must have \"%s\"", r->regmust); printf("\n"); } /* - regprop - printable representation of opcode */ static char * regprop(op) char *op; { register char *p; static char buf[50]; (void) strcpy(buf, ":"); switch (OP(op)) { case BOL: p = "BOL"; break; case EOL: p = "EOL"; break; case ANY: p = "ANY"; break; case ANYOF: p = "ANYOF"; break; case ANYBUT: p = "ANYBUT"; break; case BRANCH: p = "BRANCH"; break; case EXACTLY: p = "EXACTLY"; break; case NOTHING: p = "NOTHING"; break; case BACK: p = "BACK"; break; case END: p = "END"; break; case OPEN+1: case OPEN+2: case OPEN+3: case OPEN+4: case OPEN+5: case OPEN+6: case OPEN+7: case OPEN+8: case OPEN+9: sprintf(buf+strlen(buf), "OPEN%d", OP(op)-OPEN); p = NULL; break; case CLOSE+1: case CLOSE+2: case CLOSE+3: case CLOSE+4: case CLOSE+5: case CLOSE+6: case CLOSE+7: case CLOSE+8: case CLOSE+9: sprintf(buf+strlen(buf), "CLOSE%d", OP(op)-CLOSE); p = NULL; break; case STAR: p = "STAR"; break; case PLUS: p = "PLUS"; break; default: regerror("corrupted opcode"); break; } if (p != NULL) (void) strcat(buf, p); return(buf); } #endif /* * The following is provided for those people who do not have strcspn() in * their C libraries. They should get off their butts and do something * about it; at least one public-domain implementation of those (highly * useful) string routines has been published on Usenet. */ #ifdef STRCSPN /* * strcspn - find length of initial segment of s1 consisting entirely * of characters not from s2 */ static int strcspn(s1, s2) char *s1; char *s2; { register char *scan1; register char *scan2; register int count; count = 0; for (scan1 = s1; *scan1 != '\0'; scan1++) { for (scan2 = s2; *scan2 != '\0';) /* ++ moved down. 
*/ if (*scan1 == *scan2++) return(count); count++; } return(count); } #endif #ifndef CHARBITS #define UCHARAT(p) ((int)*(unsigned char *)(p)) #else #define UCHARAT(p) ((int)*(p)&CHARBITS) #endif /* - regsub - perform substitutions after a regexp match */ void regsub(prog, source, dest) regexp *prog; char *source; char *dest; { register char *src; register char *dst; register char c; register int no; register int len; /* extern char *strncpy(); */ if (prog == NULL || source == NULL || dest == NULL) { regerror("NULL parm to regsub"); return; } if (UCHARAT(prog->program) != MAGIC) { regerror("damaged regexp fed to regsub"); return; } src = source; dst = dest; while ((c = *src++) != '\0') { if (c == '&') no = 0; else if (c == '\\' && '0' <= *src && *src <= '9') no = *src++ - '0'; else no = -1; if (no < 0) { /* Ordinary character. */ if (c == '\\' && (*src == '\\' || *src == '&')) c = *src++; *dst++ = c; } else if (prog->startp[no] != NULL && prog->endp[no] != NULL) { len = prog->endp[no] - prog->startp[no]; (void) strncpy(dst, prog->startp[no], len); dst += len; if (len != 0 && *(dst-1) == '\0') { /* strncpy hit NUL. */ regerror("damaged match string"); return; } } } *dst++ = '\0'; } void regerror(s) char *s; { #ifdef ERRAVAIL error("regexp: %s", s); #else fprintf(stderr, "regexp(3): %s", s); exit(1); #endif /* NOTREACHED */ } /* kraj koda regularnih izraza ***********************************************************************/ /* KRAJ: unistd.c */