00001 #include<stdio.h>
00002 #include<ctype.h>
00003 #include<windows.h>
00004 #include<string>
00005 #include<stack>
00006 #include"bit_stack.cpp"
00007
class Output_interface;

/// Program-wide settings; the single instance is the global `options`.
class Options {
public:
    FILE *in_file;                    // stream the source text is read from
    FILE *out_file;                   // stream the back ends write to
    Output_interface *out_interface;  // selected back end; 0 until one is chosen

    Options();

    // Amount of reformatting applied by Format_engine::Output().
    enum { none = 0, ident, full } fmt_mode;

    int in_tab_size;   // tab width used to expand tabs in the input
    int out_tab_size;  // indentation step and tab width for the output

    char *FindArg(char **, int, char *);
    void SetArg(char **, int, char *, int *, int, int);
    void ScanOptions(char **, int);
} options;
00025
00026
00027 Options::Options()
00028 {
00029 out_interface=0;
00030 in_tab_size=4;
00031 out_tab_size=4;
00032 }
00033
00034
/// Base class of the output back ends. The default implementations write
/// plain text; back ends override the pieces they need.
class Output_interface {
protected:
    // True when the token is one of the highlighted C/C++ keywords.
    bool isKeyWord(char *);
    // Writes one character of token text.
    virtual void PrintChar(char);
public:
    // Back ends are created with new and held through a base pointer, so the
    // destructor has to be virtual for a delete through that pointer to be valid.
    virtual ~Output_interface() {}
    // Called once before any output / once after all output.
    virtual void Start() {}
    virtual void Stop() {}
    // Writes the given number of leading blanks.
    virtual void Ident(int);
    // Writes one token.
    virtual void Write(char *);
    // Lets a back end pick up its own command-line switches.
    virtual void ScanOptions(char **, int) {}
};
00047
00049 class Format_engine
00050 {
00051 enum TokType{t_do=0,t_else,t_for,t_if,t_while,t_case,t_class,t_struct,t_enum};
00052 struct _frln{
00053 int ident;
00054 int lines;
00055 char *preview;
00056 std::string buffer;
00057 }freadln;
00058 struct _fstmt{
00059 bool wait_balance;
00060 }fstmt;
00061 struct _v{
00062 enum {
00063 work,
00064 tadd,
00065 pop,
00066 _if,
00067 _else
00068 }status;
00069 int curr_ident;
00070 int temp_ident;
00071 std::stack<int>ident_stack;
00072 }fh;
00073 struct _h{
00074 enum pre_en{
00075 empty,
00076 none,
00077 def,
00078 inc,
00079 unk
00080 }last_pre;
00081 enum {
00082 work,
00083 noline,
00084 noident,
00085 noident2
00086 }status;
00087 BitStack stack;
00088 }fv;
00089 struct _fout{
00090 bool new_line;
00091 int ident;
00092 }foutput;
00093 int arr_count,arr_size;
00094 struct item_st{int ident; int lines; char *text;}*array;
00095 char *sym_table;
00096 int balance1;
00097 int balance2;
00098 void Clear();
00099 void AddItem(int ident,char *text,int lines);
00100 void FormatReadLn(int ident,char *text);
00101 void FormatStmt(int ident,char*,int lines);
00102 void FormatH();
00103 void FormatV();
00104 void Output();
00105 TokType TokenType(char*);
00106 public:
00107 Format_engine();
00108 ~Format_engine();
00109 void Format(int,char);
00110 void Format(int,char*);
00111 void Stop();
00112 };
00113
00114
00115
00116
00118 class Syntax_engine{
00119 enum STATUS{
00120 ready,
00121 work,
00122 str,str1,
00123 ch,ch1,
00124 num,
00125 float1,
00126 float2,
00127 float3,
00128 float4,
00129 float5,
00130 rem,
00131 rem_single,
00132 rem_mul,rem_mul1,
00133 tok,
00134 prepro1,
00135 prepro2,
00136 space
00137 }status;
00138 int in_pos;
00139 char old;
00140 char sym;
00141 int space_count;
00142 std::string buffer;
00143 bool FirstPair(char sym);
00144 bool DetectPair(char old,char sym);
00145 void CheckSymbol();
00146 protected:
00147 Format_engine form_engine;
00148 void Tabulator(char);
00149 void FormatBuffer();
00150 void FormatChar(char);
00151 public:
00152 void Work();
00153 };
00154
00155
00157 class HTML:public Output_interface
00158 {
00159 void PrintChar(char sym);
00160 void Write(char*);
00161 void Start();
00162 void Stop();
00163 };
00164
00166 class CPP:public Output_interface
00167 {
00168 int no_tab;
00169 int out_pos;
00170 public:
00171 CPP();
00172 void Ident(int);
00173 void PrintChar(char);
00174 void ScanOptions(char**,int);
00175 void Start();
00176 };
00177
00179 class TEX:public Output_interface
00180 {
00181 void PrintChar(char sym);
00182 public:
00183 void Write(char*);
00184 void Start();
00185 void Stop();
00186 };
00187
00188
00189
00192 bool Output_interface::isKeyWord(char *text)
00193 {
00194 int a,b,c,n;
00195 static const char *list[]={
00196 "__int64","_asm","bool","break","case","char","class","const",
00197 "continue","default","do","double",
00198 "else","enum","extern","float","for","if","int","long",
00199 "public","private","protected","register","return","short",
00200 "signed","sizeof","static","struct","switch","template","typedef",
00201 "typename","union","unsigned","void","volatile","while"
00202 };
00203 a=0;
00204 b=sizeof(list)/sizeof(char*)-1;
00205 while(a<=b) {
00206 #ifdef _DEBUG
00207 if(strcmp(list[a],list[b])>0) _asm { int 3 }
00208 #endif
00209 c=(a+b)/2;
00210 n=strcmp(list[c],text);
00211 if(!n) return true;
00212 if(n>0) a=c+1; else b=c-1;
00213 }
00214 return false;
00215 }
00216
00218 void Output_interface::Ident(int count)
00219 {
00220 while(count>0) fputc(' ',options.out_file),count--;
00221 }
00222
00224 void Output_interface::Write(char* text)
00225 {
00226 while(*text) PrintChar(*text++);
00227 }
00228
00230 void Output_interface::PrintChar(char sym)
00231 {
00232 fputc(sym,options.out_file);
00233 }
00234
00235
00236
00237
00238 Format_engine::Format_engine()
00239 {
00240 foutput.ident=0;
00241 foutput.new_line=true;
00242 fh.status=fh.work;
00243 fh.curr_ident=fh.temp_ident=0;
00244 fv.status=fv.work;
00245 fv.last_pre=fv.empty;
00246 freadln.preview="";
00247 fstmt.wait_balance=false;
00248 array=0; arr_size=arr_count=0;
00249 balance1=balance2=0;
00250 sym_table=(char*)malloc(512);
00251 for(int n=0; n<256; n++) sym_table[n*2]=n,sym_table[n*2+1]=0;
00252 }
00253
00254
00255 Format_engine::~Format_engine()
00256 {
00257 free(array);
00258 free(sym_table);
00259 }
00260
00264 void Format_engine::AddItem(int ident,char *text,int lines)
00265 {
00266 if(arr_count>=arr_size) {
00267 arr_size+=arr_size/2+1;
00268 array=(item_st*)realloc(array,arr_size*sizeof(item_st));
00269 }
00270 array[arr_count].ident=ident;
00271 array[arr_count].lines=lines;
00272 if(text[1]) array[arr_count].text=strdup(text);
00273 else array[arr_count].text=&sym_table[2*(byte)*text];
00274 arr_count++;
00275 }
00276
00277
00278 void Format_engine::Clear()
00279 {
00280 while(arr_count) {
00281 arr_count--;
00282 if(array[arr_count].text[1]) free(array[arr_count].text);
00283 }
00284 }
00285
00286
00289 void Format_engine::Format(int ident,char sym)
00290 {
00291 char buff[2];
00292 buff[0]=sym; buff[1]=0;
00293 Format(ident,buff);
00294 }
00295
00296
00299 void Format_engine::Format(int ident,char *text)
00300 {
00301 FormatReadLn(ident,text);
00302 }
00303
00304
00307 void Format_engine::FormatReadLn(int ident,char *text)
00308 {
00309 if(text[0]=='\n') freadln.lines++;
00310 else {
00311 freadln.preview=text;
00312 if(!freadln.buffer.empty()) {
00313 FormatStmt(freadln.ident,(char*)freadln.buffer.c_str(),freadln.lines);
00314 }
00315 freadln.lines=0;
00316 freadln.ident=ident;
00317 freadln.buffer=text;
00318 }
00319 }
00320
00323 Format_engine::TokType Format_engine::TokenType(char *txt)
00324 {
00325 int n;
00326 char *list[]={"do","else","for","if","while","case"
00327 ,"class","struct","enum",0};
00328 for(n=0; list[n]; n++) if(!strcmp(txt,list[n])) break;
00329 return (TokType)n;
00330 }
00331
00333 void Format_engine::Stop()
00334 {
00335 if(!freadln.buffer.empty()) {
00336 FormatStmt(freadln.ident,(char*)freadln.buffer.c_str(),freadln.lines);
00337 }
00338 if(arr_count) Output();
00339 }
00340
00341
00345 void Format_engine::FormatStmt(int ident,char *text,int nl)
00346 {
00347 switch(text[0]) {
00348 case '\n': return;
00349 case '(': balance1++; break;
00350 case ')': balance1--; break;
00351 case '{': Output(); balance2++; break;
00352 case '}': Output(); balance2--; break;
00353 }
00354 if(text[0]!='\n') AddItem(ident,text,nl);
00355 switch(text[0]) {
00356 case '/': if(text[1]=='/' || text[1]=='*') Output(); break;
00357 case ';':
00358 case ',': if(!balance1) Output(); break;
00359 case ':': if(balance2 && !text[1]) Output(); break;
00360 case ')': if(!balance1 && fstmt.wait_balance) Output(),fstmt.wait_balance=false; break;
00361 case '#':
00362 case '{':
00363 case '}': Output(); break;
00364 }
00365 switch(TokenType(text)) {
00366 case t_do: case t_else: Output(); break;
00367 case t_while: case t_for: case t_if: fstmt.wait_balance=true; break;
00368 }
00369 }
00370
00371
00372 void Format_engine::FormatH()
00373 {
00374 TokType type=TokenType(array[0].text);
00375 switch(fh.status) {
00376 case fh._if:
00377 case fh.tadd:
00378 if(array[0].text[0]!='{') fh.temp_ident+=options.out_tab_size;
00379 break;
00380 case fh.pop:
00381 if(fh.ident_stack.empty()) break;
00382 fh.curr_ident=fh.ident_stack.top(),fh.ident_stack.pop();
00383 fh.temp_ident=fh.ident_stack.top(),fh.ident_stack.pop();
00384 if(fh.temp_ident) fh.status=fh._if;
00385 else break;
00386 case fh._else:
00387 if(type==t_else && fh.temp_ident) {fh.temp_ident-=options.out_tab_size;break;}
00388 default:
00389 fh.temp_ident=0;
00390 break;
00391 }
00392 foutput.ident=fh.curr_ident+fh.temp_ident;
00393 if(fh.status==fh._if) fh.status=fh._else;
00394 else fh.status=fh.work;
00395 if(arr_count>1 && foutput.ident && !strcmp(array[1].text,":") || type==t_case) foutput.ident-=options.out_tab_size;
00396 if(array[0].text[0]=='{') {
00397 fh.ident_stack.push(fh.status==fh._else?fh.temp_ident+options.out_tab_size:0);
00398 fh.ident_stack.push(fh.curr_ident);
00399 fh.curr_ident=foutput.ident+options.out_tab_size;
00400 fh.status=fh.work;
00401 }
00402 if(array[0].text[0]=='}') {
00403 foutput.ident-=options.out_tab_size;
00404 fh.status=fh.pop;
00405 }
00406 switch(type) {
00407 case t_if: fh.status=fh._if; break;
00408 case t_else:
00409 case t_while: case t_for: fh.status=fh.tadd; break;
00410 }
00411 }
00412
00413
00414 void Format_engine::FormatV()
00415 {
00416 int n; bool newline=true; bool dup=false;
00417 for(n=0; n<arr_count; n++) array[n].lines=0;
00418
00419 _h::pre_en old=fv.last_pre;
00420 if(array[0].text[0]!='#') fv.last_pre=fv.none;
00421 else if(!strncmp(array[0].text,"#define" ,7)) fv.last_pre=fv.def;
00422 else if(!strncmp(array[0].text,"#include",8)) fv.last_pre=fv.inc;
00423 else fv.last_pre=fv.unk;
00424 if(old!=fv.last_pre && old!=fv.empty) options.out_interface->Write("\n");
00425
00426 if(array[0].text[0]=='{') fv.stack.Push(fv.status!=fv.noline);
00427 if(array[0].text[0]=='}') {
00428 newline=fv.stack.Pop();
00429 dup=!balance2;
00430 if(freadln.preview[0]==';') newline=false;
00431 }
00432 switch(fv.status) {
00433 case fv.noident2: dup=true;
00434 case fv.noident: array[0].ident=0;
00435 }
00436 fv.status=fv.work;
00437
00438 switch(TokenType(array[0].text)) {
00439 case t_for: case t_if: case t_else: case t_do: case t_while:
00440 if(freadln.preview[0]==';') newline=false;
00441 break;
00442 case t_class: case t_struct: case t_enum:
00443 fv.status=fv.noline;
00444 break;
00445 }
00446 n--;
00447
00448 if(newline) {
00449 array[n].lines=1;
00450 if(dup) array[n].lines++;
00451 }
00452 else {
00453 fv.status=dup?fv.noident2:fv.noident;
00454 }
00455 }
00456
00457
00458 void Format_engine::Output()
00459 {
00460 int n,m;
00461 if(!arr_count) return;
00462 if(options.fmt_mode!=options.none) FormatH();
00463 if(options.fmt_mode==options.full) FormatV();
00464 for(n=0; n<arr_count; n++) {
00465 options.out_interface->Ident((foutput.new_line && options.fmt_mode!=options.none)?foutput.ident:array[n].ident);
00466 options.out_interface->Write(array[n].text);
00467 for(m=0; m<array[n].lines; m++) options.out_interface->Write("\n");
00468 foutput.new_line=!!m;
00469 }
00470 Clear();
00471 }
00472
00473
00474
00475
00476
00477 void Syntax_engine::Work()
00478 {
00479 char sym;
00480 in_pos=0;
00481 old=0;
00482 space_count=0;
00483 status=ready;
00484 options.out_interface->Start();
00485 for(;;) {
00486 sym=fgetc(options.in_file); if(feof(options.in_file)) break;
00487 Tabulator(sym);
00488 }
00489 if(FirstPair(old)) FormatChar(old);
00490 FormatBuffer();
00491 form_engine.Stop();
00492 }
00493
00494
00495
00497 void Syntax_engine::Tabulator(char s)
00498 {
00499 sym=s;
00500 switch(s) {
00501 case '\t': sym=' '; do{CheckSymbol(),in_pos++;}while(in_pos%options.in_tab_size); break;
00502 case '\r': return;
00503 case '\n': CheckSymbol(); in_pos=0; break;
00504 default: CheckSymbol(); in_pos++; break;
00505 }
00506 old=sym;
00507 }
00508
00511 bool Syntax_engine::FirstPair(char sym)
00512 {
00513 return sym && strchr("<>=!+-:&|",sym)!=0;
00514 }
00515
00519 bool Syntax_engine::DetectPair(char old,char sym)
00520 {
00521 static const char ops[]=">><<<=>=->::++--==&&||!=";
00522 for(int n=0; ops[n]; n+=2) if(ops[n]==old && ops[n+1]==sym) return true;
00523 return false;
00524 }
00525
00526
00527 void Syntax_engine::CheckSymbol()
00528 {
00529 l1:
00530 if(status==rem) {
00531 if(sym=='/') {status=rem_single; buffer+="//";return;}
00532 else if(sym=='*') {status=rem_mul; buffer+="/*"; return;}
00533 else FormatChar('/'),status=work;
00534 }
00535 switch(status) {
00536 case ready:
00537 if(sym=='#') {status=prepro1; buffer+=sym;break;}
00538 case work:
00539 if(DetectPair(old,sym)) {buffer+=old,buffer+=sym,sym=0,FormatBuffer();return;}
00540 if(FirstPair(old)) FormatChar(old);
00541 if(FirstPair(sym)) return;
00542 if(sym=='\n') break;
00543 switch(sym) {
00544 case ' ': status=space; space_count=0; break;
00545 case '"': status=str; break;
00546 case '\'': status=ch; break;
00547 case '.': status=float1; break;
00548 case '/': status=rem; break;
00549 default:
00550 if(isdigit(sym)) status=num;
00551 else if(isalpha(sym) || sym=='_') status=tok;
00552 else status=work,FormatChar(sym);
00553 }
00554 break;
00555 case prepro1:
00556 if(sym!=' ') status=prepro2;
00557 break;
00558 }
00559 if(sym=='\n' && status!=rem_mul && status!=rem_mul1) {
00560 FormatBuffer();
00561 FormatChar('\n');
00562 status=ready;
00563 } else {
00564 bool fl=false;
00565 switch(status) {
00566 case space: if(sym!=' ') fl=true; break;
00567 case tok: if(!isalnum(sym) && sym!='_') fl=true; break;
00568 case float4:
00569 status=float5;
00570 if(sym=='+' || sym=='-') break;
00571 case float3:
00572 case num:
00573 if(sym=='e' || sym=='E') {status=float4;break;}
00574 if(sym=='.') {status=float2; break;}
00575 case float2:
00576 case float5: if(status==float2) status=float3;
00577 if(!isdigit(sym)) fl=true;
00578 break;
00579 case float1: status=float2; break;
00580 }
00581 if(fl) {FormatBuffer(),status=work;goto l1;}
00582 switch(status) {
00583 case space: space_count++; break;
00584 case tok:
00585 case num:
00586 case float2: case float3: case float4: case float5:
00587 case rem_single: case rem_mul: case rem_mul1:
00588 case str: case str1:
00589 case ch: case ch1:
00590 case prepro2:
00591 buffer+=sym;
00592 break;
00593 }
00594 switch(status) {
00595 case rem_mul: if(sym=='*') status=rem_mul1; break;
00596 case rem_mul1: if(sym=='/') FormatBuffer(),status=work; else status=rem_mul; break;
00597 case str1: status=str; break;
00598 case str:
00599 if(sym=='\\') status=str1;
00600 if(sym=='"' && buffer.size()>1) FormatBuffer(),status=work;
00601 break;
00602 case ch1: status=ch; break;
00603 case ch:
00604 if(sym=='\\') status=ch1;
00605 if(sym=='\'' && buffer.size()>1) FormatBuffer(),status=work;
00606 break;
00607 }
00608 }
00609 }
00610
00612 void Syntax_engine::FormatBuffer()
00613 {
00614 if(buffer.empty()) return;
00615 form_engine.Format(space_count,(char*)buffer.c_str());
00616 buffer.erase();
00617 space_count=0;
00618 }
00619
00620
00621 void Syntax_engine::FormatChar(char sym)
00622 {
00623 form_engine.Format(space_count,sym);
00624 space_count=0;
00625 }
00626
00627
00628
00629
00630
00631
00632 void HTML::PrintChar(char sym)
00633 {
00634 switch(sym) {
00635 case '<': fprintf(options.out_file,"<"); break;
00636 case '>': fprintf(options.out_file,">"); break;
00637 case '&': fprintf(options.out_file,"&"); break;
00638 default: fputc(sym,options.out_file); break;
00639 }
00640 }
00641
00642
00643 void HTML::Write(char *text)
00644 {
00645 char *cls=0;
00646 char sym=*text;
00647 if(sym=='#') cls="prepro"; else
00648 if(sym=='"' || sym=='\'') cls="str"; else
00649 if(sym=='/' && (text[1]=='/' || text[1]=='*')) cls="rem"; else
00650 if(isKeyWord(text)) cls="key";
00651 if(cls) printf("<span class='%s'>",cls);
00652 while(*text) PrintChar(*text++);
00653 if(cls) printf("</span>");
00654 }
00655
00656 void HTML::Start()
00657 {
00658 fprintf(options.out_file,
00659 "<html><style type='text/css'><!--\n"
00660 ".key{color:blue}\n"
00661 ".str{color:orange}\n"
00662 ".rem{color:green}\n"
00663 ".prepro{color:red}\n"
00664 "--></style><body><pre>\n"
00665 );
00666 }
00667
00668
00669 void HTML::Stop()
00670 {
00671 fprintf(options.out_file,"\n</pre></body></html>\n");
00672 }
00673
00674
00675
00676 CPP::CPP()
00677 {
00678 no_tab=false;
00679 }
00680
00681
00682 void CPP::ScanOptions(char** list,int count)
00683 {
00684 if(options.FindArg(list,count,"-no-tab")) no_tab=true;
00685 }
00686
00687
00688 void CPP::Start()
00689 {
00690 out_pos=0;
00691 }
00692
00693 void CPP::PrintChar(char sym)
00694 {
00695 Output_interface::PrintChar(sym);
00696 out_pos++;
00697 if(sym=='\n') out_pos=0;
00698 }
00699
00700 void CPP::Ident(int count)
00701 {
00702 int n;
00703 if(!no_tab && count>1) while((out_pos+count)/options.out_tab_size>out_pos/options.out_tab_size) {
00704 putchar('\t');
00705 n=options.out_tab_size-out_pos%options.out_tab_size;
00706 count-=n;
00707 out_pos+=n;
00708 }
00709 Output_interface::Ident(count);
00710 out_pos+=count;
00711 }
00712
00713
00714
00715 void TEX::Start()
00716 {
00717 fprintf(options.out_file,
00718 "\\documentclass{article}\\usepackage{alltt}\\oddsidemargin=-16pt"
00719 "\\begin{document}\n"
00720 "\\catcode`<=11\\catcode`>=11\n"
00721 "\\tt\\begin{alltt}\n"
00722 );
00723 }
00724
00725 void TEX::Stop()
00726 {
00727 fprintf(options.out_file,
00728 "\\end{alltt}\n"
00729 "\\end{document}\n"
00730 );
00731 }
00732
00733
00734 void TEX::Write(char *text)
00735 {
00736 bool key=isKeyWord(text);
00737 if(key) fprintf(options.out_file,"\\underline{");
00738 while(*text) PrintChar(*text++);
00739 if(key) fputc('}',options.out_file);
00740 }
00741
00742
00743 void TEX::PrintChar(char sym)
00744 {
00745 switch(sym) {
00746 case '_': fprintf(options.out_file,"\\underline{ }"); break;
00747 case '{': fprintf(options.out_file,"\\{"); break;
00748 case '}': fprintf(options.out_file,"\\}"); break;
00749 case '\\': fprintf(options.out_file,"\\(\\backslash\\)"); break;
00750 default: fputc(sym,options.out_file); break;
00751 }
00752 }
00753
00754
00755
00760 char *Options::FindArg(char **list,int count,char *key)
00761 {
00762 int n,len; char *ref;
00763 len=strlen(key);
00764 for(n=1; n<count; n++) if(!strncmp(list[n],key,len)) {
00765 ref=&list[n][len];
00766 if(*ref==' ' || !*ref) return "";
00767 if(*ref=='=') return ref+1;
00768 }
00769 return 0;
00770 }
00771
00780 void Options::SetArg(char **list,int count,char *key,int *par,int min,int max)
00781 {
00782 char *ref=FindArg(list,count,key);
00783 if(!ref || !*ref) return;
00784 *par=atoi(ref);
00785 }
00786
00787
/// Prints the list of command-line switches to stdout. The names match the
/// strings the option scanners look for.
void Help()
{
    fputs(
        "-help\n"
        "-out=[cpp,html,tex] *\n"
        "-fmt=[none,ident,full]\n"
        "-in-tab-size=<n>\n"
        "-out-tab-size=<n>\n"
        "-no-tab\n",
        stdout
    );
}
00798
00799
00800 void Options::ScanOptions(char **list,int count)
00801 {
00802 char *ref;
00803 ref=options.FindArg(list,count,"-out"); if(ref) {
00804 if(!strncmp(ref, "cpp",1)) options.out_interface=new CPP;
00805 if(!strncmp(ref, "tex",2)) options.out_interface=new TEX;
00806 if(!strncmp(ref,"html",3)) options.out_interface=new HTML;
00807 }
00808 ref=options.FindArg(list,count,"-fmt"); if(ref) {
00809 if(!strncmp(ref, "none",4)) options.fmt_mode=none;
00810 if(!strncmp(ref,"ident",5)) options.fmt_mode=ident;
00811 if(!strncmp(ref, "full",4)) options.fmt_mode=full;
00812 }
00813 options.SetArg(list,count,"-in-tab-size",&options.in_tab_size,1,32);
00814 options.SetArg(list,count,"-oun-tab-size",&options.out_tab_size,1,32);
00815 }
00816
00817
00818 void main(int argc,char **argl)
00819 {
00820 int mode=0;
00821 if(options.FindArg(argl,argc,"-help")) {Help();return;}
00822 options.ScanOptions(argl,argc);
00823 if(!options.out_interface) {Help();return;}
00824 options.in_file=stdin;
00825 options.out_file=stdout;
00826 options.out_interface->ScanOptions(argl,argc);
00827 Syntax_engine engine;
00828 engine.Work();
00829 }
00830
00831