#include <stdio.h>
#include <ctype.h>
#include <string.h>
// version text embedded in the stderr banner and in the header of the generated asm
#define VERSION "1.80 release"
// operators that take the word immediately following them as an operand
// (call, label, function definition, goto, include)
#define WORDOPS "`|fgi"
// size in bytes of the token buffer and of the string scratch buffers
#define MAXSTRSIZE 1024
// number of vars (dwords reserved at _prog_vars)
#define MAXVARS    1000
// number of function calls (words reserved at _ip_stack)
#define IPSTACK    1000
// size of stack in bytes (value given to the .stack directive)
#define STACKSIZE  32768

/* operators
   + - * / %      you know        ab+ ab* ab% etc
   _              unary minus     a_
   :              assignment      1b:       (assign 1 to *b)
   ;              long value at   b;        (long value at *b)
   @              byte value at   b@        (byte value at *b)
   w              far long write  1bw       (write long 1 at far ptr b)
   W              far byte write  1bW       (write byte 1 at far ptr b)
   e              Dereference     be        (long value at far pointer b)
   E              byte deref.     bE        (byte value at far pointer b)
   F              convert to far  bF        (convert near ptr b to far ptr)
   = < > !        == < > !=       ab=(code) (push a==b, jmp to ) if false)
   $              swap            $         (swaps top two stack values)
   d              dup             d         (copies top of stack 2 5 -> 2 5 5)
   ^              over            ^         (copies 2nd item  2 3 5 -> 2 3 5 3)
   l r            rotate l/r      r         (rotates top three/right is towards TOS)
   ~              drop            ~         (pops stack value)
   n a o          logical ! && || bn        (if (b) b=0; else b=1;)
   N A O X        bitwise ~ & | ^ 1 2X      (1 xor 2)
   I D            inc dec TOS     I         (TOS += 1)
   L R            shift l/r       1 2 L     (shift 1 over 2 places)
   ( ) [ ]        if else         1 2=()[]  (if 1==2 [pop] do () else do []) the []'s are required
   other stuff

   |              label ending    |label    (a global value for a label name)
   f              define funct    fFunct    defines a function named Funct
   \              end a function  \         (return from funct)
   { }            loop start/end  { code }  (loop until a break occurs)
   #              break           #         breaks from a loop
   &              continue        &         continue a loop
   `              call function   `fname    call fname() (declare as |fname)
   g              goto            glabel    jump to label
   .              set up parms    .         push ecx, set up base pointer as esp
   v              access vars     3v        3rd var FROM bottom of var stack (pushes ptr to)
   p              access params   4p        access the 4th parm ON the stack (pushes ptr to)
   ?              start/end asm   ?cld?     do inline asm!
   s              push stack ptr  s         push stack pointer (for strings)
   S              set stack ptr   S         mov esp,ecx, pop ecx
   b              push base ptr   b         push ebp
   B              pop base ptr    B         pop ebp
   ,              allocate mem    500,      allocate 500 longs on STACK
   '              access mem      1'        access the second allocated long (pushes ptr to)
   i              include file    itest.th  include files...
   "              strings!!       "1hello"  push hello
                                  "2hello"  push olleh

   memory allocated with , is destroyed (from the end) by further pushes,
   so be careful (this includes function calls!). it doesn't need to be freed.
   if you need real memory allocation, use malloc!
   or push your data!

   while . sets up the stack frame, calls to other functions may set up
   their own stack frame, changing yours! so save the base pointer somewhere
   (with p) if you are going to call functions that do
   none of the standard library functions do

   comments can be
   ?; comment ?

   ?; comment2
    ; comment3
   ?


   IMPLEMENTATION

   eax,ebx,edx - varies
   ecx         - top of stack
   esp         - top of stack ptr
   ebp         - bottom of var stack/top of parm stack
   edi         - local memory
   esi         - ip stack

   +     pop eax, add ecx,eax
   -     pop eax, sub eax,ecx, xchg eax,ecx
   *     pop eax, imul ecx, mov ecx,eax
   /     pop eax, sub edx,edx, idiv ecx, mov ecx,eax
   %     pop eax, sub edx,edx, idiv ecx, mov ecx,edx
   _     neg ecx
   :     pop ebx, mov dword ptr [ecx],ebx, pop ecx
   ;     mov ecx, dword ptr [ecx]
   @     mov ebx,ecx, sub ecx,ecx, mov cl, byte ptr [ebx]
   e     mov bx,cx, shr ecx,16, mov es,cx, mov ecx,[es:bx]
   E     mov bx,cx, shr ecx,16, mov es,cx, sub ecx,ecx, mov cl,[es:bx]
   w     mov bx,cx, shr ecx,16, mov es,cx, pop ecx, mov [es:bx],ecx, pop ecx
   W     mov bx,cx, shr ecx,16, mov es,cx, pop ecx, mov [es:bx],cl, pop ecx
   F     mov ax,ds, rol ecx,16, mov cx,ax, rol ecx,16
   =     pop ebx, sub ecx,ebx, je equal, mov ecx,0, jmp skip,
         equal: inc ecx, skip:
   !     pop ebx, sub ecx,ebx, je equal, mov ecx,1, equal:
   <     pop ebx, cmp ebx,ecx, jl less, mov ecx,0, jmp skip,
         less: mov ecx,1, skip:
   >     pop ebx, cmp ebx,ecx, jg more, mov ecx,0, jmp skip,
         more: mov ecx,1, skip:
   n     cmp ecx,0, je equal, mov ecx,0, jmp skip, equal: mov ecx,1, skip:
   a     pop ebx, cmp ebx,0 je fail, cmp ecx,0, je fail, mov ecx,1, jmp skip,
         fail: mov ecx,0 skip:
   o     pop ebx, cmp ebx,0 jne true, cmp ecx,0, jne true, mov ecx,0, jmp skip,
         true: mov ecx,1 skip:
   N     not ecx
   A     pop ebx, and ecx,ebx
   O     pop ebx, or ecx,ebx
   X     pop ebx, xor ecx,ebx
   I     inc ecx
   D     dec ecx
   L     pop eax, shl eax,cl, mov ecx,eax
   R     pop eax, shr eax,cl, mov ecx,eax
   $     pop eax, xchg eax,ecx, push eax
   d     push ecx
   ^     mov eax,[esp], push ecx, mov ecx,eax
   l     pop ebx, pop eax, push ebx, push ecx, mov ecx,eax
   r     pop ebx, pop eax, push ecx, push eax, mov ecx,ebx
   ~     pop ecx
   |     label:
   f     name: pop ax, mov [si],ax, add si,2
   \     sub si,2, mov ax,[si], push ax, ret
   {     loops:
   }     jmp loops, loope:
   #     jmp loope
   &     jmp loops
   `     call fname
   g     jmp label
   .     push ecx,mov ebp,esp
   v     shl ecx,2, add ecx,OFFSET _prog_vars
   p     shl ecx,2, add ecx,ebp
   s     push ecx, mov ecx,esp
   S     mov esp,ecx, pop ecx
   b     push ecx, mov ecx,ebp
   B     mov ebp,ecx pop ecx
   ,     shl ecx,2, mov edi,esp, sub edi,ecx
   '     shl ecx,2, add ecx,edi
   (     cmp ecx,0, pop ecx, je iflabel
   )     jmp endelse
   [     iflabel:
   ]     endelse:


   c (const, only in CompileChar) push ecx, mov ecx, const
   ?     flag=!flag; while(flag) send all directly to output
   s     push ecx, mov ecx,esp
   b     push ecx, mov ecx,ebp
*/


/* Third source being compiled; assigned in main (stdin or a named file). */
FILE *infile;
/* Destination for the generated assembly; assigned in main (stdout or a named file). */
FILE *outfile;

/* Copies the file named fn verbatim into outfile.
 *
 * An asm comment naming the file is written first and a newline is written
 * after the copied contents.  If the file cannot be opened, a message is
 * printed to stderr and nothing but the comment line is emitted.
 */
void Include(char *fn)
{
   int c;   /* int, not char: must be able to hold EOF distinctly from byte 0xFF */
   FILE *f=fopen(fn,"r");

   fprintf(outfile,"\n; #include %s\n",fn);
   if (!f)
   {
      fprintf(stderr,"Error opening include file %s\n",fn);
      return;
   }

   while((c=getc(f))!=EOF) putc(c,outfile);
   putc('\n',outfile);
   fclose(f);
}

/* Writes the fixed assembly prologue to outfile.
 *
 * The only substituted value is STACKSIZE, used for the .stack directive.
 * The prologue jumps to _prog_error on failure, a label that Terminate()
 * emits.
 */
void Startup(void)
{
  fprintf(outfile,
          "; Optimizing Third compiler "VERSION" by Adam Milazzo\n"
          "; Compile as an EXE file\n\n"
          "; Startup Code\n"
          "LOCALS @@\n"
          ".model flat\n"
          ".stack %d\n"
          ".486p\n"
          ".code\n\n"
          "start:\n"
          "cli\n"
          "push ds\n"
          "pop ss\n"
          "mov esp, 0FFFCh\n"
          "sti\n"
          "mov esi,OFFSET _ip_stack\n"
          "mov ah, 62h\n"
          "int 21h\n"
          "mov es,bx\n"
          "mov bx,4096\n"
          "mov ah, 4ah\n"
          "int 21h\n"
          "jc _prog_error\n"
          ,STACKSIZE);   /* exactly one %d in the format, so exactly one argument */
}

/* Replaces backslash escapes in str with the characters they denote,
 * rewriting the string in place.
 *
 * Recognised escapes: \t \n \r \v \\ and \0.  Any other escaped character
 * (for example \") stands for itself.  The result is never longer than the
 * input, so no scratch buffer is needed and strings of any length are safe.
 *
 * A \0 escape, or a lone backslash at the end of the input, produces a NUL
 * and therefore ends the resulting string at that point.
 */
void Expand(char *str)
{
   char *src=str,*dst=str;

   for(;;)
   {
      char c=*src++;

      if (c=='\\')
      {
         c=*src;
         /* a trailing backslash leaves c as the terminator; do not step past it */
         if (c!='\0') src++;
         switch(c)
         {
            case 't': c='\t'; break;
            case 'n': c='\n'; break;
            case 'r': c='\r'; break;
            case '0': c='\0'; break;
            case '\\': c='\\'; break;
            case 'v': c='\v'; break;
            default: break;   /* unknown escape: keep the character itself */
         }
      }

      *dst++=c;
      if (c=='\0') return;   /* real terminator or an escape that yields NUL */
   }
}

/* Reverses the len+1 bytes of str in place, terminator included, and
 * returns len (the original strlen).
 *
 * Afterwards str[0] is the NUL and str[1..len] hold the characters in
 * reverse order, so the buffer no longer reads as a C string.  CompileFile
 * depends on exactly this layout: it pushes bytes 0..len-1 and loads byte
 * len into ecx.
 *
 * Swapping in place needs no scratch buffer, so there is no length limit.
 */
int Reverse(char *str)
{
   int len=(int)strlen(str);
   int lo=0,hi=len;   /* hi starts ON the terminator, not before it */

   while(lo<hi)
   {
      char tmp=str[lo];
      str[lo++]=str[hi];
      str[hi--]=tmp;
   }
   return len;
}

/* Writes the assembly epilogue to outfile: the exit and error routines,
 * followed by the data reservations for the variable area (MAXVARS dwords)
 * and the return-address stack (IPSTACK words), and the closing directive.
 */
void Terminate(void)
{
   /* exit path and error reporting: no substitutions needed */
   static const char exit_code[] =
         "; Termination Code\n"
         "_end_prog:\n"
         "mov al,cl\n"
         "mov ah,4ch\n"
         "int 21h\n"
         "_prog_error:\n"
         "push cs\n"
         "pop ds\n"
         "mov dx, OFFSET _prog_errmsg\n"
         "mov ah,9h\n"
         "int 21h\n"
         "jmp _end_prog\n"
         "_prog_errmsg:\n"
         "db \"Program error.\",13,10,'$'\n";

   fputs(exit_code,outfile);

   /* storage sized by the compile-time limits */
   fprintf(outfile,"_prog_vars:\ndd %d dup(?)\n",MAXVARS);
   fprintf(outfile,"_ip_stack:\ndw %d dup(?)\n",IPSTACK);

   fputs("_prog_end:\nend start\n",outfile);
}

/* Writes the built-in routines to outfile.  At present this is only
 * "exit", which jumps to the _end_prog label.
 */
void Functions(void)
{
   static const char builtins[] =
      "; Functions\n"
      "exit:\n"
      "jmp _end_prog\n\n";

   fputs(builtins,outfile);
}

/* Emits the assembly for one Third operator to outfile.
 *
 *   c  the operator character; 'c' is the internal "push constant" op
 *   s  operand text.  It is formatted for the word operators listed in
 *      WORDOPS, for 'c', and while inline-asm mode is active; for every
 *      other operator it is not read, so 0 may be passed.
 *
 * Each operator is preceded in the output by an asm comment naming it.
 * Whitespace is ignored.  Characters with no case below produce only that
 * comment line.
 *
 * State kept between calls:
 *   asmf     inline-asm mode, toggled by '?'
 *   nextlab  counter that makes generated labels unique
 *   inests   stack of label numbers for open ( ) [ ] constructs
 *   lnests   stack of label numbers for open { } loops
 *
 * NOTE(review): the two nesting stacks hold 128 entries and are not
 * bounds-checked; unbalanced or very deeply nested input indexes outside
 * them.
 */
void CompileChar(char c, char *s)
{
   static char asmf=0;
   static int nextlab=0,inest=0,inests[128],lnest=0,lnests[128];

   if (c=='?')
   {
      asmf=!asmf;
      if (!asmf) fputc('\n', outfile);
   }

   if (asmf)
   {
      fprintf(outfile,"%c%s",c,s);
      return;
   }

   /* ctype functions require an unsigned char value (or EOF) */
   if (isspace((unsigned char)c)) return;
   if (strchr(WORDOPS,c) || c == 'c') fprintf(outfile,";%c%s\n",c,s);
   else fprintf(outfile,"; %c\n",c);

   switch (c)
   {
      /* arithmetic */
      case '+': fputs("pop eax\nadd ecx,eax\n",outfile); break;
      case '-': fputs("pop eax\nsub eax,ecx\nxchg eax,ecx\n",outfile); break;
      case '*': fputs("pop eax\nimul ecx\nmov ecx,eax\n",outfile); break;
      case '/': fputs("pop eax\nsub edx,edx\nidiv ecx\nmov ecx,eax\n",outfile); break;
      case '%': fputs("pop eax\nsub edx,edx\nidiv ecx\nmov ecx,edx\n",outfile); break;
      case '_': fputs("neg ecx\n",outfile); break;

      /* memory access, near and far */
      case ':': fputs("pop ebx\nmov [ecx],ebx\npop ecx\n",outfile); break;
      case ';': fputs("mov ecx,[ecx]\n",outfile); break;
      case '@': fputs("mov ebx,ecx\nsub ecx,ecx\nmov cl,byte ptr [ebx]\n",outfile);break;
      case 'e': fputs("mov bx,cx\nshr ecx,16\nmov es,cx\nmov ecx,[es:bx]\n",outfile); break;
      case 'E': fputs("mov bx,cx\nshr ecx,16\nmov es,cx\nsub ecx,ecx\nmov cl,[es:bx]\n",outfile); break;
      case 'w': fputs("mov bx,cx\nshr ecx,16\nmov es,cx\npop ecx\nmov [es:bx],ecx\npop ecx\n",outfile); break;
      case 'W': fputs("mov bx,cx\nshr ecx,16\nmov es,cx\npop ecx\nmov [es:bx],cl\npop ecx\n",outfile); break;
      case 'F': fputs("mov ax,ds\nrol ecx,16\nmov cx,ax\nrol ecx,16\n",outfile); break;

      /* comparisons and logical operators; each uses one fresh label number */
      case '=': fprintf(outfile,"pop ebx\nsub ecx,ebx\nje equal%d\nmov ecx,0\n"
                       "jmp skip%d\nequal%d:\ninc ecx\nskip%d:\n",nextlab,
                       nextlab,nextlab,nextlab); nextlab++; break;
      case '!': fprintf(outfile,"pop ebx\nsub ecx,ebx\nje equal%d\n"
                       "mov ecx,1\nequal%d:\n",nextlab, nextlab);
                       nextlab++; break;
      case '<': fprintf(outfile,"pop ebx\ncmp ebx,ecx\njl less%d\nmov ecx,0\n"
                       "jmp skip%d\nless%d:\nmov ecx,1\nskip%d:\n",nextlab,
                       nextlab,nextlab,nextlab); nextlab++; break;
      case '>': fprintf(outfile,"pop ebx\ncmp ebx,ecx\njg more%d\nmov ecx,0\n"
                       "jmp skip%d\nmore%d:\nmov ecx,1\nskip%d:\n",nextlab,
                       nextlab,nextlab,nextlab); nextlab++; break;
      case 'n': fprintf(outfile,"cmp ecx,0\nje equal%d\nmov ecx,0\njmp skip%d\n"
                       "equal%d:\nmov ecx,1\nskip%d:\n",nextlab,nextlab,
                       nextlab,nextlab); nextlab++; break;
      case 'a': fprintf(outfile,"pop ebx\ncmp ebx,0\nje fail%d\ncmp ecx,0\nje fail%d\n"
                       "mov ecx,1\njmp skip%d\nfail%d:\nmov ecx,0\nskip%d:\n",
                       nextlab,nextlab,nextlab,nextlab,nextlab);
                       nextlab++; break;
      /* five %d conversions, so five arguments are required */
      case 'o': fprintf(outfile,"pop ebx\ncmp ebx,0\njne true%d\ncmp ecx,0\njne true%d"
                       "\nmov ecx,0\njmp skip%d\ntrue%d:\nmov ecx,1\nskip%d:\n"
                       ,nextlab,nextlab,nextlab,nextlab,nextlab);
                       nextlab++; break;

      /* bitwise, increment/decrement, shifts */
      case 'N': fprintf(outfile,"not ecx\n"); break;
      case 'A': fprintf(outfile,"pop ebx\nand ecx,ebx\n"); break;
      case 'O': fprintf(outfile,"pop ebx\nor ecx,ebx\n"); break;
      case 'X': fprintf(outfile,"pop ebx\nxor ecx,ebx\n"); break;
      case 'I': fputs("inc ecx\n",outfile); break;
      case 'D': fputs("dec ecx\n",outfile); break;
      case 'L': fputs("pop eax\nshl eax,cl\nmov ecx,eax\n",outfile); break;
      case 'R': fputs("pop eax\nshr eax,cl\nmov ecx,eax\n",outfile); break;

      /* stack shuffling */
      case '$': fputs("pop eax\nxchg eax,ecx\npush eax\n",outfile); break;
      case 'd': fputs("push ecx\n",outfile); break;
      case '^': fputs("mov eax,[esp]\npush ecx\nmov ecx,eax\n",outfile); break;
      case 'l': fputs("pop ebx\npop eax\npush ebx\npush ecx\nmov ecx,eax\n",outfile); break;
      case 'r': fputs("pop ebx\npop eax\npush ecx\npush eax\nmov ecx,ebx\n",outfile); break;
      case '~': fputs("pop ecx\n",outfile); break;

      /* labels, functions, loops, jumps */
      case '|': fprintf(outfile,"%s:\n",s); break;
      case 'f': fprintf(outfile,"%s:\npop ax\nmov [si],ax\nadd si,2\n",s); break;
      case '\\': fputs("sub si,2\nmov ax,[si]\npush ax\nret\n",outfile); break;
      case '{': fprintf(outfile,"loops%d:\n",lnests[lnest++]=nextlab++); break;
      case '}': lnest--; fprintf(outfile,"jmp loops%d\nloope%d:\n",lnests[lnest],
                lnests[lnest]); break;
      case '#': fprintf(outfile,"jmp loope%d\n",lnests[lnest-1]); break;
      case '&': fprintf(outfile,"jmp loops%d\n",lnests[lnest-1]); break;
      case '`': fprintf(outfile,"call %s\n",s); break;
      case 'g': fprintf(outfile,"jmp %s\n",s); break;

      /* frames, variables, parameters, local memory */
      case '.': fputs("push ecx\nmov ebp, esp\n",outfile); break;
      case 'v': fputs("shl ecx, 2\nadd ecx, OFFSET _prog_vars\n",outfile); break;
      case 'p': fputs("shl ecx, 2\nadd ecx, ebp\n",outfile); break;
      case '\'': fputs("shl ecx,2\nadd ecx,edi\n",outfile); break;
      case ',': fputs("shl ecx,2\nmov edi,esp\nsub edi,ecx\n",outfile); break;

      /* if / else: '(' opens, ']' closes, both share one label number */
      case '(': fprintf(outfile,"cmp ecx,0\npop ecx\nje iflab%d\n",inests[inest++]=nextlab++);
                break;
      case ')': fprintf(outfile,"jmp endelse%d\n",inests[inest-1]); break;
      case '[': fprintf(outfile,"iflab%d:\n",inests[inest-1]); break;
      case ']': fprintf(outfile,"endelse%d:\n",inests[--inest]); break;

      /* constants and stack/base pointer access */
      case 'c': fprintf(outfile,"push ecx\nmov ecx,%s\n",s); break;
      case 's': fputs("push ecx\nmov ecx,esp\n",outfile); break;
      case 'S': fputs("mov esp,ecx\npop ecx\n", outfile); break;
      case 'b': fputs("push ecx\nmov ecx,ebp\n",outfile); break;
      case 'B': fputs("mov ebp,ecx\npop ecx\n",outfile); break;

      case 'i': Include(s); break;

      default: break;   /* unknown operator: only the comment line was emitted */
   }
}

/* Appends consecutive decimal digits read from infile to tok, starting at
 * index i, and NUL-terminates tok.  Returns the first character read that
 * is not a digit (or EOF).  Digits that would not fit in MAXSTRSIZE bytes
 * are consumed but dropped.
 */
static int ReadDigits(char *tok, int i)
{
   int ch;

   while((ch=getc(infile))!=EOF && isdigit(ch))
      if (i<MAXSTRSIZE-1) tok[i++]=(char)ch;
   tok[i]=0;
   return ch;
}

/* Reads the whole Third program from infile and writes its assembly to
 * outfile.
 *
 * The input is handled one operator at a time:
 *   - operators in WORDOPS take the following whitespace-delimited word
 *     as their operand, passed to CompileChar in tok
 *   - a run of digits, or '-' directly followed by a digit, is a numeric
 *     literal compiled through the 'c' pseudo-op
 *   - '?' ... '?' is inline asm, copied to outfile unchanged
 *   - '"' starts a string.  The next character minus '0' selects the mode
 *     (2 = reversed).  The text runs to the next unescaped '"', is passed
 *     through Expand(), and is emitted as one push per character with the
 *     final byte loaded into ecx
 *   - every other character goes straight to CompileChar
 *
 * Characters are held in int so EOF is distinct from every byte value, and
 * tokens longer than MAXSTRSIZE-1 bytes are truncated instead of
 * overflowing tok.
 */
void CompileFile(void)
{
   static char asmf=0,strf=0,tok[MAXSTRSIZE];
   int c,t,ch;
   int len,i,escaped;

   /* skip leading whitespace */
   do c=getc(infile); while(c!=EOF && isspace(c));

   while(c!=EOF)
   {
      if (strchr(WORDOPS,c))
      {
         /* c must keep the operator, so the operand is read through ch */
         i=0;
         while((ch=getc(infile))!=EOF && !isspace(ch))
            if (i<MAXSTRSIZE-1) tok[i++]=(char)ch;
         tok[i]=0;
      }
      else if (isdigit(c))
      {
         tok[0]=(char)c;
         c=ReadDigits(tok,1);   /* c becomes the character after the number */
         CompileChar('c',tok);
         continue;
      }
      else if (c=='-')
      {
         t=getc(infile);
         if (isdigit(t))
         {
            /* negative literal */
            tok[0]='-';
            tok[1]=(char)t;
            c=ReadDigits(tok,2);
            CompileChar('c',tok);
         }
         else
         {
            /* subtraction; t has not been processed yet */
            CompileChar('-',0);
            c=t;
         }
         continue;
      }
      else if (c=='?') asmf=!asmf;
      else if (c=='"') strf=(char)(getc(infile)-'0');

      if (asmf)
      {
         /* copy inline asm verbatim up to the closing '?' */
         while((c=getc(infile))!=EOF && c!='?') putc(c,outfile);
         if (c=='?')
         {
            asmf=!asmf;
            if (!asmf) fputc('\n',outfile);
         }
      }
      else if (strf)
      {
         i=0;
         escaped=0;   /* true when the previous character was an unescaped backslash */
         while((c=getc(infile))!=EOF)
         {
            if (c=='"' && !escaped) break;
            escaped=(c=='\\' && !escaped);
            if (i<MAXSTRSIZE-1) tok[i++]=(char)c;
         }
         tok[i]=0;
         fprintf(outfile,"; %s\n",tok);

         Expand(tok);
         if (strf==2) len=Reverse(tok);
         else len=(int)strlen(tok);
         strf=0;

         /* bytes 0..len-1 are pushed; byte len ends up in ecx (top of stack) */
         fputs("push ecx\n",outfile);
         for(i=0;i<len;i++) fprintf(outfile,"push dword ptr %d\n",tok[i]);
         fprintf(outfile,"mov ecx,%d\n\n",tok[i]);
      }
      else CompileChar((char)c,tok);

      /* advance to the next non-blank character */
      do c=getc(infile); while(c!=EOF && isspace(c));
   }
}

/* Entry point.
 *
 *   no arguments    read stdin, write stdout
 *   one argument    input file name.  Without a '.' in the name, ".t" is
 *                   appended for the input and ".asm" for the output;
 *                   otherwise the name is used as given and the output
 *                   name is the part before the first '.' plus ".asm"
 *   two arguments   input file name, output file name
 *
 * Any other argument count leaves both streams unopened and is reported
 * as a file error.  Returns 0 on success, 1 on error.
 *
 * NOTE(review): the extension is cut at the FIRST '.', so a path such as
 * "./prog.t" yields the output name ".asm" — confirm whether paths with
 * dots in directory names need to be supported.
 */
int main (int argc,char *argv[])
{
   char in[64];
   char out[64];
   char *p;

   fputs("Optimizing Third compiler "VERSION" by Adam Milazzo\n",stderr);

   switch(argc)
   {
      case 1:
         infile=stdin;
         outfile=stdout;
         break;
      case 2:
         /* the longest name built below is argv[1] + ".asm" + NUL */
         if (strlen(argv[1])+sizeof ".asm" > sizeof out)
         {
            fputs("File name too long.\n",stderr);
            return 1;
         }

         strcpy(in,argv[1]);
         strcpy(out,argv[1]);

         /* argv[1] is only examined here, where it is known to exist */
         p=strchr(out,'.');
         if (!p) strcat(in,".t");
         else *p='\0';
         strcat(out,".asm");

         infile=fopen(in,"r");
         outfile=fopen(out,"w");
         break;
      case 3:
         infile=fopen(argv[1],"r");
         outfile=fopen(argv[2],"w");
         break;
      default:
         /* streams stay NULL and are reported by the check below */
         break;
   }

   if (!infile || !outfile)
   {
      fputs("Error opening file(s).",stderr);
      return 1;
   }

   Startup();
   CompileFile();
   Functions();
   Terminate();
   return 0;
}

