/*
**-------------------------------------------------------------------
**
** Mini Linguagem:
** Sintax tipo C/Script, compilada( x86 ) 32 bits.
**
** ARQUIVO:
** mini_0003.c
**
** PRINCIPAL FUNCAO:
** int Parse (ASM *a, char *text);
**
** PALAVRAS RESERVADAS:
** int, char *, struct *,
** if, for, break.
**
** COMPILE ( build as 32-bit: the generated machine code is x86-32 ):
** gcc -m32 mini.c -o mini -Wall
**
** BUILD/VERSION:
** 0003 - 26/08/2015 - 19:15
** ----------------------------------------------
**
** Credito do Analizador de Expressao:
** -----------------------------------
** Essas funcoes abaixo eh o analizador de expressao ...
** Este eh baseada no exemplo do livro ( C Completo e Total, de HERBERT SCHILDT ).
** ------------------------------
** void expr0 (ASM *a);
** void expr1 (ASM *a);
** void expr2 (ASM *a);
** void expr3 (ASM *a); //: atom
** ------------------------------
**
** 01: Inicio de implementacao: void expresion (ASM *a);
** 02: Implementado o analizador de expressao:
** a: O resultado de uma expressao eh armazenado no topo da pilha ( %esp ).
** b: Depois de chamar uma expressao EH NECESSARIO estabilizar a pilha
** chamando um "POP" ...
**
** 03: Function calls implemented ( proc_func() ):
** To add new C functions, add an entry to the ( lib[] ) table ...
** existing functions ( print, soma ).
** FOR MORE, SEE THE ( LIB ) SECTION AT THE END OF THIS FILE.
**
** ----------------------------------------------
**
** BY: Francisco G. A. - "gokernel" ( gokernel@hotmail.com )
**
**-------------------------------------------------------------------
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <ctype.h> // to: isdigit();
#ifdef _WIN32
#include <windows.h>
#endif
#ifdef __linux__
#include <unistd.h>
#include <sys/mman.h> // to: mprotect()
#endif
//-------------------------------------------------------------------
//######################## DEFINE/ENUM ############################
//-------------------------------------------------------------------
//
// Size in bytes of the machine-code buffer that main() requests from asm_new().
#define ASM_DEFAULT_SIZE 50000
// Capacity in bytes of prog[], the buffer that holds the script source text.
#define PROG_SIZE 50000
// Shorthand for a raw code byte ( a macro, not a typedef ).
#define UCHAR unsigned char
// Token codes returned by lex(). Numbering starts at 255; single-character
// tokens are returned by lex() as the character itself.
enum {
TOK_INT = 255, TOK_CHAR, TOK_STRUCT, TOK_IF, TOK_FOR, TOK_BREAK,
//----------------------------
TOK_ID, // identifier
TOK_NUMBER, // digits and '.', optionally with a leading '-'
TOK_STRING, // double-quoted string; token[] holds the text without the quotes
TOK_PLUS_PLUS, // ++
TOK_MINUS_MINUS // --
};
// Values for VAR.type. In the code visible in this file only TYPE_LONG is
// ever assigned ( by CreateVarInt() ).
enum {
TYPE_LONG = 1,
TYPE_FLOAT,
TYPE_STRING,
TYPE_POINTER,
TYPE_STRUCT,
TYPE_PSTRUCT // struct data *p;
};
// 32-bit x86 registers, numbered 0..7 ( EAX = 0 ... EDI = 7 ).
// asm_mov_reg_var() takes one of these values as its ( reg ) argument.
enum { EAX = 0, ECX, EDX, EBX, ESP, EBP, ESI, EDI };
// Prototypes of the native C functions exposed to scripts through lib[]
// ( declared here because the lib[] initializer references them ):
void lib_print (int i);
int lib_soma (int a, int b);
//-------------------------------------------------------------------
//########################### STRUCT ##############################
//-------------------------------------------------------------------
//
// Forward typedefs so the struct types can be named without the
// ( struct ) keyword, including inside each other's definitions.
typedef struct ASM ASM;
typedef struct ASM_label ASM_label;
typedef struct ASM_jump ASM_jump;
typedef struct VAR VAR;
typedef struct LIB LIB;
// Machine-code buffer under construction, plus its label/jump tables.
// Created by asm_new(); released field by field in main().
struct ASM {
UCHAR *p; // write cursor: the next byte is emitted here, then p advances
UCHAR *code; // start of the buffer; emitted length is ( p - code )
ASM_label *label; // NULL after asm_new(); meant to be grown with realloc
ASM_jump *jump; // NULL after asm_new(); meant to be grown with realloc
int label_len; // number of entries in ( label )
int jump_len; // number of entries in ( jump )
};
// One entry of ASM.label. Nothing in the visible code fills these in yet;
// main() only frees them.
struct ASM_label {
char *text; // label name; released with free() in main()
int pos; // presumably the label's offset inside ASM.code -- TODO confirm
};
// One entry of ASM.jump. Nothing in the visible code fills these in yet;
// main() only frees them.
struct ASM_jump {
char *text; // target label name; released with free() in main()
int pos; // presumably the jump's offset inside ASM.code -- TODO confirm
int type; // presumably the kind of jump instruction -- TODO confirm
};
// One script variable ( an element of the global Gvar array ).
// The emitted machine code refers to ( value.l ) by absolute address, see
// the asm_pushl_var() / asm_popl_var() / asm_mov_reg_var() call sites.
struct VAR {
char *name; // identifier; a strdup() copy made by CreateVarInt()
int type; // one of the TYPE_* constants
void *info; // any information ... struct type use this
union {
long l; //: type long integer
float f; //: type float
char *s; //: type pointer of char
void *p; //: type pointer
}value;
};
// Table of native C functions callable from a script. expression() looks
// a name up here and passes ( code ) to proc_func(), which emits the call.
// The table is terminated by an entry whose ( name ) is NULL.
struct LIB {
char *name; // name used in the script
UCHAR *code; // address of the C function, stored as a byte pointer
}lib[]={
{ "print", (UCHAR *)lib_print },
{ "soma", (UCHAR *)lib_soma },
// { "function_name", (UCHAR *)lib_your_function_in_plain_C },
{ NULL, NULL }
};
//-------------------------------------------------------------------
//########################### PROTOTYPE ###########################
//-------------------------------------------------------------------
//
// Handler for the "int" declaration statement.
void word_int (ASM *a);
// Expression analyser ( recursive descent ):
void expression (ASM *a);
void expr0 (ASM *a);
void expr1 (ASM *a);
void expr2 (ASM *a);
void expr3 (ASM *a); //: atom
// Raw byte emitters:
void g (ASM *a, UCHAR c); // gen/emit exactly 1 byte
void g2 (ASM *a, UCHAR c1, UCHAR c2); // gen/emit 2 bytes
void g3 (ASM *a, UCHAR c1, UCHAR c2, UCHAR c3); // gen/emit 3 bytes
void g4 (ASM *a, UCHAR c1, UCHAR c2, UCHAR c3, UCHAR c4); // gen/emit 4 bytes
// Instruction emitters:
void asm_imul_eax_esp (ASM *a); // multiply, used by the expression analyser
void asm_idivl_eax_esp (ASM *a); // divide, used by the expression analyser
void asm_add_eax_esp (ASM *a); // add, used by the expression analyser
void asm_sub_eax_esp (ASM *a); // subtract, used by the expression analyser
void asm_pushl_var (ASM *a, void *var); // push a variable onto the stack
void asm_push_number (ASM *a, long value); // push a number onto the stack
void asm_popl_var (ASM *a, void *var); // POP: copy the top of the stack into a variable
void asm_sub_stack (ASM *a, char c); // reserve stack space so functions can be called with parameters
void asm_call (ASM *a, void *func); // call a function/lib
void asm_mov_reg_var (ASM *a, int reg, void *var); // copy a register into a variable
// Helpers:
void CreateVarInt (char *name, int value);
int VarFind (char *name); // if not exist return -1
void erro_line (char *s);
void proc_func (ASM *a, UCHAR *code);
//-------------------------------------------------------------------
//########################### VARIABLE ############################
//-------------------------------------------------------------------
//
// Table of script variables: filled by CreateVarInt(), searched by VarFind().
VAR *Gvar = NULL; // global
static char
*str, // lexer cursor into the source text: set by Parse(), advanced by lex()
prog[PROG_SIZE], token[1024] // prog: source loaded by main(); token: text of the current token
;
static int
tok, erro, line, Gvar_len, stack
// tok: code of the current token ( result of the last lex() )
// erro: set to 1 as soon as any error is reported; stops the loop in Parse()
// line: current source line, used in error messages
// Gvar_len: number of entries in Gvar
// stack: counter adjusted by the push/pop emitters; not read in this file
;
// Report a lexeme that does not fit in token[] and mark the parse as failed.
static void lex_too_long (void)
{
    printf ("ERRO LINE(%d): Token too long (max %d chars)\n", line, (int)sizeof(token) - 1);
    erro = 1;
}

/*
** lex: scan the next token starting at the global cursor ( str ).
**
** Returns a TOK_* code, the character itself for single-character
** punctuation, or 0 at end of input / on an illegal character ( in which
** case ( erro ) is also set ). The token text is left in token[].
**
** Side effects: advances ( str ), counts newlines in ( line ), sets ( erro )
** on an unterminated string or block comment and on an over-long lexeme.
**
** Writes into token[] are bounded: a lexeme longer than the buffer is
** consumed completely, stored truncated and reported as an error.
**
** NOTE: a '-' immediately followed by a digit is always scanned as a
** negative number, so "a-1" yields TOK_ID followed by TOK_NUMBER(-1).
*/
static int lex (void)
{
    char *p = token;
    char *const limit = token + sizeof(token) - 1; // last slot is kept for the '\0'
    int too_long = 0;

    *p = 0;
top:
    switch (*str) {
    //################## SPACE ################
    case '\n':
        line++;
        /* fallthrough */
    case ' ': case 9: case 13:
        str++;
        goto top;

    //################ STRING #################
    case '"':
        str++; // opening '"'
        while (*str && *str != '"' && *str != '\r' && *str != '\n') {
            if (p < limit) *p++ = *str; else too_long = 1;
            str++;
        }
        *p = 0;
        if (*str == '"') str++; else erro = 1; // a string must close on the same line
        if (too_long) lex_too_long();
        return TOK_STRING;

    //########## WORD, IDENTIFIER ... #########
    case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
    case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
    case 'v': case 'w': case 'x': case 'y': case 'z':
    case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
    case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
    case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
    case 'V': case 'W': case 'X': case 'Y': case 'Z':
    case '_':
        while ((*str >= 'a' && *str <= 'z') || (*str >= 'A' && *str <= 'Z') ||
               (*str >= '0' && *str <= '9') || *str == '_')
        {
            if (p < limit) *p++ = *str; else too_long = 1;
            str++;
        }
        *p = 0;
        if (too_long) lex_too_long();
        if (!strcmp(token, "int")) return TOK_INT;
        if (!strcmp(token, "char")) return TOK_CHAR;
        if (!strcmp(token, "struct")) return TOK_STRUCT;
        if (!strcmp(token, "if")) return TOK_IF;
        if (!strcmp(token, "for")) return TOK_FOR;
        if (!strcmp(token, "break")) return TOK_BREAK;
        return TOK_ID;

    //################# NUMBER ################
    case '0': case '1': case '2': case '3': case '4': case '5': case '6':
    case '7': case '8': case '9':
        while ((*str >= '0' && *str <= '9') || *str == '.') {
            if (p < limit) *p++ = *str; else too_long = 1;
            str++;
        }
        *p = 0;
        if (too_long) lex_too_long();
        return TOK_NUMBER;

    //######## C suported character ... #######
    case ',':
    case '=':
    case ';':
    case '&':
    case '+':
    case '-':
    case '*':
    case '/':
    case '.':
    case '#':
    case '!':
    case '{': case '}': case '(': case ')': case '<': case '>': case '[': case ']':
        //########## REMOVE COMMENTS ##########
        if (*str == '/') {
            if (str[1] == '*') { // comment block
                str += 2;
                // Stop on the closing "*/" or on the terminator, never beyond it.
                while (*str && !(str[0] == '*' && str[1] == '/')) {
                    if (*str == '\n') line++;
                    str++;
                }
                if (*str) str += 2; // skip the closing "*/"
                else erro = 1;      // unterminated comment
                goto top;
            } else if (str[1] == '/') { // comment line
                str += 2;
                while ((*str) && (*str != '\n') && (*str != '\r'))
                    str++;
                goto top;
            }
        }
        if (*str == '#') { str++; goto top; } // '#' is silently skipped
        if (*str == '+' && str[1] == '+') { str += 2; return TOK_PLUS_PLUS; }
        if (*str == '-') {
            if (str[1] == '-') { str += 2; return TOK_MINUS_MINUS; }
            // <ctype.h> functions need an unsigned char value
            if (isdigit((unsigned char)str[1])) { // number: -100
                *p++ = *str++; // the sign; token[] is still empty here
                while ((*str >= '0' && *str <= '9') || *str == '.') {
                    if (p < limit) *p++ = *str; else too_long = 1;
                    str++;
                }
                *p = 0;
                if (too_long) lex_too_long();
                return TOK_NUMBER;
            }
        }
        return *str++;

    case 0:
        return 0;

    default:
        printf ("ERRO LINE(%d): Ilegal char: (%c) %d\n", line, *str, *str);
        erro = 1;
    }
    return 0;
}
/*
** stmt: read one token and dispatch a single statement.
**
** Returns 0 when lex() reports end of input ( token 0 ), 1 otherwise.
** "int" goes to word_int(); ';' and '}' are accepted and ignored;
** anything else is handed to expression().
*/
static int stmt (ASM *a)
{
    tok = lex();

    if (tok == 0)
        return 0;

    if (tok == TOK_INT)
        word_int(a);
    else if (tok != ';' && tok != '}')
        expression(a);

    return 1;
}
/*
** expression: compile a statement that starts with the current token.
**
** Accepted forms ( the current token must be an identifier ):
**   func_name( ... );          call of a lib[] function, result discarded
**   var = func_name( ... );    call of a lib[] function, %eax stored in var
**   var = a * b + c * d;       arithmetic expression stored in var
**
** A known variable that is not followed by '=' emits nothing and reports
** no error. Anything else is reported through erro_line().
**
** The token text is used directly in the "not found" message: no lex()
** call happens between the lookup and the message, so no copy is needed
** ( the previous 255-byte copy could overflow, token[] being larger ).
*/
void expression (ASM *a)
{
    LIB *l;
    int i;

    if (tok != TOK_ID) {
        erro_line("EXPRESSION:\n");
        if(*token) printf("TOKEN(%s)",token); else printf("%c = %d\n", tok, tok);
        return;
    }

    // Call of a function/lib WITHOUT using its return value:
    //
    // func_name();
    //
    for (l = lib; l->name; l++) {
        if (!strcmp(l->name, token)) {
            proc_func (a, l->code);
            return;
        }
    }

    i = VarFind(token);
    if (i == -1) {
        erro_line("EXPRESSION:\n");
        printf ("Variable not found: '%s'\n", token);
        return;
    }

    tok = lex();
    if (tok != '=')
        return;

    tok = lex();

    // Call of a function/lib WITH a return value:
    //
    // i = func_name();
    //
    for (l = lib; l->name; l++) {
        if (!strcmp(l->name, token)) {
            // Emit the call; the callee leaves its result in %eax
            // ( every return value except float/double ).
            proc_func (a, l->code);
            // Now copy %eax into the variable.
            asm_mov_reg_var(a, EAX, &Gvar[i].value.l);
            return;
        }
    }

    // Otherwise an arithmetic expression:
    //
    // i = a * b + c * d;
    //
    expr0(a);
    // Copy the top of the stack ( %esp ) into the variable.
    asm_popl_var(a, &Gvar[i].value.l);
}
// Additive level: expr1 { ('+' | '-') expr1 }
// After each right-hand operand the matching add/sub emitter is called, so
// the emitted code combines the two values that are on top of the stack.
void expr0 (ASM *a)
{
    expr1(a);
    for (;;) {
        const int op = tok;

        if (op != '+' && op != '-')
            break;
        tok = lex();
        expr1(a);
        if (op == '+')
            asm_add_eax_esp(a);
        else
            asm_sub_eax_esp(a);
    }
}
// Multiplicative level: expr2 { ('*' | '/') expr2 }
// After each right-hand operand the matching mul/div emitter is called.
void expr1 (ASM *a)
{
    expr2(a);
    for (;;) {
        const int op = tok;

        if (op != '*' && op != '/')
            break;
        tok = lex();
        expr2(a);
        if (op == '*')
            asm_imul_eax_esp(a);
        else
            asm_idivl_eax_esp (a);
    }
}
// Parenthesised level: '(' expr0 ')' | expr3
// A missing ')' is reported, and the token after it is still consumed.
void expr2 (ASM *a)
{
    if (tok != '(') {
        expr3(a); // atom:
        return;
    }

    tok = lex();
    expr0(a);
    if (tok != ')') {
        printf ("ERRO )\n");
        erro_line (" ");
    }
    tok = lex();
}
// atom: a known variable or a number.
// Emits a push of the operand and advances to the next token. An unknown
// identifier or any other token is reported through erro_line() and the
// token is not consumed.
void expr3 (ASM *a)
{
    if (tok==TOK_ID) {
        int i;
        if ((i=VarFind(token)) != -1) {
            asm_pushl_var (a, &Gvar[i].value.l);
            tok=lex();
        }else{
            // Sized from token[] so the whole identifier always fits;
            // snprintf() bounds the write in any case.
            char buf[sizeof(token) + 64];
            snprintf(buf, sizeof(buf), "%s: '%s'", "EXPRESSION VAR NOT FOUND:", token);
            erro_line(buf);
        }
    }
    else if (tok==TOK_NUMBER) {
        asm_push_number (a, atoi(token));
        tok=lex();
    }
    else erro_line("Expression");
}
// SYNTAX:
//
// int a, b, c, d;
// int a = 100, b = 200, c = 1500;
//
// Handles an "int" declaration: every identifier up to the ';' is created
// ( or reset ) through CreateVarInt(). Only a numeric literal is honoured as
// initializer; any other initializer leaves the value at 0. No machine code
// is emitted, so ( a ) is not used.
void word_int (ASM *a)
{
    // Same size as the lexer buffer, so an identifier of any accepted
    // length fits ( a 255-byte buffer could be overrun ).
    char name[sizeof(token)];
    int value;

    (void)a;
    while ((tok=lex())) {
        if (tok==TOK_ID) {
            snprintf (name, sizeof(name), "%s", token);
            value = 0;
            tok = lex();
            if (tok == '=') {
                if ((tok=lex())==TOK_NUMBER)
                    value = atoi (token);
            }
            CreateVarInt (name, value);
        }
        if (tok == ';') break;
    }
    if (tok != ';') {
        printf ("ERRO: word int need char ';'\n");
        erro = 1;
    }
}
/*
** proc_func: compile the argument list of a call and then the call itself.
**
** Tokens are read until ';' ( or end of input ). Each argument that starts
** with an identifier or a number is compiled with expr0(); its value is
** popped into %eax and stored in the next 4-byte slot above %esp. The
** slots live in the area reserved by asm_sub_stack() in Parse(). Scanning
** stops once more than 15 arguments have been stored. Finally a call to
** ( code ) is emitted.
*/
void proc_func (ASM *a, UCHAR *code)
{
    int nargs = 0;  // arguments stored so far
    int offset = 0; // byte offset from %esp of the next argument slot

    for (tok = lex(); tok; tok = lex()) {
        if (tok == TOK_ID || tok == TOK_NUMBER) {
            // Leaves the value of the expression on top of the stack.
            expr0(a);
            // Balance the stack again: take the value into %eax.
            g (a,0x58); // 58 pop %eax
            // Store %eax in the slot of this argument.
            if (nargs == 0)
                g3 (a,0x89,0x04,0x24); // 89 04 24 mov %eax,(%esp)
            else
                g4 (a,0x89,0x44,0x24,(char)offset); // 89 44 24 04 mov %eax,0x4(%esp)
            nargs++;
            offset += 4;
        }
        if (nargs > 15 || tok == ';')
            break;
    }
    asm_call (a, code);
}
/*
** CreateVarInt: create the integer variable ( name ) with ( value ), or, if
** a variable of that name already exists, reset it ( only when its type is
** TYPE_LONG ).
**
** The emitted machine code embeds the absolute address of Gvar[i].value.l
** ( see the asm_pushl_var() / asm_popl_var() / asm_mov_reg_var() call
** sites ). Growing the table with realloc() could move it and leave those
** addresses dangling, so the table is allocated once with a fixed capacity
** and never moved. NOTE(review): this assumes Gvar is allocated only here,
** which holds for the code visible in this file.
**
** Failures ( out of memory, table full ) are reported through erro_line(),
** which also sets ( erro ); the variable is then not created.
*/
void CreateVarInt (char *name, int value)
{
    enum { GVAR_CAPACITY = 1024 }; // maximum number of script variables
    char *copy;
    int i;

    for (i = 0; i < Gvar_len; i++) {
        if (!strcmp(Gvar[i].name, name)) {
            if (Gvar[i].type == TYPE_LONG)
                Gvar[i].value.l = value;
            return;
        }
    }

    if (Gvar == NULL) {
        Gvar = (VAR*) calloc (GVAR_CAPACITY, sizeof (VAR));
        if (Gvar == NULL) {
            erro_line ("CreateVarInt: out of memory");
            return;
        }
    }
    if (Gvar_len >= GVAR_CAPACITY) {
        erro_line ("CreateVarInt: too many variables");
        return;
    }

    copy = strdup (name);
    if (copy == NULL) {
        erro_line ("CreateVarInt: out of memory");
        return;
    }

    Gvar[Gvar_len].name = copy;
    Gvar[Gvar_len].type = TYPE_LONG;
    Gvar[Gvar_len].info = NULL;
    Gvar[Gvar_len].value.l = value;
    Gvar_len++;
}
// Look ( name ) up in Gvar.
// Returns the index of the first entry with that name, or -1 if none.
int VarFind (char *name)
{
    int idx = 0;

    while (idx < Gvar_len) {
        if (strcmp(Gvar[idx].name, name) == 0)
            return idx;
        idx++;
    }
    return -1;
}
//-------------------------------------------------------------------
//########################### ASM API #############################
//-------------------------------------------------------------------
//
/*
** asm_new: allocate an ASM object with a code buffer of ( size ) bytes.
**
** Returns the new object with the write cursor at the start of the buffer
** and empty label/jump tables, or NULL if either allocation fails ( in
** which case nothing is leaked ).
*/
ASM *asm_new (unsigned int size)
{
    ASM *a = (ASM*)malloc(sizeof(ASM));

    if (a == NULL)
        return NULL;

    a->code = (UCHAR*)malloc(size);
    if (a->code == NULL) {
        free(a); // do not leak the header when the buffer cannot be allocated
        return NULL;
    }

    a->p = a->code;
    a->label = NULL; // this use realloc
    a->jump = NULL; // this use realloc
    a->label_len = 0;
    a->jump_len = 0;
    return a;
}
//-------------------------------------------------------------------
// This function use the code of Fabrice Bellard:
//
// LIB: tcc-0.9.25
// FILE: libtcc.c
// FUNC: void set_pages_executable (void *ptr, unsigned long length);
// LINE: 400
//
// Set executable: a->code
//
//-------------------------------------------------------------------
// Make the ( len ) bytes starting at ( ptr ) readable, writable and
// executable. On failure a message is printed and the process exits with -1.
// Linux: the range is widened to whole pages before calling mprotect().
// On any other platform the function does nothing.
void asm_set_executable (void *ptr, unsigned long len)
{
#ifdef _WIN32
    unsigned long previous;

    if (VirtualProtect(ptr, len, PAGE_EXECUTE_READWRITE, &previous) == 0)
    {
        printf ("ERROR: asm_set_executable() ... NOT FOUND - VirtualProtect()\n");
        exit (-1);
    }
#endif
#ifdef __linux__
    const unsigned long page = sysconf (_SC_PAGESIZE);
    const unsigned long mask = ~(page - 1);
    // round the start down and the end up to a page boundary
    const unsigned long first = (unsigned long)ptr & mask;
    const unsigned long last = ((unsigned long)ptr + len + page - 1) & mask;

    if (mprotect((void *)first, last - first, PROT_READ | PROT_WRITE | PROT_EXEC) == -1)
    {
        printf ("ERROR: asm_set_executable() ... NOT FOUND - mprotec()\n");
        exit (-1);
    }
#endif
}
// Emit the pointer value ( ptr ) itself into the code stream, in host byte
// order, and advance the cursor by sizeof(void*) bytes.
// memcpy() is used because the cursor is a byte position with no alignment
// guarantee; storing through a casted (void**) would be a misaligned,
// type-punned access.
void asm_get_addr (ASM *a, void *ptr) {
    memcpy(a->p, &ptr, sizeof(ptr));
    a->p += sizeof(ptr);
}
//--------------------------------
//########## gen/emit ##########
//--------------------------------
// Emit one byte at the cursor and advance the cursor.
void g (ASM *a, UCHAR c) {
    a->p[0] = c;
    a->p += 1;
}
// Emit two bytes, in argument order, and advance the cursor past them.
void g2 (ASM *a, UCHAR c1, UCHAR c2) {
    *a->p++ = c1;
    *a->p++ = c2;
}
// Emit three bytes, in argument order, and advance the cursor past them.
void g3 (ASM *a, UCHAR c1, UCHAR c2, UCHAR c3) {
    UCHAR *out = a->p;

    *out++ = c1;
    *out++ = c2;
    *out++ = c3;
    a->p = out;
}
// Emit four bytes, in argument order, and advance the cursor past them.
void g4 (ASM *a, UCHAR c1, UCHAR c2, UCHAR c3, UCHAR c4) {
    UCHAR *out = a->p;

    *out++ = c1;
    *out++ = c2;
    *out++ = c3;
    *out++ = c4;
    a->p = out;
}
//--------------------------------
// Emit the function prologue:
//   55      push %ebp
//   89 e5   mov  %esp,%ebp
void asm_begin (ASM *a) {
    static const unsigned char prologue[] = { 0x55, 0x89, 0xe5 };

    memcpy(a->p, prologue, sizeof(prologue));
    a->p += sizeof(prologue);
}
// Emit the function epilogue:
//   c9   leave
//   c3   ret
// The label/jump fix-up ( asm_change_label ) is not wired in here yet.
void asm_end (ASM *a) {
    *a->p++ = 0xc9; // leave
    *a->p++ = 0xc3; // ret
}
//------------------------------------------------
// Emit: multiply the two values on top of the stack, leaving the product
// as the new top.
void asm_imul_eax_esp (ASM *a) {
    static const unsigned char code[] = {
        0x58,                   // 58          pop  %eax
        0x0f, 0xaf, 0x04, 0x24, // 0f af 04 24 imul (%esp),%eax
        0x89, 0x04, 0x24        // 89 04 24    mov  %eax,(%esp)
    };

    memcpy(a->p, code, sizeof(code));
    a->p += sizeof(code);
    stack--; // two operands replaced by one result
}
// Emit: divide the value below the top of the stack by the top of the
// stack ( signed ), leaving the quotient as the new top.
void asm_idivl_eax_esp (ASM *a) {
    static const unsigned char code[] = {
        0x59,       // 59    pop  %ecx   ( divisor )
        0x58,       // 58    pop  %eax   ( dividend )
        0x99,       // 99    cltd
        0xf7, 0xf9, // f7 f9 idiv %ecx
        0x50        // 50    push %eax   ( quotient )
    };

    stack -= 2; // both operands are popped ...
    memcpy(a->p, code, sizeof(code));
    a->p += sizeof(code);
    stack += 1; // ... and the result is pushed
}
// Emit: add the top of the stack to the value below it, which becomes the
// new top.
void asm_add_eax_esp (ASM *a) {
    static const unsigned char code[] = {
        0x58,            // 58       pop %eax
        0x01, 0x04, 0x24 // 01 04 24 add %eax,(%esp)
    };

    memcpy(a->p, code, sizeof(code));
    a->p += sizeof(code);
    stack--;
}
// Emit: subtract the top of the stack from the value below it, which
// becomes the new top.
void asm_sub_eax_esp (ASM *a) {
    static const unsigned char code[] = {
        0x58,            // 58       pop %eax
        0x29, 0x04, 0x24 // 29 04 24 sub %eax,(%esp)
    };

    memcpy(a->p, code, sizeof(code));
    a->p += sizeof(code);
    stack--;
}
//------------------------------------------------
// Emit a push of the 32-bit value stored at the absolute address ( var ):
//   ff 35 <addr>   pushl addr
void asm_pushl_var (ASM *a, void *var) {
    g2(a,0xff,0x35);
    asm_get_addr(a,var);
    stack++;
}
// Emit a push of the constant ( value ):
//   6a ib   push $imm8    when the value fits in a signed byte
//   68 id   push $imm32   otherwise
//
// The 32-bit immediate is always written as exactly 4 little-endian bytes.
// Writing sizeof(long) bytes would emit 8 bytes where long is 64 bits, and
// the byte-range test must not depend on whether plain char is signed.
// Where long is wider than 32 bits only the low 32 bits are emitted.
void asm_push_number (ASM *a, long value) {
    stack++;
    if (value >= -128 && value <= 127) {
        g(a,0x6a); // 6a 64 push $0x64
        *a->p++ = (unsigned char)value;
    } else {
        const unsigned long bits = (unsigned long)value;

        g(a,0x68); // 68 00 e1 f5 05 push $0x5f5e100
        a->p[0] = (unsigned char)(bits & 0xff);
        a->p[1] = (unsigned char)((bits >> 8) & 0xff);
        a->p[2] = (unsigned char)((bits >> 16) & 0xff);
        a->p[3] = (unsigned char)((bits >> 24) & 0xff);
        a->p += 4;
    }
}
// Emit a pop of the top of the stack into the absolute address ( var ):
//   8f 05 <addr>   popl addr
void asm_popl_var (ASM *a, void *var) {
    g2(a,0x8f,0x05);
    asm_get_addr(a, var);
    stack--;
}
// Emit: reserve ( c ) bytes on the stack.
//   83 ec ib   sub $imm8,%esp
void asm_sub_stack (ASM *a, char c) {
    const unsigned char imm8 = (unsigned char)c;

    g3(a, 0x83, 0xec, imm8);
}
// Emit an indirect call to the absolute address ( func ):
//   b8 <addr>   mov  $addr,%eax
//   ff d0       call *%eax
void asm_call (ASM *a, void *func) {
    // load the target address into %eax ...
    g(a,0xb8);
    asm_get_addr(a, func);
    // ... and call through the register
    g2(a,0xff,0xd0);
}
// Emit: store register ( reg ) ( EAX..EDI, i.e. 0..7 ) into the 32-bit
// variable at the absolute address ( var ). Any other ( reg ) emits nothing.
//
//   a3 <addr>          mov %eax,addr    short form, EAX only
//   89 <modrm> <addr>  mov %reg,addr    modrm = 00 reg 101 = 0x05 | reg<<3
//
// The general form covers every register. Emitting the address without an
// opcode for EBX..EDI would corrupt the instruction stream.
void asm_mov_reg_var (ASM *a, int reg, void *var)
{
    if (reg < 0 || reg > 7)
        return;

    if (reg == EAX)
        g(a,0xa3); // a3 10 40 40 00 mov %eax,0x404010
    else
        g2(a,0x89,(unsigned char)(0x05 | (reg << 3))); // e.g. 89 0d 60 40 40 00 mov %ecx,0x404060

    asm_get_addr(a,var);
}
// Report an error: flags the parse as failed ( erro = 1 ) and prints the
// message ( s ) together with the current source line number.
void erro_line (char *s) {
    erro = 1;
    printf ("ERRO LINE %d - %s\n", line, s);
}
/*
** Parse: compile the script ( text ) into the code buffer of ( a ).
**
** Emits the prologue, reserves the parameter area, compiles statements
** until end of input or the first error, then emits the epilogue.
** Returns the error flag: 0 on success, non-zero if an error was reported.
*/
int Parse (ASM *a, char *text)
{
    // reset the lexer state
    str = text;
    line = 1;
    erro = 0;

    asm_begin(a);
    //
    // Enables calling functions with parameters.
    //
    // Try a "script" that calls a function with a parameter and
    // COMMENT OUT THIS LINE ( your program will break ). ;). SERIOUSLY.
    //
    asm_sub_stack (a, 80); //: 80 / 4 ==: MAXIMUM OF 20 PARAMETERS

    for (;;) {
        if (erro)
            break;
        if (!stmt(a))
            break;
    }

    asm_end(a);
    if (erro)
        printf("<<<<<<< ERRO >>>>>>>\n");
    return erro;
}
int main (int argc, char *argv[])
{
ASM *a;
FILE *fp;
if (argc >= 2 && (a=asm_new(ASM_DEFAULT_SIZE))!=NULL && (fp=fopen(argv[1], "rb"))!=NULL) {
int c, i = 0;
while ((c=getc(fp))!=EOF) prog[i++] = c; // store prog[];
prog[i] = 0;
fclose(fp);
if ( !Parse(a,prog) ) {
asm_set_executable (a->code, (int)(a->p - a->code)+5);
( (void(*)()) a->code ) (); // <<<<<<< execute here >>>>>>>
}
//--------------------------------
//########## Free ASM ##########
//--------------------------------
if (a->label) {
for(i=0;i<a->label_len;i++)
if (a->label[i].text) free(a->label[i].text);
free (a->label);
}
if (a->jump) {
for(i=0;i<a->jump_len;i++)
if (a->jump[i].text) free(a->jump[i].text);
free(a->jump);
}
free(a->code);
free(a);
printf ("\nExiting With Sucess !!!\n");
}
else printf ("USAGE: mini <file.cs>\n");
return 0;
}
//-------------------------------------------------------------------
//############################# LIB ###############################
//-------------------------------------------------------------------
//
// Script function "print": writes ( i ) in decimal, followed by a newline,
// to standard output.
void lib_print (int i)
{
    printf ("%d\n", i);
}
// Script function "soma": returns the sum of its two arguments.
int lib_soma (int a, int b)
{
    const int sum = a + b;

    return sum;
}