Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,34 @@ jobs:
make check-sanitizer DYNLINK=${{ steps.determine-mode.outputs.DYNLINK }} || exit 1
make check DYNLINK=${{ steps.determine-mode.outputs.DYNLINK }} || exit 1

preprocessor-host:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this target still necessary?

runs-on: ubuntu-24.04
strategy:
matrix:
compiler: [gcc, clang]
architecture: [arm, riscv]
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Download dependencies
run: |
sudo apt-get update -q -y
sudo apt-get install -q -y graphviz jq
sudo apt-get install -q -y qemu-user
sudo apt-get install -q -y build-essential
- name: Configurate config
run: |
make distclean config ARCH=${{ matrix.architecture }}
- name: Preprocess stage 1 source code
env:
CC: ${{ matrix.compiler }}
run: |
make out/shecc
./out/shecc -E src/main.c > ./out/out.c
- name: Build stage 1 artifact
run: |
./out/shecc --no-libc -o out/shecc-stage1.elf ./out/out.c

coding-style:
runs-on: ubuntu-24.04
steps:
Expand Down
7 changes: 3 additions & 4 deletions COMPLIANCE.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,10 @@ This document tracks compliance gaps and non-standard behaviors.
- `#define` for object-like and function-like macros
- `#ifdef`, `#ifndef`, `#if`, `#elif`, `#else`, `#endif`
- `#undef` for macro removal
- `#pragma once`; all other `#pragma` directives are ignored
- `defined()` operator
- `__VA_ARGS__` for variadic macros
- `__FILE__`, `__LINE__` built-in macros

## Missing Features

Expand Down Expand Up @@ -83,15 +85,12 @@ This document tracks compliance gaps and non-standard behaviors.

| Feature | Status | Description |
|---------|--------|-------------|
| `#include` | Parsed only | No file inclusion |
| `#include` | Parsed only | Local file inclusion is supported, but system header files cannot be included |
| Token pasting (`##`) | Missing | Cannot concatenate tokens |
| Stringizing (`#`) | Missing | Cannot convert to string |
| `__FILE__` | Missing | No file name macro |
| `__LINE__` | Missing | No line number macro |
| `__DATE__` | Missing | No compile date |
| `__TIME__` | Missing | No compile time |
| `__STDC__` | Missing | No standard compliance indicator |
| `#pragma` | Ignored | Accepted but no effect |

### Advanced Features

Expand Down
27 changes: 27 additions & 0 deletions lib/c.c
Original file line number Diff line number Diff line change
Expand Up @@ -543,6 +543,33 @@ int fputc(int c, FILE *stream)
return c;
}

/* Reposition the file offset of 'stream' by issuing the lseek syscall.
 *
 * stream: handle forwarded to the syscall as its first argument
 * offset: displacement forwarded to the syscall
 * whence: origin selector forwarded to the syscall (presumably one of
 *         SEEK_SET, SEEK_CUR or SEEK_END)
 *
 * Returns 0 unconditionally on both supported targets: the value returned by
 * the syscall is discarded, so a failed seek is not reported to the caller.
 * NOTE(review): the RISC-V call passes NULL as the result pointer; confirm
 * that the seek still takes effect before adding a return-value check.
 */
int fseek(FILE *stream, int offset, int whence)
{
#if defined(__arm__)
__syscall(__syscall_lseek, stream, offset, whence);
return 0;
#elif defined(__riscv)
/* No need to offset */
__syscall(__syscall_lseek, stream, 0, offset, NULL, whence);
return 0;
Comment on lines +548 to +554
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does the implementation ensure that fseek() always succeeds?

#else
#error "Unsupported fseek support for current platform"
#endif
}

/* Report the current file offset of 'stream'.
 *
 * Returns the offset measured from the start of the file, or -1 when the
 * underlying seek syscall reports an error.
 */
int ftell(FILE *stream)
{
#if defined(__arm__)
    int pos = __syscall(__syscall_lseek, stream, 0, SEEK_CUR);
    /* The raw syscall yields a negative errno on failure; normalize to -1. */
    if (pos < 0)
        return -1;
    return pos;
#elif defined(__riscv)
    /* The five-argument form matches Linux llseek, which stores a 64-bit
     * offset through the result pointer. Reserve two 32-bit words so the
     * store cannot run past the buffer; only the low word is returned.
     */
    int result[2];
    result[0] = 0;
    result[1] = 0;
    if (__syscall(__syscall_lseek, stream, 0, 0, result, SEEK_CUR) < 0)
        return -1;
    return result[0];
#else
#error "Unsupported ftell support for current platform"
#endif
}

#define CHUNK_SIZE_FREED_MASK 1
#define CHUNK_SIZE_SZ_MASK 0xFFFFFFFE
#define CHUNK_GET_SIZE(size) (size & CHUNK_SIZE_SZ_MASK)
Expand Down
8 changes: 8 additions & 0 deletions lib/c.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,18 @@
#define INT_MAX 0x7fffffff
#define INT_MIN 0x80000000

/* Origin selectors for the 'whence' argument of fseek(); ftell() passes
 * SEEK_CUR to the lseek syscall to query the current offset.
 */
#define SEEK_SET 0
#define SEEK_CUR 1
#define SEEK_END 2

#if defined(__arm__)
#define __SIZEOF_POINTER__ 4
#define __syscall_exit 1
#define __syscall_read 3
#define __syscall_write 4
#define __syscall_close 6
#define __syscall_open 5
#define __syscall_lseek 19
#define __syscall_mmap2 192
#define __syscall_munmap 91

Expand All @@ -35,6 +40,7 @@
#define __syscall_close 57
#define __syscall_open 1024
#define __syscall_openat 56
#define __syscall_lseek 62
#define __syscall_mmap2 222
#define __syscall_munmap 215

Expand All @@ -59,6 +65,8 @@ int fclose(FILE *stream);
int fgetc(FILE *stream);
char *fgets(char *str, int n, FILE *stream);
int fputc(int c, FILE *stream);
int fseek(FILE *stream, int offset, int whence);
int ftell(FILE *stream);

/* string-related functions */
int strlen(char *str);
Expand Down
8 changes: 4 additions & 4 deletions src/arm.c
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ arm_cond_t arm_get_cond(opcode_t op)
case OP_leq:
return __LE;
default:
error("Unsupported condition IR opcode");
fatal("Unsupported condition IR opcode");
}
return __AL;
}
Expand All @@ -113,7 +113,7 @@ int arm_extract_bits(int imm, int i_start, int i_end, int d_start, int d_end)
{
if (((d_end - d_start) != (i_end - i_start)) || (i_start > i_end) ||
(d_start > d_end))
error("Invalid bit copy");
fatal("Invalid bit copy");

int v = imm >> i_start;
v &= ((2 << (i_end - i_start)) - 1);
Expand Down Expand Up @@ -143,7 +143,7 @@ int __mov(arm_cond_t cond, int io, int opcode, int s, int rn, int rd, int op2)
}
if (op2 > 255)
/* value spans more than 8 bits */
error("Unable to represent value");
fatal("Unable to represent value");
}
return arm_encode(cond, s + (opcode << 1) + (io << 5), rn, rd,
(shift << 8) + (op2 & 255));
Expand Down Expand Up @@ -286,7 +286,7 @@ int arm_halfword_transfer(arm_cond_t cond,
}

if (ofs > 255)
error("Halfword offset too large");
fatal("Halfword offset too large");

/* Halfword encoding: split offset into 4-bit high and low parts */
int imm4H = ((ofs >> 4) & 0xF) << 8;
Expand Down
79 changes: 33 additions & 46 deletions src/defs.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,16 @@

/* definitions */

/* Common macro functions */
/* Character-class helpers.
 *
 * Every use of the argument is parenthesized so that an expression argument
 * (for example a masked value such as `v & 0x7f`) is classified as a whole
 * instead of being split by operator precedence. The argument is evaluated
 * more than once, so it must not have side effects.
 */
#define is_whitespace(c) ((c) == ' ' || (c) == '\t')
#define is_newline(c) ((c) == '\r' || (c) == '\n')
/* Unlike the standard isalnum(), this also accepts '_'. */
#define is_alnum(c)                                              \
    (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z') || \
     ((c) >= '0' && (c) <= '9') || ((c) == '_'))
Comment on lines +16 to +18
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

isalnum() is equivalent to isalpha() || isdigit(), which means the macro should only validate whether the character is a digit or an alphabetic letter.

/* Decimal and hexadecimal digit tests. The argument is parenthesized at each
 * use so expression arguments are classified as a whole; it is evaluated more
 * than once, so it must not have side effects.
 */
#define is_digit(c) ((c) >= '0' && (c) <= '9')
#define is_hex(c) \
    (is_digit(c) || ((c) >= 'a' && (c) <= 'f') || ((c) >= 'A' && (c) <= 'F'))

/* Limitations */
#define MAX_TOKEN_LEN 256
#define MAX_ID_LEN 64
Expand All @@ -26,15 +36,13 @@
#define MAX_BB_DOM_SUCC 64
#define MAX_BB_RDOM_SUCC 256
#define MAX_GLOBAL_IR 256
#define MAX_SOURCE 1048576
#define MAX_CODE 262144
#define MAX_DATA 262144
#define MAX_SYMTAB 65536
#define MAX_STRTAB 65536
#define MAX_HEADER 1024
#define MAX_PROGRAM_HEADER 1024
#define MAX_SECTION 1024
#define MAX_ALIASES 128
#define MAX_SECTION_HEADER 1024
#define MAX_SHSTR 1024
#define MAX_INTERP 1024
Expand All @@ -56,7 +64,7 @@
#define SMALL_ARENA_SIZE 65536 /* 64 KiB - for small allocations */
#define LARGE_ARENA_SIZE 524288 /* 512 KiB - for instruction arena */
#define DEFAULT_FUNCS_SIZE 64
#define DEFAULT_INCLUSIONS_SIZE 16
#define DEFAULT_SRC_FILE_COUNT 8

/* Arena compaction bitmask flags for selective memory reclamation */
#define COMPACT_ARENA_BLOCK 0x01 /* BLOCK_ARENA - variables/blocks */
Expand Down Expand Up @@ -131,6 +139,7 @@ typedef struct {
/* lexer tokens */
typedef enum {
T_start, /* FIXME: Unused, intended for lexer state machine init */
T_eof, /* end-of-file (EOF) */
T_numeric,
T_identifier,
T_comma, /* , */
Expand Down Expand Up @@ -179,7 +188,6 @@ typedef enum {
T_question, /* ? */
T_colon, /* : */
T_semicolon, /* ; */
T_eof, /* end-of-file (EOF) */
T_ampersand, /* & */
T_return,
T_if,
Expand Down Expand Up @@ -211,38 +219,36 @@ typedef enum {
T_cppd_endif,
T_cppd_ifdef,
T_cppd_ifndef,
T_cppd_pragma
} token_t;
T_cppd_pragma,
/* C pre-processor specific, these kinds
* will be removed after pre-processing is done.
*/
T_newline,
T_backslash,
T_whitespace,
T_tab
} token_kind_t;

/* Source location tracking for better error reporting */
typedef struct {
int pos; /* raw source file position */
int len; /* length of token */
int line;
int column;
char *filename;
} source_location_t;

/* Token structure with metadata for enhanced lexing */
typedef struct token_info {
token_t type;
char value[MAX_TOKEN_LEN];
typedef struct token {
token_kind_t kind;
char *literal;
source_location_t location;
struct token_info *next; /* For freelist management */
} token_info_t;

/* Token freelist for memory reuse */
typedef struct {
token_info_t *freelist;
int allocated_count;
} token_pool_t;
struct token *next;
} token_t;

/* Token buffer for improved lookahead */
#define TOKEN_BUFFER_SIZE 8
typedef struct {
token_info_t *tokens[TOKEN_BUFFER_SIZE];
int head;
int tail;
int count;
} token_buffer_t;
typedef struct token_stream {
token_t *head;
token_t *tail;
} token_stream_t;

/* String pool for identifier deduplication */
typedef struct {
Expand Down Expand Up @@ -387,7 +393,7 @@ struct var {
int in_loop;
struct var *base;
int subscript;
struct var *subscripts[64];
struct var *subscripts[128];
int subscripts_idx;
rename_t rename;
ref_block_list_t ref_block_list; /* blocks which kill variable */
Expand All @@ -412,25 +418,13 @@ struct var {
bool ofs_based_on_stack_top;
};

typedef struct {
char name[MAX_VAR_LEN];
bool is_variadic;
int start_source_idx;
var_t param_defs[MAX_PARAMS];
int num_param_defs;
int params[MAX_PARAMS];
int num_params;
bool disabled;
} macro_t;

typedef struct func func_t;

/* block definition */
struct block {
var_list_t locals;
struct block *parent;
func_t *func;
macro_t *macro;
struct block *next;
};

Expand Down Expand Up @@ -494,13 +488,6 @@ typedef struct {
type_t *type;
} lvalue_t;

/* alias for #defines */
typedef struct {
char alias[MAX_VAR_LEN];
char value[MAX_VAR_LEN];
bool disabled;
} alias_t;

/* constants for enums */
typedef struct {
char alias[MAX_VAR_LEN];
Expand Down
14 changes: 7 additions & 7 deletions src/elf.c
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ void elf_generate_header(void)
{
/* Check for null pointers to prevent crashes */
if (!elf_code || !elf_data || !elf_symtab || !elf_strtab || !elf_header) {
error("ELF buffers not initialized");
fatal("ELF buffers not initialized");
return;
}

Expand Down Expand Up @@ -193,7 +193,7 @@ void elf_generate_program_headers(void)
!dynamic_sections.elf_plt || !dynamic_sections.elf_got ||
!dynamic_sections.elf_dynstr || !dynamic_sections.elf_dynsym ||
!dynamic_sections.elf_dynamic))) {
error("ELF section buffers not initialized");
fatal("ELF section buffers not initialized");
return;
}

Expand Down Expand Up @@ -316,7 +316,7 @@ void elf_generate_section_headers(void)
!dynamic_sections.elf_plt || !dynamic_sections.elf_got ||
!dynamic_sections.elf_dynstr || !dynamic_sections.elf_dynsym ||
!dynamic_sections.elf_dynamic))) {
error("ELF section buffers not initialized");
fatal("ELF section buffers not initialized");
return;
}

Expand Down Expand Up @@ -587,7 +587,7 @@ void elf_align(strbuf_t *elf_array)
{
/* Check for null pointers to prevent crashes */
if (!elf_array) {
error("ELF buffers not initialized for alignment");
fatal("ELF buffers not initialized for alignment");
return;
}

Expand All @@ -603,7 +603,7 @@ void elf_generate_sections(void)
!dynamic_sections.elf_plt || !dynamic_sections.elf_got ||
!dynamic_sections.elf_dynstr || !dynamic_sections.elf_dynsym ||
!dynamic_sections.elf_dynamic))) {
error("ELF section buffers not initialized");
fatal("ELF section buffers not initialized");
return;
}

Expand Down Expand Up @@ -820,7 +820,7 @@ void elf_add_symbol(const char *symbol, int pc)
{
/* Check for null pointers to prevent crashes */
if (!symbol || !elf_symtab || !elf_strtab) {
error("Invalid parameters for elf_add_symbol");
fatal("Invalid parameters for elf_add_symbol");
return;
}

Expand Down Expand Up @@ -928,7 +928,7 @@ void elf_generate(const char *outfile)

FILE *fp = fopen(outfile, "wb");
if (!fp) {
error("Unable to open output file for writing");
fatal("Unable to open output file for writing");
return;
}

Expand Down
Loading