If we can provide this information to the callback, then we don't need to determine the block size for ourselves in LibAFL here.
Note the following links are from the qemu repository itself rather than the fork from the bridge.
We can see the code for generating the callback in the bridge here.
/*
* Isolate the portion of code gen which can setjmp/longjmp.
* Return the size of the generated code, or negative on error.
*/
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
target_ulong pc, void *host_pc,
int *max_insns, int64_t *ti)
{
...
//// --- Begin LibAFL code ---
struct libafl_block_hook* hook = libafl_block_hooks;
while (hook) {
uint64_t cur_id = 0;
if (hook->gen)
cur_id = hook->gen(pc, hook->data);
if (cur_id != (uint64_t)-1 && hook->exec) {
TCGv_i64 tmp0 = tcg_const_i64(cur_id);
TCGv_i64 tmp1 = tcg_const_i64(hook->data);
TCGTemp *tmp2[2] = { tcgv_i64_temp(tmp0), tcgv_i64_temp(tmp1) };
tcg_gen_callN(hook->exec, NULL, 2, tmp2);
tcg_temp_free_i64(tmp0);
tcg_temp_free_i64(tmp1);
}
hook = hook->next;
}
//// --- End LibAFL code ---
gen_intermediate_code(env_cpu(env), tb, *max_insns, pc, host_pc);
assert(tb->size != 0);
...
We can see that hook->gen
is only passed the pc
. However, if we move the hook function to below the call to gen_intermediate_code
, then we will be able to include the size of the input block. This can be observed by following the call chain to where QEMU logs the input blocks when provided the -d in_asm
argument where this value is used to generate the debug output. If we consider the ARM code base (although other architectures should be the same).
We can see gen_intermediate_code
here.
/* generate intermediate code for basic block 'tb'. */
void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns,
target_ulong pc, void *host_pc)
{
...
translator_loop(cpu, tb, max_insns, pc, host_pc, ops, &dc.base);
}
We can then see the translator_loop
here. Note that the translator is setting the tb->size
value which is available to us in the function setjmp_gen_code
where we call our hook.
void translator_loop(CPUState *cpu, TranslationBlock *tb, int *max_insns,
target_ulong pc, void *host_pc,
const TranslatorOps *ops, DisasContextBase *db)
{
...
/* The disas_log hook may use these values rather than recompute. */
tb->size = db->pc_next - db->pc_first;
tb->icount = db->num_insns;
#ifdef DEBUG_DISAS
if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
&& qemu_log_in_addr_range(db->pc_first)) {
FILE *logfile = qemu_log_trylock();
if (logfile) {
fprintf(logfile, "----------------\n");
ops->disas_log(db, cpu, logfile);
fprintf(logfile, "\n");
qemu_log_unlock(logfile);
}
}
#endif
}
Lastly, we can see the code for generating the trace output for -d in_asm
here. Note the IN:
prefix which tells us this is the input block (e.g. the target code being emulated) rather than the intermediate or host representation of the block. Note here the use of the tb->size
by the expression dc->base.tb->size
.
static void arm_tr_disas_log(const DisasContextBase *dcbase,
CPUState *cpu, FILE *logfile)
{
DisasContext *dc = container_of(dcbase, DisasContext, base);
fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first));
target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size);
}
static const TranslatorOps arm_translator_ops = {
.init_disas_context = arm_tr_init_disas_context,
.tb_start = arm_tr_tb_start,
.insn_start = arm_tr_insn_start,
.translate_insn = arm_tr_translate_insn,
.tb_stop = arm_tr_tb_stop,
.disas_log = arm_tr_disas_log,
};
static const TranslatorOps thumb_translator_ops = {
.init_disas_context = arm_tr_init_disas_context,
.tb_start = arm_tr_tb_start,
.insn_start = arm_tr_insn_start,
.translate_insn = thumb_tr_translate_insn,
.tb_stop = arm_tr_tb_stop,
.disas_log = arm_tr_disas_log,
};
And the disassembler here. It is using the size parameter as a bound for the loop.
/* Disassemble this for me please... (debugging). */
void target_disas(FILE *out, CPUState *cpu, target_ulong code,
target_ulong size)
{
...
for (pc = code; size > 0; pc += count, size -= count) {
fprintf(out, "0x" TARGET_FMT_lx ": ", pc);
count = s.info.print_insn(pc, &s.info);
fprintf(out, "\n");
...
}
}