From 6fa5c4ec87fb781891221c389b0f8d28e4bb4472 Mon Sep 17 00:00:00 2001 From: Pierrick Philippe <pierrick.philippe@irisa.fr> Date: Wed, 3 Jul 2024 16:11:22 +0200 Subject: [PATCH] Fixed (hopefully): Crash when looking for rvalue of calling frame variable --- scripts/cmake.sh | 15 ++- scripts/copy_analyzer_header.sh | 9 +- scripts/update-alternatives.sh | 6 +- src/crypto_taint_sm.cc | 89 ++++++++++------- src/is_tainted_diagnostic.cc | 164 ++++++++++++++++++++++++++++++++ src/utils.cc | 16 +++- 6 files changed, 258 insertions(+), 41 deletions(-) create mode 100644 src/is_tainted_diagnostic.cc diff --git a/scripts/cmake.sh b/scripts/cmake.sh index 1e13ae6..00789f2 100644 --- a/scripts/cmake.sh +++ b/scripts/cmake.sh @@ -1,4 +1,9 @@ #!/bin/bash + +usage(){ + echo "$0 <GCC_VERSION> [GENERATOR=Ninja]" +} + set -x if [ ! -d "./build" ] @@ -8,9 +13,15 @@ fi if [ $# -eq 1 ] then - GENERATOR=$1 -else + GCC_VERSION=$1 GENERATOR="Ninja" +elif [ $# -eq 2 ] +then + GCC_VERSION=$1 + GENERATOR=$2 +else + usage + exit 1 fi cd ./build diff --git a/scripts/copy_analyzer_header.sh b/scripts/copy_analyzer_header.sh index 6541de3..04c50f5 100644 --- a/scripts/copy_analyzer_header.sh +++ b/scripts/copy_analyzer_header.sh @@ -1,7 +1,7 @@ #!/bin/bash usage() { - echo "$0 {trunk,custom}" + echo "$0 {trunk,custom,dev} [PATH_TO_GCC_SRC=$HOME/soft/gcc/src/gcc]" } weird() { @@ -34,6 +34,13 @@ then fi cp $GCC_REPO/analyzer/*.h $DEST elif [ $1 = "custom" -a $BASE_VER = "13.0.1_custom" ] +then + if [ ! -d $DEST ] + then + mkdir $DEST + fi + cp $GCC_REPO/analyzer/*.h $DEST +elif [ $1 = "dev" -a $BASE_VER = "13.0.1_custom_dev" ] then if [ ! 
-d $DEST ] then diff --git a/scripts/update-alternatives.sh b/scripts/update-alternatives.sh index 3627ffa..2691051 100644 --- a/scripts/update-alternatives.sh +++ b/scripts/update-alternatives.sh @@ -1,6 +1,6 @@ #!/bin/bash usage() { - echo "$0 {trunk,custom}" + echo "$0 {trunk,custom,dev}" } set -x @@ -19,6 +19,10 @@ elif [ $1 = "custom" ] then sudo update-alternatives --set gcc $HOME/.local/bin/gcc_modified sudo update-alternatives --set g++ $HOME/.local/bin/g++_modified +elif [ $1 = "dev" ] +then + sudo update-alternatives --set gcc $HOME/.local/bin/gcc_dev + sudo update-alternatives --set g++ $HOME/.local/bin/g++_dev else usage exit 1 diff --git a/src/crypto_taint_sm.cc b/src/crypto_taint_sm.cc index 3c4efd6..05ba04c 100644 --- a/src/crypto_taint_sm.cc +++ b/src/crypto_taint_sm.cc @@ -182,7 +182,7 @@ namespace crypto_taint { bool crypto_taint_state_machine::can_purge_p (state_machine::state_t state) const { LOG_SCOPE(this->get_logger()); this->log("state = %s", state->get_name()); - return false; + return state != this->m_start; } void crypto_taint_state_machine::on_pop_frame (sm_state_map *smap, const frame_region *frame_reg, tree result_lvalue, const gimple *call, sm_context *sm_ctx, const supernode *node) const { @@ -213,40 +213,64 @@ namespace crypto_taint { decl_reg->dump_to_pp(pp, false); logger->end_log_line(); } - if (handle_result) { - if (tree decl = reg->maybe_get_decl()) { - imm_use_iterator imm_iter; - use_operand_p use_p; - FOR_EACH_IMM_USE_FAST (use_p, imm_iter, decl) { - gimple * use_stmt = USE_STMT(use_p); - if (is_a<greturn *>(use_stmt)) { + if (tree decl = reg->maybe_get_decl()) { + imm_use_iterator imm_iter; + use_operand_p use_p; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, decl) { + gimple * use_stmt = USE_STMT(use_p); + if (is_a<greturn *>(use_stmt) && handle_result) { + if (logger) { + logger->start_log_line(); + logger->log_partial("Return stmt found in uses for "); + dump_quoted_tree(logger->get_printer(), decl); + 
logger->end_log_line(); + } + if (entry.m_state == this->m_tainted) { if (logger) { + auto pp = logger->get_printer(); logger->start_log_line(); - logger->log_partial("Return stmt found in uses for "); - dump_quoted_tree(logger->get_printer(), decl); + logger->log_partial("Returned tree "); + dump_quoted_tree(pp, decl); + logger->log_partial(" is tainted, tainting caller's lvalue ("); + dump_quoted_tree(pp, result_lvalue); + pp_right_paren(pp); logger->end_log_line(); } - if (entry.m_state == this->m_tainted) { - if (logger) { - auto pp = logger->get_printer(); - logger->start_log_line(); - logger->log_partial("Returned tree "); - dump_quoted_tree(pp, decl); - logger->log_partial(" is tainted, tainting caller's lvalue ("); - dump_quoted_tree(pp, result_lvalue); - pp_right_paren(pp); - logger->end_log_line(); - } - } - auto caller_reg = frame_reg->get_calling_frame()->get_region_for_local(sm_ctx->get_new_program_state()->m_region_model->get_manager(), result_lvalue, nullptr); - sm_ctx->on_transition(node, call, caller_reg, this->m_start, this->m_tainted, reg); } + auto caller_reg = frame_reg->get_calling_frame()->get_region_for_local(sm_ctx->get_new_program_state()->m_region_model->get_manager(), result_lvalue, nullptr); + sm_ctx->on_transition(node, call, caller_reg, this->m_start, this->m_tainted, reg); } + // if (is_a<gassign *>(use_stmt)) { + // if (logger) { + // logger->start_log_line(); + // logger->log_partial("Found an assign stmt for "); + // dump_quoted_tree(logger->get_printer(), decl); + // logger->log_partial(" | stmt:"); + // logger->end_log_line(); + // print_gimple_stmt(logger->get_file(), const_cast<gimple*>(use_stmt), logger->get_printer()->indent_skip, TDF_VOPS|TDF_MEMSYMS); + // } + // auto lhs = gimple_assign_lhs(use_stmt); + // if (TREE_CODE(lhs) == MEM_REF) + // lhs = TREE_OPERAND(lhs, 0); + // if (this->is_tainted(sm_ctx, use_stmt, lhs)) { + // if (logger) { + // logger->start_log_line(); + // logger->log_partial("lhs tainted on stmt: "); + 
// dump_quoted_tree(logger->get_printer(), lhs); + // logger->end_log_line(); + // } + // } + // } } } if (decl_reg->get_parent_region() == frame_reg) { - if (logger) - logger->log("Clearing any state..."); + if (logger) { + auto pp = logger->get_printer(); + logger->start_log_line(); + logger->log_partial("Clearing any state for: "); + decl_reg->dump_to_pp(pp, false); + logger->end_log_line(); + } smap->clear_any_state(reg); } } @@ -542,16 +566,15 @@ namespace crypto_taint { } if (this->is_tainted(sm_ctx, stmt, arg1)) { utils::dump_taint(this->get_logger(), sm_ctx, any_pointer_p(lhs), lhs, arg1); - // if (reg) - // sm_ctx->on_transition(node, stmt, reg, this->m_start, this->m_tainted, sm_ctx->get_diagnostic_tree(arg1)); - // else - sm_ctx->on_transition(node, stmt, lhs, this->m_start, this->m_tainted, - sm_ctx->get_diagnostic_tree(arg1), !any_pointer_p(lhs)); + sm_ctx->on_transition(node, stmt, lhs, + this->m_start, this->m_tainted, + sm_ctx->get_diagnostic_tree(arg1), + !any_pointer_p(lhs)); if (lhs_is_array_or_ptr_expr || lhs_is_mem_ref) { if (tree converted = utils::array::convert_view(sm_ctx, lhs, this->get_logger())) { - utils::dump_taint(this->get_logger(), sm_ctx, any_pointer_p(converted), converted, lhs); + utils::dump_taint(this->get_logger(), sm_ctx, any_pointer_p(converted), converted, arg1); sm_ctx->on_transition(node, stmt, converted, this->m_start, - this->m_tainted, lhs, !any_pointer_p(converted)); + this->m_tainted, arg1, !any_pointer_p(converted)); } } ret = true; diff --git a/src/is_tainted_diagnostic.cc b/src/is_tainted_diagnostic.cc new file mode 100644 index 0000000..2223d02 --- /dev/null +++ b/src/is_tainted_diagnostic.cc @@ -0,0 +1,164 @@ +#include <memory> +#include <string> + +#include <gcc-plugin.h> +#include <tree.h> +#include <json.h> +#include <diagnostic.h> +#include <diagnostic-event-id.h> +#include <gimple.h> +#include <analyzer/analyzer.h> +#include <analyzer/analyzer-logging.h> +#include <analyzer/sm.h> +#include 
<analyzer/pending-diagnostic.h> +#include <analyzer/diagnostic-manager.h> + +// Using namespace ana to avoid error in following analyzer's include +using namespace ana; + +#include <analyzer/store.h> +#include <analyzer/region.h> +#include <analyzer/checker-event.h> + +#include "crypto_taint_sm.h" +#include "is_tainted_diagnostic.h" +#include "utils.h" + +namespace ana { + + static std::string dump_state_change(const evdesc::state_change &ev) { + /* + tree m_expr; + tree m_origin; + state_machine::state_t m_old_state; + state_machine::state_t m_new_state; + diagnostic_event_id_t m_event_id; + const state_change_event &m_event; + */ + char buf[50] = { 0 }; + std::string res ("state_change: { m_expr: "); + if (crypto_taint::utils::get_name(ev.m_expr, buf, 50) == 1) + res.append(buf); + else + res.append("<anon>"); + + res.append(", m_origin: "); + if (crypto_taint::utils::get_name(ev.m_origin, buf, 50) == 1) + res.append(buf); + else + res.append("<anon>"); + res.append(", m_old_state: "); + res.append(ev.m_old_state->get_name()); + res.append(", m_new_state: "); + res.append(ev.m_new_state->get_name()); + res.append("}"); + + return res; + } + + static std::string dump_final_event(const evdesc::final_event &ev) { + /* + tree m_expr; + state_machine::state_t m_state; + const warning_event &m_event; // Can not dump it + */ + char buf[50] = { 0 }; + std::string res ("final_event: { m_expr: "); + if (crypto_taint::utils::get_name(ev.m_expr, buf, 50) == 1) + res.append(buf); + else + res.append("<anon>"); + + res.append(", m_state: "); + res.append(ev.m_state->get_name()); + res.append("}"); + + return res; + } + + // BEGIN is_tainted_diagnostic impl + + is_tainted_diagnostic::is_tainted_diagnostic(const crypto_taint::crypto_taint_state_machine& sm, tree src) + : m_sm(sm), m_src(src) + {} + + bool is_tainted_diagnostic::subclass_equal_p(const pending_diagnostic &other) const { + return same_tree_p(this->m_src, static_cast<const is_tainted_diagnostic &>(other).m_src); + } + + label_text 
is_tainted_diagnostic::describe_state_change (const evdesc::state_change &change) { + // TODO: clean this up + // fnotice(stderr, "%s\n", dump_state_change(change).data()); + if (change.m_new_state == this->m_sm.m_tainted) { + auto stmt = change.m_event.m_stmt; + // Check whether we're dealing with a COMPONENT_REF, i.e. a struct access, or not + if (change.m_expr && !any_pointer_p(change.m_expr) && stmt && gimple_code(stmt) == GIMPLE_ASSIGN) { + if (tree lhs = gimple_assign_lhs(stmt)) { + if (TREE_CODE(lhs) == COMPONENT_REF) { + if (change.m_origin) + return change.formatted_print("%qE is tainted here because of %qE", + lhs, change.m_origin); + else + return change.formatted_print("%qE is tainted here", lhs); + } + else if (change.m_origin) + return change.formatted_print("%qE gets tainted here because of %qE", change.m_expr, change.m_origin); + // Fallback to printing the svalue + return change.formatted_print("%qE gets tainted here", change.m_expr); + } + } + // Try to get the ptr directly from the stmt + else if (stmt && gimple_code(stmt) == GIMPLE_ASSIGN) { + auto var = gimple_assign_lhs(stmt); + if (var && change.m_origin) { + if (any_pointer_p(var)) + return change.formatted_print("%qE points-to tainted data %qE here", var, change.m_origin); + else + return change.formatted_print("%qE gets tainted here because of %qE", change.m_expr, change.m_origin); + } + else if (var) return change.formatted_print("%qE points-to tainted data", var); + } + else if (change.m_origin) + return change.formatted_print("%qE gets tainted here because of %qE", change.m_expr, change.m_origin); + // Fallback to printing the svalue + return change.formatted_print("%qE gets tainted here", change.m_expr); + } + return label_text(); + } + + // END is_tainted_diagnostic impl + + // BEGIN taint_diagnostic impl + + taint_diagnostic::taint_diagnostic(const crypto_taint::crypto_taint_state_machine &sm, tree src) + : is_tainted_diagnostic(sm, src) + {} + + const char 
*taint_diagnostic::get_kind() const { + return "taint_diagnostic"; + } + + int taint_diagnostic::get_controlling_option() const { + return 1; + } + + label_text taint_diagnostic::describe_final_event(const evdesc::final_event &ev) { + if (ev.m_state == this->m_sm.m_tainted) { + // Check if source of the problem is a struct access + if (TREE_CODE(this->m_src) == COMPONENT_REF) + return ev.formatted_print("field %qE is tainted", this->m_src); + else if (TREE_CODE(this->m_src) == ARRAY_REF) + return ev.formatted_print("element %qE is tainted", this->m_src); + else if (any_pointer_p(this->m_src)) + return ev.formatted_print("pointer %qE is tainted", this->m_src); + else return ev.formatted_print("l-value %qE is tainted", this->m_src); + } + return label_text(); + } + + bool taint_diagnostic::emit(rich_location * loc) { + return warning_at(loc, this->get_controlling_option(), "%qE is tainted", this->m_src); + } + + // END taint_diagnostic impl +} \ No newline at end of file diff --git a/src/utils.cc b/src/utils.cc index 43546de..4feb110 100644 --- a/src/utils.cc +++ b/src/utils.cc @@ -530,14 +530,21 @@ namespace array { tree res = NULL_TREE; - if (logger) - LOG_SCOPE(logger); + LOG_SCOPE(logger); auto code = TREE_CODE(t); auto model = sm_ctx->get_new_program_state()->m_region_model; const region *reg = nullptr; const svalue *sval = nullptr; + if (logger) { + auto pp = logger->get_printer(); + logger->start_log_line(); + logger->log_partial("t: "); + dump_quoted_tree(pp, t); + logger->end_log_line(); + } + switch (code) { case ARRAY_REF: reg = model->get_lvalue(t, nullptr); @@ -546,8 +553,9 @@ namespace array { break; case POINTER_PLUS_EXPR: sval = model->get_rvalue(t, nullptr); - if (TREE_CODE(sval->get_type()) == ARRAY_TYPE) - reg = offset_to_elm(sm_ctx, sval, nullptr); + if (TREE_CODE(sval->get_type()) == ARRAY_TYPE + || TREE_CODE(sval->get_type()) == POINTER_TYPE) + reg = offset_to_elm(sm_ctx, sval, logger); res = reg ? 
model->get_representative_tree(reg) : t; break; default: -- GitLab